void NativeBlockOutputStream::writeData(const IDataType & type, const ColumnPtr & column, WriteBuffer & ostr, UInt64 offset, UInt64 limit) { /** If there are columns-constants - then we materialize them. * (Since the data type does not know how to serialize / deserialize constants.) */ ColumnPtr full_column = column->convertToFullColumnIfConst(); IDataType::SerializeBinaryBulkSettings settings; settings.getter = [&ostr](IDataType::SubstreamPath) -> WriteBuffer * { return &ostr; }; settings.position_independent_encoding = false; settings.low_cardinality_max_dictionary_size = 0; IDataType::SerializeBinaryBulkStatePtr state; type.serializeBinaryBulkStatePrefix(settings, state); type.serializeBinaryBulkWithMultipleStreams(*full_column, offset, limit, settings, state); type.serializeBinaryBulkStateSuffix(settings, state); }
bool Join::insertFromBlock(const Block & block) { std::unique_lock lock(rwlock); if (empty()) throw Exception("Logical error: Join was not initialized", ErrorCodes::LOGICAL_ERROR); size_t keys_size = key_names_right.size(); ColumnRawPtrs key_columns(keys_size); /// Rare case, when keys are constant. To avoid code bloat, simply materialize them. Columns materialized_columns; /// Memoize key columns to work. for (size_t i = 0; i < keys_size; ++i) { key_columns[i] = block.getByName(key_names_right[i]).column.get(); if (ColumnPtr converted = key_columns[i]->convertToFullColumnIfConst()) { materialized_columns.emplace_back(converted); key_columns[i] = materialized_columns.back().get(); } } /// We will insert to the map only keys, where all components are not NULL. ColumnPtr null_map_holder; ConstNullMapPtr null_map{}; extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map); size_t rows = block.rows(); blocks.push_back(block); Block * stored_block = &blocks.back(); if (getFullness(kind)) { /** Move the key columns to the beginning of the block. * This is where NonJoinedBlockInputStream will expect. */ size_t key_num = 0; for (const auto & name : key_names_right) { size_t pos = stored_block->getPositionByName(name); ColumnWithTypeAndName col = stored_block->safeGetByPosition(pos); stored_block->erase(pos); stored_block->insert(key_num, std::move(col)); ++key_num; } } else { /// Remove the key columns from stored_block, as they are not needed. for (const auto & name : key_names_right) stored_block->erase(stored_block->getPositionByName(name)); } size_t size = stored_block->columns(); /// Rare case, when joined columns are constant. To avoid code bloat, simply materialize them. 
for (size_t i = 0; i < size; ++i) { ColumnPtr col = stored_block->safeGetByPosition(i).column; if (ColumnPtr converted = col->convertToFullColumnIfConst()) stored_block->safeGetByPosition(i).column = converted; } /// In case of LEFT and FULL joins, if use_nulls, convert joined columns to Nullable. if (use_nulls && (kind == ASTTableJoin::Kind::Left || kind == ASTTableJoin::Kind::Full)) { for (size_t i = getFullness(kind) ? keys_size : 0; i < size; ++i) { convertColumnToNullable(stored_block->getByPosition(i)); } } if (kind != ASTTableJoin::Kind::Cross) { /// Fill the hash table. if (!getFullness(kind)) { if (strictness == ASTTableJoin::Strictness::Any) insertFromBlockImpl<ASTTableJoin::Strictness::Any>(type, maps_any, rows, key_columns, keys_size, key_sizes, stored_block, null_map, pool); else insertFromBlockImpl<ASTTableJoin::Strictness::All>(type, maps_all, rows, key_columns, keys_size, key_sizes, stored_block, null_map, pool); } else { if (strictness == ASTTableJoin::Strictness::Any) insertFromBlockImpl<ASTTableJoin::Strictness::Any>(type, maps_any_full, rows, key_columns, keys_size, key_sizes, stored_block, null_map, pool); else insertFromBlockImpl<ASTTableJoin::Strictness::All>(type, maps_all_full, rows, key_columns, keys_size, key_sizes, stored_block, null_map, pool); } } return limits.check(getTotalRowCount(), getTotalByteCount(), "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED); }
void NativeBlockOutputStream::writeData(const IDataType & type, const ColumnPtr & column, WriteBuffer & ostr, size_t offset, size_t limit) { /** If there are columns-constants - then we materialize them. * (Since the data type does not know how to serialize / deserialize constants.) */ ColumnPtr full_column; if (auto converted = column->convertToFullColumnIfConst()) full_column = converted; else full_column = column; if (type.isNullable()) { const DataTypeNullable & nullable_type = static_cast<const DataTypeNullable &>(type); const IDataType & nested_type = *nullable_type.getNestedType(); const ColumnNullable & nullable_col = static_cast<const ColumnNullable &>(*full_column.get()); const ColumnPtr & nested_col = nullable_col.getNestedColumn(); const IColumn & null_map = nullable_col.getNullMapConcreteColumn(); DataTypeUInt8{}.serializeBinaryBulk(null_map, ostr, offset, limit); writeData(nested_type, nested_col, ostr, offset, limit); } else if (const DataTypeArray * type_arr = typeid_cast<const DataTypeArray *>(&type)) { /** For arrays, you first need to serialize the offsets, and then the values. */ const ColumnArray & column_array = typeid_cast<const ColumnArray &>(*full_column); type_arr->getOffsetsType()->serializeBinaryBulk(*column_array.getOffsetsColumn(), ostr, offset, limit); if (!typeid_cast<const ColumnArray &>(*full_column).getData().empty()) { const ColumnArray::Offsets_t & offsets = column_array.getOffsets(); if (offset > offsets.size()) return; /** offset - from which array to write. * limit - how many arrays should be written, or 0, if you write everything that is. * end - up to which array written part finishes. * * nested_offset - from which nested element to write. * nested_limit - how many nested elements to write, or 0, if you write everything that is. */ size_t end = std::min(offset + limit, offsets.size()); size_t nested_offset = offset ? offsets[offset - 1] : 0; size_t nested_limit = limit ? 
offsets[end - 1] - nested_offset : 0; const DataTypePtr & nested_type = type_arr->getNestedType(); DataTypePtr actual_type; if (nested_type->isNull()) { /// Special case: an array of Null is actually an array of Nullable(UInt8). actual_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt8>()); } else actual_type = nested_type; if (limit == 0 || nested_limit) writeData(*actual_type, typeid_cast<const ColumnArray &>(*full_column).getDataPtr(), ostr, nested_offset, nested_limit); } } else type.serializeBinaryBulk(*full_column, ostr, offset, limit); }
/// Builds an array column from `mapped` (expanded to a full column if it is constant),
/// using the offsets of `array`.
static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped)
{
    const auto & source_offsets = array.getOffsetsPtr();
    return ColumnArray::create(mapped->convertToFullColumnIfConst(), source_offsets);
}