void FunctionHasColumnInTable::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { auto get_string_from_block = [&](size_t column_pos) -> String { ColumnPtr column = block.getByPosition(column_pos).column; const ColumnConst * const_column = checkAndGetColumnConst<ColumnString>(column.get()); return const_column->getValue<String>(); }; size_t arg = 0; String host_name; String user_name; String password; if (arguments.size() > 3) host_name = get_string_from_block(arguments[arg++]); if (arguments.size() > 4) user_name = get_string_from_block(arguments[arg++]); if (arguments.size() > 5) password = get_string_from_block(arguments[arg++]); String database_name = get_string_from_block(arguments[arg++]); String table_name = get_string_from_block(arguments[arg++]); String column_name = get_string_from_block(arguments[arg++]); bool has_column; if (host_name.empty()) { const StoragePtr & table = global_context.getTable(database_name, table_name); has_column = table->hasColumn(column_name); } else { std::vector<std::vector<String>> host_names = {{ host_name }}; auto cluster = std::make_shared<Cluster>( global_context.getSettings(), host_names, !user_name.empty() ? user_name : "default", password, global_context.getTCPPort(), false); auto remote_columns = getStructureOfRemoteTable(*cluster, database_name, table_name, global_context); has_column = remote_columns.hasPhysical(column_name); } block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, has_column); }
// Joins this column with join_column_ by comparing every pair of rows.
// For each pair of equal values the row index of this column is appended to
// the first position list of the result and the row index of join_column_ to
// the second one. Prints an error and terminates the process when the element
// type of join_column_ differs from Type.
const PositionListPairPtr ColumnBaseTyped<Type>::nested_loop_join(ColumnPtr join_column_)
{
    assert(join_column_ != NULL);

    if (join_column_->type() != typeid(Type))
    {
        std::cout << "Fatal Error!!! Typemismatch for columns " << this->name_ << " and " << join_column_->getName() << std::endl;
        std::cout << "File: " << __FILE__ << " Line: " << __LINE__ << std::endl;
        exit(-1);
    }

    // The type check above makes the downcast valid.
    typedef ColumnBaseTyped<Type> TypedColumn;
    shared_pointer_namespace::shared_ptr<TypedColumn> other =
        shared_pointer_namespace::static_pointer_cast<TypedColumn>(join_column_);

    PositionListPairPtr matches(new PositionListPair());
    matches->first = PositionListPtr(new PositionList());
    matches->second = PositionListPtr(new PositionList());

    for (unsigned int left = 0; left < this->size(); left++)
    {
        for (unsigned int right = 0; right < other->size(); right++)
        {
            if (!((*this)[left] == (*other)[right]))
                continue;

            if (debug)
                std::cout << "MATCH: (" << left << "," << right << ")" << std::endl;

            matches->first->push_back(left);
            matches->second->push_back(right);
        }
    }

    return matches;
}
/** Recursively reshapes a column so that its nullability follows data_type.
  *
  * - Nullable column: if data_type is Nullable too, the nested column is processed
  *   with the nested type and wrapped again with the same null map; otherwise the
  *   Nullable wrapper is dropped and the nested column is processed with data_type.
  * - Array column: the data column is processed with the nested array type.
  * - Tuple column: every element is processed with the matching element type.
  * - Any other column is returned as is.
  *
  * Throws LOGICAL_ERROR when an Array/Tuple column is paired with a non-Array/non-Tuple type.
  */
ColumnPtr FunctionArrayIntersect::castRemoveNullable(const ColumnPtr & column, const DataTypePtr & data_type) const
{
    if (auto nullable_column = checkAndGetColumn<ColumnNullable>(column.get()))
    {
        auto target_nullable = checkAndGetDataType<DataTypeNullable>(data_type.get());
        const auto & inner = nullable_column->getNestedColumnPtr();

        /// Target is not Nullable: strip the wrapper and continue with the nested column.
        if (!target_nullable)
            return castRemoveNullable(inner, data_type);

        auto casted_inner = castRemoveNullable(inner, target_nullable->getNestedType());
        return ColumnNullable::create(casted_inner, nullable_column->getNullMapColumnPtr());
    }

    if (auto array_column = checkAndGetColumn<ColumnArray>(column.get()))
    {
        auto target_array = checkAndGetDataType<DataTypeArray>(data_type.get());
        if (!target_array)
            throw Exception{"Cannot cast array column to column with type "
                + data_type->getName() + " in function " + getName(), ErrorCodes::LOGICAL_ERROR};

        auto casted_data = castRemoveNullable(array_column->getDataPtr(), target_array->getNestedType());
        return ColumnArray::create(casted_data, array_column->getOffsetsPtr());
    }

    if (auto tuple_column = checkAndGetColumn<ColumnTuple>(column.get()))
    {
        auto target_tuple = checkAndGetDataType<DataTypeTuple>(data_type.get());
        if (!target_tuple)
            throw Exception{"Cannot cast tuple column to type "
                + data_type->getName() + " in function " + getName(), ErrorCodes::LOGICAL_ERROR};

        auto element_count = tuple_column->getColumns().size();
        Columns casted_elements(element_count);

        const auto & element_types = target_tuple->getElements();

        for (size_t pos = 0; pos < element_count; ++pos)
            casted_elements[pos] = castRemoveNullable(tuple_column->getColumnPtr(pos), element_types[pos]);

        return ColumnTuple::create(casted_elements);
    }

    return column;
}
/** Returns the list of columns (name and type) of `database`.`table` as seen by a cluster.
  *
  * If the shard returned by cluster.getAnyShardInfo() is local, the list is taken from the
  * table found in `context`. Otherwise a "DESC TABLE" query is sent through the connection
  * pool of that shard and each row of the answer ("name" and "type" columns) becomes one
  * entry of the result, with the type resolved through DataTypeFactory.
  */
NamesAndTypesList getStructureOfRemoteTable(
    const Cluster & cluster,
    const std::string & database,
    const std::string & table,
    const Context & context)
{
    /// Query that describes the table.
    String describe_query = "DESC TABLE " + backQuoteIfNeed(database) + "." + backQuoteIfNeed(table);

    Settings settings = context.getSettings();
    NamesAndTypesList structure;

    /// Send it to whichever remote shard comes first.
    const auto & shard_info = cluster.getAnyShardInfo();

    if (shard_info.isLocal())
        return context.getTable(database, table)->getColumnsList();

    ConnectionPoolPtr pool = shard_info.pool;

    BlockInputStreamPtr input = std::make_shared<RemoteBlockInputStream>(
        pool.get(), describe_query, &settings, nullptr,
        Tables(), QueryProcessingStage::Complete, context);
    input->readPrefix();

    const DataTypeFactory & type_factory = DataTypeFactory::instance();

    while (Block current = input->read())
    {
        ColumnPtr names = current.getByName("name").column;
        ColumnPtr types = current.getByName("type").column;
        const size_t rows = names->size();

        for (size_t row = 0; row != rows; ++row)
        {
            String column_name = (*names)[row].get<const String &>();
            String type_name = (*types)[row].get<const String &>();

            structure.emplace_back(column_name, type_factory.get(type_name));
        }
    }

    return structure;
}
/** Checks whether the table `arguments[0]`.`arguments[1]` known to global_context
  * has a column named by `arguments[2]`.
  *
  * All three arguments are read as constant string columns. The answer is stored in
  * block[result] as a constant UInt8 column with block.rows() rows.
  *
  * Throws ILLEGAL_COLUMN if an argument is not a ColumnConstString.
  */
void FunctionHasColumnInTable::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result)
{
    /// NOTE(review): the returned reference points into the column object; presumably it stays
    /// valid because the block keeps holding the column - confirm.
    auto get_string_from_block = [&](size_t column_pos) -> const String &
    {
        ColumnPtr column = block.safeGetByPosition(column_pos).column;
        const ColumnConstString * const_column = typeid_cast<const ColumnConstString *>(column.get());

        /// The pointer form of typeid_cast yields nullptr on a type mismatch;
        /// report that instead of dereferencing a null pointer.
        if (!const_column)
            throw Exception("Unexpected type of column for FunctionHasColumnInTable: " + column->getName(),
                ErrorCodes::ILLEGAL_COLUMN);

        return const_column->getData();
    };

    const String & database_name = get_string_from_block(arguments[0]);
    const String & table_name = get_string_from_block(arguments[1]);
    const String & column_name = get_string_from_block(arguments[2]);

    const StoragePtr & table = global_context.getTable(database_name, table_name);
    const bool has_column = table->hasColumn(column_name);

    block.safeGetByPosition(result).column = std::make_shared<ColumnConstUInt8>(
        block.rows(), has_column);
}
/** Applies ReverseImpl to the first argument and stores the outcome in block[result].
  * ColumnString is handled by ReverseImpl::vector, ColumnFixedString by
  * ReverseImpl::vector_fixed; any other column type throws ILLEGAL_COLUMN.
  */
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t) override
{
    const ColumnPtr source = block.getByPosition(arguments[0]).column;

    if (const ColumnString * string_column = checkAndGetColumn<ColumnString>(source.get()))
    {
        auto output = ColumnString::create();
        ReverseImpl::vector(
            string_column->getChars(), string_column->getOffsets(),
            output->getChars(), output->getOffsets());
        block.getByPosition(result).column = std::move(output);
        return;
    }

    if (const ColumnFixedString * fixed_column = checkAndGetColumn<ColumnFixedString>(source.get()))
    {
        auto output = ColumnFixedString::create(fixed_column->getN());
        ReverseImpl::vector_fixed(fixed_column->getChars(), fixed_column->getN(), output->getChars());
        block.getByPosition(result).column = std::move(output);
        return;
    }

    throw Exception(
        "Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of argument of function " + getName(),
        ErrorCodes::ILLEGAL_COLUMN);
}
size_t getElementNum(const ColumnPtr & index_column, const DataTypeTuple & tuple) const { if (auto index_col = checkAndGetColumnConst<ColumnUInt8>(index_column.get())) { size_t index = index_col->getValue<UInt8>(); if (index == 0) throw Exception("Indices in tuples are 1-based.", ErrorCodes::ILLEGAL_INDEX); if (index > tuple.getElements().size()) throw Exception("Index for tuple element is out of range.", ErrorCodes::ILLEGAL_INDEX); return index - 1; } else if (auto name_col = checkAndGetColumnConst<ColumnString>(index_column.get())) { return tuple.getPositionByName(name_col->getValue<String>()); } else throw Exception("Second argument to " + getName() + " must be a constant UInt8 or String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); }
/** Replaces ColumnLowCardinality with its full column, descending into
  * Array, Const and Tuple columns. A null pointer and columns of any other
  * kind are returned unchanged.
  */
ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column)
{
    if (!column)
        return column;

    const auto * raw = column.get();

    if (const auto * as_array = typeid_cast<const ColumnArray *>(raw))
        return ColumnArray::create(
            recursiveRemoveLowCardinality(as_array->getDataPtr()),
            as_array->getOffsetsPtr());

    if (const auto * as_const = typeid_cast<const ColumnConst *>(raw))
        return ColumnConst::create(
            recursiveRemoveLowCardinality(as_const->getDataColumnPtr()),
            as_const->size());

    if (const auto * as_tuple = typeid_cast<const ColumnTuple *>(raw))
    {
        /// Work on a copy of the element list and rebuild the tuple from it.
        Columns elements = as_tuple->getColumns();
        for (size_t pos = 0; pos < elements.size(); ++pos)
            elements[pos] = recursiveRemoveLowCardinality(elements[pos]);

        return ColumnTuple::create(elements);
    }

    if (const auto * as_low_cardinality = typeid_cast<const ColumnLowCardinality *>(raw))
        return as_low_cardinality->convertToFullColumn();

    return column;
}
// Sort-merge join entry point.
// After checking that join_column_ has element type Type (the process is
// terminated otherwise), this implementation performs no matching: it always
// returns a pair of empty position lists.
const PositionListPairPtr ColumnBaseTyped<Type>::sort_merge_join(ColumnPtr join_column_)
{
    if (join_column_->type() != typeid(Type))
    {
        std::cout << "Fatal Error!!! Typemismatch for columns " << this->name_ << " and " << join_column_->getName() << std::endl;
        std::cout << "File: " << __FILE__ << " Line: " << __LINE__ << std::endl;
        exit(-1);
    }

    // Downcast kept from the original even though the typed column is not used afterwards.
    typedef ColumnBaseTyped<Type> TypedColumn;
    shared_pointer_namespace::shared_ptr<TypedColumn> other =
        shared_pointer_namespace::static_pointer_cast<TypedColumn>(join_column_);

    PositionListPairPtr matches(new PositionListPair());
    matches->first = PositionListPtr(new PositionList());
    matches->second = PositionListPtr(new PositionList());

    return matches;
}
void NativeBlockOutputStream::writeData(const IDataType & type, const ColumnPtr & column, WriteBuffer & ostr, UInt64 offset, UInt64 limit) { /** If there are columns-constants - then we materialize them. * (Since the data type does not know how to serialize / deserialize constants.) */ ColumnPtr full_column = column->convertToFullColumnIfConst(); IDataType::SerializeBinaryBulkSettings settings; settings.getter = [&ostr](IDataType::SubstreamPath) -> WriteBuffer * { return &ostr; }; settings.position_independent_encoding = false; settings.low_cardinality_max_dictionary_size = 0; IDataType::SerializeBinaryBulkStatePtr state; type.serializeBinaryBulkStatePrefix(settings, state); type.serializeBinaryBulkWithMultipleStreams(*full_column, offset, limit, settings, state); type.serializeBinaryBulkStateSuffix(settings, state); }
// Hash join of this column with join_column_.
// A hash table mapping value -> row index is built from this column, then every
// row of join_column_ is probed against it. For each match the row index of this
// column is appended to the first position list of the result and the probing
// row index to the second one. Prints an error and terminates the process when
// the element type of join_column_ differs from T.
const PositionListPairPtr ColumnBaseTyped<T>::hash_join(ColumnPtr join_column_)
{
    typedef boost::unordered_multimap < T, TID, boost::hash<T>, std::equal_to<T> > HashTable;
    typedef typename HashTable::iterator HashTableIterator;
    typedef ColumnBaseTyped<T> TypedColumn;

    if (join_column_->type() != typeid(T))
    {
        std::cout << "Fatal Error!!! Typemismatch for columns " << this->name_ << " and " << join_column_->getName() << std::endl;
        std::cout << "File: " << __FILE__ << " Line: " << __LINE__ << std::endl;
        exit(-1);
    }

    shared_pointer_namespace::shared_ptr<TypedColumn> probe_column =
        shared_pointer_namespace::static_pointer_cast<TypedColumn>(join_column_);

    PositionListPairPtr matches(new PositionListPair());
    matches->first = PositionListPtr(new PositionList());
    matches->second = PositionListPtr(new PositionList());

    // Build phase: index every row of this column by its value.
    HashTable build_table;
    for (unsigned int row = 0; row < this->size(); row++)
        build_table.insert(std::pair<T,TID>((*this)[row], row));

    // Probe phase: look up every row of the other column.
    for (unsigned int row = 0; row < probe_column->size(); row++)
    {
        std::pair<HashTableIterator, HashTableIterator> candidates = build_table.equal_range((*probe_column)[row]);

        for (HashTableIterator candidate = candidates.first; candidate != candidates.second; candidate++)
        {
            if (!(candidate->first == (*probe_column)[row]))
                continue;

            matches->first->push_back(candidate->second);
            matches->second->push_back(row);
        }
    }

    return matches;
}
void CLASS::processNextRow() { LOG_DEBUG(mLogger, "In OnDemandResultSet::processNextRow()"); ColumnVector::iterator i; int count = 0; for (i = mColumns->begin(); i != mColumns->end(); i++, count++) { ColumnPtr column = *i; string columnName = column->getName(); string result; try { result = mResults->GetString(columnName); } catch (std::invalid_argument &e) { result = ""; LOG_DEBUG(mLogger, str_stream() << e.what() << " -- ignoring"); } SQLSMALLINT type = column->getDataType(); if (type == SQL_LONGVARBINARY || type == SQL_BINARY || type == SQL_VARBINARY) { LOG_DEBUG(mLogger, str_stream() << count << " " << column->getName() << ": [omitting possible binary data]"); } else { LOG_DEBUG(mLogger, str_stream() << count << " " << column->getName() << ": " << result); } if (column->isBound()) { column->setData(count, result); } } }
/** Reads the next block of rows for the current task and returns it.
  *
  * With PREWHERE (prewhere_actions is set) the work is done in a do-while loop that repeats
  * while mark ranges remain, the block is still empty and the stream is not cancelled:
  *  1. Marks are read through pre_reader from the back of task->mark_ranges. The budget is
  *     max_block_size_marks (at least one), additionally capped by the estimate of
  *     task->size_predictor when it is set. The ranges read are remembered in ranges_to_read.
  *  2. Missing columns are filled, prewhere_actions is executed and the column named
  *     prewhere_column is inspected (through its nested column when it is Nullable).
  *  3. If that column is a ColumnConstUInt8: a zero value yields an empty block, otherwise the
  *     same ranges are read through reader.
  *     If it is a ColumnUInt8: the remaining columns are read through reader only for runs of
  *     marks whose slice of the filter contains a nonzero byte, and post_filter is assembled
  *     from the matching slices of pre_filter. Columns whose name is in task->column_name_set
  *     are filtered with post_filter, the others with pre_filter; unless it is removed, the
  *     PREWHERE column is replaced by a constant 1.
  *     Any other column type throws ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER.
  *
  * Without PREWHERE, marks are read through reader under the same budget rules, the size
  * predictor (if any) is updated after each read, and missing columns are filled at the end.
  *
  * An empty block is returned when nothing was read, for example after cancellation.
  */
Block MergeTreeBaseBlockInputStream::readFromPart() { Block res; if (task->size_predictor) task->size_predictor->startBlock(); if (prewhere_actions) { do { /// Let's read the full block of columns needed to calculate the expression in PREWHERE. size_t space_left = std::max(1LU, max_block_size_marks); MarkRanges ranges_to_read; if (task->size_predictor) { /// FIXME: size prediction model is updated by filtered rows, but it predicts size of unfiltered rows also size_t recommended_marks = task->size_predictor->estimateNumMarks(preferred_block_size_bytes, storage.index_granularity); if (res && recommended_marks < 1) break; space_left = std::min(space_left, std::max(1LU, recommended_marks)); } while (!task->mark_ranges.empty() && space_left && !isCancelled()) { auto & range = task->mark_ranges.back(); size_t marks_to_read = std::min(range.end - range.begin, space_left); pre_reader->readRange(range.begin, range.begin + marks_to_read, res); ranges_to_read.emplace_back(range.begin, range.begin + marks_to_read); space_left -= marks_to_read; range.begin += marks_to_read; if (range.begin == range.end) task->mark_ranges.pop_back(); } /// In case of isCancelled. if (!res) return res; progressImpl({ res.rows(), res.bytes() }); pre_reader->fillMissingColumns(res, task->ordered_names, task->should_reorder); /// Compute the expression in PREWHERE. prewhere_actions->execute(res); ColumnPtr column = res.getByName(prewhere_column).column; if (task->remove_prewhere_column) res.erase(prewhere_column); const auto pre_bytes = res.bytes(); ColumnPtr observed_column; if (column->isNullable()) { ColumnNullable & nullable_col = static_cast<ColumnNullable &>(*column); observed_column = nullable_col.getNestedColumn(); } else observed_column = column; /** If the filter is a constant (for example, it says PREWHERE 1), * then either return an empty block, or return the block unchanged. 
*/ if (const auto column_const = typeid_cast<const ColumnConstUInt8 *>(observed_column.get())) { if (!column_const->getData()) { res.clear(); return res; } for (const auto & range : ranges_to_read) reader->readRange(range.begin, range.end, res); progressImpl({ 0, res.bytes() - pre_bytes }); } else if (const auto column_vec = typeid_cast<const ColumnUInt8 *>(observed_column.get())) { size_t index_granularity = storage.index_granularity; const auto & pre_filter = column_vec->getData(); IColumn::Filter post_filter(pre_filter.size()); /// Let's read the rest of the columns in the required segments and compose our own filter for them. size_t pre_filter_pos = 0; size_t post_filter_pos = 0; for (const auto & range : ranges_to_read) { auto begin = range.begin; auto pre_filter_begin_pos = pre_filter_pos; for (auto mark = range.begin; mark <= range.end; ++mark) { UInt8 nonzero = 0; if (mark != range.end) { const size_t limit = std::min(pre_filter.size(), pre_filter_pos + index_granularity); for (size_t row = pre_filter_pos; row < limit; ++row) nonzero |= pre_filter[row]; } if (!nonzero) { if (mark > begin) { memcpy( &post_filter[post_filter_pos], &pre_filter[pre_filter_begin_pos], pre_filter_pos - pre_filter_begin_pos); post_filter_pos += pre_filter_pos - pre_filter_begin_pos; reader->readRange(begin, mark, res); } begin = mark + 1; pre_filter_begin_pos = std::min(pre_filter_pos + index_granularity, pre_filter.size()); } if (mark < range.end) pre_filter_pos = std::min(pre_filter_pos + index_granularity, pre_filter.size()); } } if (!post_filter_pos) { res.clear(); continue; } progressImpl({ 0, res.bytes() - pre_bytes }); post_filter.resize(post_filter_pos); /// Filter the columns related to PREWHERE using pre_filter, /// other columns - using post_filter. 
size_t rows = 0; for (const auto i : ext::range(0, res.columns())) { auto & col = res.safeGetByPosition(i); if (col.name == prewhere_column && res.columns() > 1) continue; col.column = col.column->filter(task->column_name_set.count(col.name) ? post_filter : pre_filter, -1); rows = col.column->size(); } /// Replace column with condition value from PREWHERE to a constant. if (!task->remove_prewhere_column) res.getByName(prewhere_column).column = std::make_shared<ColumnConstUInt8>(rows, 1); } else throw Exception{ "Illegal type " + column->getName() + " of column for filter. Must be ColumnUInt8 or ColumnConstUInt8.", ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER }; if (res) { if (task->size_predictor) task->size_predictor->update(res); reader->fillMissingColumnsAndReorder(res, task->ordered_names); } } while (!task->mark_ranges.empty() && !res && !isCancelled()); } else { size_t space_left = std::max(1LU, max_block_size_marks); while (!task->mark_ranges.empty() && space_left && !isCancelled()) { auto & range = task->mark_ranges.back(); size_t marks_to_read = std::min(range.end - range.begin, space_left); if (task->size_predictor) { size_t recommended_marks = task->size_predictor->estimateNumMarks(preferred_block_size_bytes, storage.index_granularity); if (res && recommended_marks < 1) break; marks_to_read = std::min(marks_to_read, std::max(1LU, recommended_marks)); } reader->readRange(range.begin, range.begin + marks_to_read, res); if (task->size_predictor) task->size_predictor->update(res); space_left -= marks_to_read; range.begin += marks_to_read; if (range.begin == range.end) task->mark_ranges.pop_back(); } /// In the case of isCancelled. if (!res) return res; progressImpl({ res.rows(), res.bytes() }); reader->fillMissingColumns(res, task->ordered_names, task->should_reorder); } return res; }
/** coalesce(arg0, arg1, ..., argN) is evaluated as
  *   multiIf(isNotNull(arg0), assumeNotNull(arg0), isNotNull(arg1), assumeNotNull(arg1), ..., argN)
  * with constant NULL arguments removed and the argument list cut after the first
  * non-Nullable argument.
  *
  * Helper columns are added to a copy of the block; only block[result] of the
  * original block is modified.
  */
void FunctionCoalesce::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result)
{
    /// Keep the arguments that can contribute: skip only-NULL ones, stop after the first non-Nullable one.
    ColumnNumbers filtered_args;
    filtered_args.reserve(arguments.size());
    for (const auto & arg : arguments)
    {
        const auto & type = block.getByPosition(arg).type;

        if (type->onlyNull())
            continue;

        filtered_args.push_back(arg);

        if (!type->isNullable())
            break;
    }

    FunctionIsNotNull is_not_null;
    FunctionAssumeNotNull assume_not_null;
    ColumnNumbers multi_if_args;

    Block temp_block = block;

    /// Every argument except the last one contributes a (condition, value) pair.
    const size_t filtered_count = filtered_args.size();
    for (size_t i = 0; i + 1 < filtered_count; ++i)
    {
        const size_t condition_pos = temp_block.columns();
        const size_t value_pos = condition_pos + 1;

        temp_block.insert({nullptr, std::make_shared<DataTypeUInt8>(), ""});
        is_not_null.execute(temp_block, {filtered_args[i]}, condition_pos);

        temp_block.insert({nullptr, removeNullable(block.getByPosition(filtered_args[i]).type), ""});
        assume_not_null.execute(temp_block, {filtered_args[i]}, value_pos);

        multi_if_args.push_back(condition_pos);
        multi_if_args.push_back(value_pos);
    }

    /// The last argument is the "else" branch and is passed as is.
    if (filtered_count != 0)
        multi_if_args.push_back(filtered_args.back());

    /// If all arguments appeared to be NULL.
    if (multi_if_args.empty())
    {
        block.getByPosition(result).column = block.getByPosition(result).type->createColumnConstWithDefaultValue(block.rows());
        return;
    }

    /// A single remaining argument is the result itself.
    if (multi_if_args.size() == 1)
    {
        block.getByPosition(result).column = block.getByPosition(multi_if_args.front()).column;
        return;
    }

    FunctionMultiIf{context}.execute(temp_block, multi_if_args, result);

    ColumnPtr res = std::move(temp_block.getByPosition(result).column);

    /// if last argument is not nullable, result should be also not nullable
    const bool last_is_nullable = block.getByPosition(multi_if_args.back()).column->isColumnNullable();
    if (!last_is_nullable && res->isColumnNullable())
        res = static_cast<const ColumnNullable &>(*res).getNestedColumnPtr();

    block.getByPosition(result).column = std::move(res);
}
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { /// coalesce(arg0, arg1, ..., argN) is essentially /// multiIf(isNotNull(arg0), assumeNotNull(arg0), isNotNull(arg1), assumeNotNull(arg1), ..., argN) /// with constant NULL arguments removed. ColumnNumbers filtered_args; filtered_args.reserve(arguments.size()); for (const auto & arg : arguments) { const auto & type = block.getByPosition(arg).type; if (type->onlyNull()) continue; filtered_args.push_back(arg); if (!type->isNullable()) break; } auto is_not_null = FunctionFactory::instance().get("isNotNull", context); auto assume_not_null = FunctionFactory::instance().get("assumeNotNull", context); auto multi_if = FunctionFactory::instance().get("multiIf", context); ColumnNumbers multi_if_args; Block temp_block = block; for (size_t i = 0; i < filtered_args.size(); ++i) { size_t res_pos = temp_block.columns(); bool is_last = i + 1 == filtered_args.size(); if (is_last) { multi_if_args.push_back(filtered_args[i]); } else { temp_block.insert({nullptr, std::make_shared<DataTypeUInt8>(), ""}); is_not_null->build({temp_block.getByPosition(filtered_args[i])})->execute(temp_block, {filtered_args[i]}, res_pos, input_rows_count); temp_block.insert({nullptr, removeNullable(block.getByPosition(filtered_args[i]).type), ""}); assume_not_null->build({temp_block.getByPosition(filtered_args[i])})->execute(temp_block, {filtered_args[i]}, res_pos + 1, input_rows_count); multi_if_args.push_back(res_pos); multi_if_args.push_back(res_pos + 1); } } /// If all arguments appeared to be NULL. 
if (multi_if_args.empty()) { block.getByPosition(result).column = block.getByPosition(result).type->createColumnConstWithDefaultValue(input_rows_count); return; } if (multi_if_args.size() == 1) { block.getByPosition(result).column = block.getByPosition(multi_if_args.front()).column; return; } ColumnsWithTypeAndName multi_if_args_elems; multi_if_args_elems.reserve(multi_if_args.size()); for (auto column_num : multi_if_args) multi_if_args_elems.emplace_back(temp_block.getByPosition(column_num)); multi_if->build(multi_if_args_elems)->execute(temp_block, multi_if_args, result, input_rows_count); ColumnPtr res = std::move(temp_block.getByPosition(result).column); /// if last argument is not nullable, result should be also not nullable if (!block.getByPosition(multi_if_args.back()).column->isColumnNullable() && res->isColumnNullable()) res = static_cast<const ColumnNullable &>(*res).getNestedColumnPtr(); block.getByPosition(result).column = std::move(res); }
void NativeBlockOutputStream::writeData(const IDataType & type, const ColumnPtr & column, WriteBuffer & ostr, size_t offset, size_t limit) { /** If there are columns-constants - then we materialize them. * (Since the data type does not know how to serialize / deserialize constants.) */ ColumnPtr full_column; if (auto converted = column->convertToFullColumnIfConst()) full_column = converted; else full_column = column; if (type.isNullable()) { const DataTypeNullable & nullable_type = static_cast<const DataTypeNullable &>(type); const IDataType & nested_type = *nullable_type.getNestedType(); const ColumnNullable & nullable_col = static_cast<const ColumnNullable &>(*full_column.get()); const ColumnPtr & nested_col = nullable_col.getNestedColumn(); const IColumn & null_map = nullable_col.getNullMapConcreteColumn(); DataTypeUInt8{}.serializeBinaryBulk(null_map, ostr, offset, limit); writeData(nested_type, nested_col, ostr, offset, limit); } else if (const DataTypeArray * type_arr = typeid_cast<const DataTypeArray *>(&type)) { /** For arrays, you first need to serialize the offsets, and then the values. */ const ColumnArray & column_array = typeid_cast<const ColumnArray &>(*full_column); type_arr->getOffsetsType()->serializeBinaryBulk(*column_array.getOffsetsColumn(), ostr, offset, limit); if (!typeid_cast<const ColumnArray &>(*full_column).getData().empty()) { const ColumnArray::Offsets_t & offsets = column_array.getOffsets(); if (offset > offsets.size()) return; /** offset - from which array to write. * limit - how many arrays should be written, or 0, if you write everything that is. * end - up to which array written part finishes. * * nested_offset - from which nested element to write. * nested_limit - how many nested elements to write, or 0, if you write everything that is. */ size_t end = std::min(offset + limit, offsets.size()); size_t nested_offset = offset ? offsets[offset - 1] : 0; size_t nested_limit = limit ? 
offsets[end - 1] - nested_offset : 0; const DataTypePtr & nested_type = type_arr->getNestedType(); DataTypePtr actual_type; if (nested_type->isNull()) { /// Special case: an array of Null is actually an array of Nullable(UInt8). actual_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt8>()); } else actual_type = nested_type; if (limit == 0 || nested_limit) writeData(*actual_type, typeid_cast<const ColumnArray &>(*full_column).getDataPtr(), ostr, nested_offset, nested_limit); } } else type.serializeBinaryBulk(*full_column, ostr, offset, limit); }
int main(int, char **) { ColumnPtr x = ConcreteColumn::create(1); ColumnPtr y = x;//x->test(); std::cerr << "values: " << x->get() << ", " << y->get() << "\n"; std::cerr << "refcounts: " << x->use_count() << ", " << y->use_count() << "\n"; std::cerr << "addresses: " << x.get() << ", " << y.get() << "\n"; { MutableColumnPtr mut = y->mutate(); mut->set(2); std::cerr << "refcounts: " << x->use_count() << ", " << y->use_count() << ", " << mut->use_count() << "\n"; std::cerr << "addresses: " << x.get() << ", " << y.get() << ", " << mut.get() << "\n"; y = std::move(mut); } std::cerr << "values: " << x->get() << ", " << y->get() << "\n"; std::cerr << "refcounts: " << x->use_count() << ", " << y->use_count() << "\n"; std::cerr << "addresses: " << x.get() << ", " << y.get() << "\n"; x = ConcreteColumn::create(0); std::cerr << "values: " << x->get() << ", " << y->get() << "\n"; std::cerr << "refcounts: " << x->use_count() << ", " << y->use_count() << "\n"; std::cerr << "addresses: " << x.get() << ", " << y.get() << "\n"; { MutableColumnPtr mut = y->mutate(); mut->set(3); std::cerr << "refcounts: " << x->use_count() << ", " << y->use_count() << ", " << mut->use_count() << "\n"; std::cerr << "addresses: " << x.get() << ", " << y.get() << ", " << mut.get() << "\n"; y = std::move(mut); } std::cerr << "values: " << x->get() << ", " << y->get() << "\n"; std::cerr << "refcounts: " << x->use_count() << ", " << y->use_count() << "\n"; return 0; }
/// Builds an array column from the materialized `mapped` column and the offsets of `array`.
static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped)
{
    /// A constant column is expanded to a full one before it becomes array data.
    const ColumnPtr full_data = mapped->convertToFullColumnIfConst();
    return ColumnArray::create(full_data, array.getOffsetsPtr());
}
/** Converts `column` from `from_type` to `to_type`, where the two types may differ only in
  * LowCardinality wrappers.
  *
  * - A null column, or equal types: the column is returned unchanged.
  * - ColumnConst: the data column is converted and wrapped into a constant of the same size.
  * - LowCardinality(T) -> T: the column is converted to a full column.
  * - T -> LowCardinality(T): a new LowCardinality column is filled from the full column.
  * - Array -> Array and Tuple -> Tuple: nested columns are converted recursively.
  *
  * Throws ILLEGAL_COLUMN when the column does not match an Array/Tuple from_type, and
  * TYPE_MISMATCH when none of the cases above applies.
  */
ColumnPtr recursiveLowCardinalityConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type)
{
    if (!column || from_type->equals(*to_type))
        return column;

    if (const auto * as_const = typeid_cast<const ColumnConst *>(column.get()))
        return ColumnConst::create(
            recursiveLowCardinalityConversion(as_const->getDataColumnPtr(), from_type, to_type),
            as_const->size());

    /// LowCardinality(T) -> T
    const auto * from_low_cardinality = typeid_cast<const DataTypeLowCardinality *>(from_type.get());
    if (from_low_cardinality && to_type->equals(*from_low_cardinality->getDictionaryType()))
        return column->convertToFullColumnIfLowCardinality();

    /// T -> LowCardinality(T)
    const auto * to_low_cardinality = typeid_cast<const DataTypeLowCardinality *>(to_type.get());
    if (to_low_cardinality && from_type->equals(*to_low_cardinality->getDictionaryType()))
    {
        auto compact = to_low_cardinality->createColumn();
        static_cast<ColumnLowCardinality &>(*compact).insertRangeFromFullColumn(*column, 0, column->size());
        return std::move(compact);
    }

    /// Array(...) -> Array(...)
    const auto * from_array = typeid_cast<const DataTypeArray *>(from_type.get());
    const auto * to_array = from_array ? typeid_cast<const DataTypeArray *>(to_type.get()) : nullptr;
    if (from_array && to_array)
    {
        const auto * as_array = typeid_cast<const ColumnArray *>(column.get());
        if (!as_array)
            throw Exception("Unexpected column " + column->getName() + " for type " + from_type->getName(),
                            ErrorCodes::ILLEGAL_COLUMN);

        auto & nested_from = from_array->getNestedType();
        auto & nested_to = to_array->getNestedType();

        return ColumnArray::create(
            recursiveLowCardinalityConversion(as_array->getDataPtr(), nested_from, nested_to),
            as_array->getOffsetsPtr());
    }

    /// Tuple(...) -> Tuple(...)
    const auto * from_tuple = typeid_cast<const DataTypeTuple *>(from_type.get());
    const auto * to_tuple = from_tuple ? typeid_cast<const DataTypeTuple *>(to_type.get()) : nullptr;
    if (from_tuple && to_tuple)
    {
        const auto * as_tuple = typeid_cast<const ColumnTuple *>(column.get());
        if (!as_tuple)
            throw Exception("Unexpected column " + column->getName() + " for type " + from_type->getName(),
                            ErrorCodes::ILLEGAL_COLUMN);

        Columns elements = as_tuple->getColumns();
        auto & from_elements = from_tuple->getElements();
        auto & to_elements = to_tuple->getElements();

        for (size_t pos = 0; pos < elements.size(); ++pos)
            elements[pos] = recursiveLowCardinalityConversion(elements[pos], from_elements.at(pos), to_elements.at(pos));

        return ColumnTuple::create(elements);
    }

    throw Exception("Cannot convert: " + from_type->getName() + " to " + to_type->getName(), ErrorCodes::TYPE_MISMATCH);
}
/** Adds a block of the right-hand table to the join.
  *
  * Steps, in order:
  *  - takes a unique lock on rwlock and throws LOGICAL_ERROR if empty() is true
  *    (the join was not initialized);
  *  - collects raw pointers to the key columns named in key_names_right, materializing
  *    constant ones, and extracts nested columns and the null map from them;
  *  - appends a copy of the block to `blocks`; in that stored copy the key columns are
  *    moved to the beginning when getFullness(kind) is true and erased otherwise;
  *  - materializes constant columns of the stored block and, for LEFT and FULL joins with
  *    use_nulls, converts the joined columns to Nullable;
  *  - unless the join kind is Cross, inserts the rows into the map selected by strictness
  *    (Any / All) and by getFullness(kind).
  *
  * Returns the result of limits.check for the total row and byte counts.
  */
bool Join::insertFromBlock(const Block & block) { std::unique_lock lock(rwlock); if (empty()) throw Exception("Logical error: Join was not initialized", ErrorCodes::LOGICAL_ERROR); size_t keys_size = key_names_right.size(); ColumnRawPtrs key_columns(keys_size); /// Rare case, when keys are constant. To avoid code bloat, simply materialize them. Columns materialized_columns; /// Memoize key columns to work. for (size_t i = 0; i < keys_size; ++i) { key_columns[i] = block.getByName(key_names_right[i]).column.get(); if (ColumnPtr converted = key_columns[i]->convertToFullColumnIfConst()) { materialized_columns.emplace_back(converted); key_columns[i] = materialized_columns.back().get(); } } /// We will insert to the map only keys, where all components are not NULL. ColumnPtr null_map_holder; ConstNullMapPtr null_map{}; extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map); size_t rows = block.rows(); blocks.push_back(block); Block * stored_block = &blocks.back(); if (getFullness(kind)) { /** Move the key columns to the beginning of the block. * This is where NonJoinedBlockInputStream will expect. */ size_t key_num = 0; for (const auto & name : key_names_right) { size_t pos = stored_block->getPositionByName(name); ColumnWithTypeAndName col = stored_block->safeGetByPosition(pos); stored_block->erase(pos); stored_block->insert(key_num, std::move(col)); ++key_num; } } else { /// Remove the key columns from stored_block, as they are not needed. for (const auto & name : key_names_right) stored_block->erase(stored_block->getPositionByName(name)); } size_t size = stored_block->columns(); /// Rare case, when joined columns are constant. To avoid code bloat, simply materialize them. 
for (size_t i = 0; i < size; ++i) { ColumnPtr col = stored_block->safeGetByPosition(i).column; if (ColumnPtr converted = col->convertToFullColumnIfConst()) stored_block->safeGetByPosition(i).column = converted; } /// In case of LEFT and FULL joins, if use_nulls, convert joined columns to Nullable. if (use_nulls && (kind == ASTTableJoin::Kind::Left || kind == ASTTableJoin::Kind::Full)) { for (size_t i = getFullness(kind) ? keys_size : 0; i < size; ++i) { convertColumnToNullable(stored_block->getByPosition(i)); } } if (kind != ASTTableJoin::Kind::Cross) { /// Fill the hash table. if (!getFullness(kind)) { if (strictness == ASTTableJoin::Strictness::Any) insertFromBlockImpl<ASTTableJoin::Strictness::Any>(type, maps_any, rows, key_columns, keys_size, key_sizes, stored_block, null_map, pool); else insertFromBlockImpl<ASTTableJoin::Strictness::All>(type, maps_all, rows, key_columns, keys_size, key_sizes, stored_block, null_map, pool); } else { if (strictness == ASTTableJoin::Strictness::Any) insertFromBlockImpl<ASTTableJoin::Strictness::Any>(type, maps_any_full, rows, key_columns, keys_size, key_sizes, stored_block, null_map, pool); else insertFromBlockImpl<ASTTableJoin::Strictness::All>(type, maps_all_full, rows, key_columns, keys_size, key_sizes, stored_block, null_map, pool); } } return limits.check(getTotalRowCount(), getTotalByteCount(), "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED); }