/** Builds the row filter for one block: a row is kept only when its key has not been seen before.
  *
  * For every row in [0, rows) a key is built from `columns` and inserted into `method.data`.
  * `filter[i]` is set to 1 when the insertion created a new entry and to 0 when the key was
  * already present.
  *
  * @param method    set variant holding the hash table (`method.data`) of keys seen so far; it is modified.
  * @param columns   key columns; all of them take part in the key.
  * @param filter    output, indexed by row number; must already have room for `rows` entries.
  * @param rows      number of rows to process.
  * @param variants  supplies `string_pool`, which is handed to `onNewKey` for newly inserted keys.
  *
  * NOTE(review): `Method` is presumably a template parameter and `key_sizes` a class member,
  * both declared outside this fragment - confirm.
  */
void DistinctBlockInputStream::buildFilter(
    Method & method,
    const ConstColumnPlainPtrs & columns,
    IColumn::Filter & filter,
    size_t rows,
    SetVariants & variants) const
{
    typename Method::State state;
    state.init(columns);

    const size_t keys_size = columns.size();

    for (size_t i = 0; i < rows; ++i)
    {
        /// Make a key.
        typename Method::Key key = state.getKey(columns, keys_size, i, key_sizes);

        /// emplace() locates the slot itself and reports it through `it`, so a separate find()
        /// beforehand would only hash and probe the same key a second time.
        typename Method::Data::iterator it;
        bool inserted;
        method.data.emplace(key, it, inserted);

        if (inserted)
            method.onNewKey(*it, keys_size, i, variants.string_pool);

        /// Emit the record if there was no such key in the set yet; skip it otherwise.
        filter[i] = inserted;
    }
}
/** Evaluates set membership for array rows.
  *
  * Row `i` owns the elements at positions [offsets[i - 1], offsets[i]) of the key columns
  * (the first row starts at 0). `vec_res[i]` becomes 1 as soon as one element satisfies
  * `negative XOR (key is present in method.data)`, and 0 if no element does
  * (in particular, for an empty array).
  *
  * @param method       set variant whose `data` hash table is queried.
  * @param key_columns  columns the keys are built from, indexed by element position.
  * @param offsets      per-row end positions of the array elements.
  * @param vec_res      output, one value per row; must already have room for `rows` entries.
  * @param negative     inverts the per-element membership test.
  * @param rows         number of array rows.
  *
  * NOTE(review): `Method` is presumably a template parameter and `key_sizes` a class member,
  * both declared outside this fragment - confirm.
  */
void NO_INLINE Set::executeArrayImpl(
    Method & method,
    const ConstColumnPlainPtrs & key_columns,
    const ColumnArray::Offsets_t & offsets,
    ColumnUInt8::Container_t & vec_res,
    bool negative,
    size_t rows) const
{
    typename Method::State state;
    state.init(key_columns);

    const size_t num_keys = key_columns.size();
    size_t row_begin = 0;

    /// For every row.
    for (size_t row = 0; row < rows; ++row)
    {
        const size_t row_end = offsets[row];
        UInt8 matched = 0;

        /// For every element of the row's array; stop at the first match.
        for (size_t pos = row_begin; pos < row_end; ++pos)
        {
            /// Build the key.
            typename Method::Key key = state.getKey(key_columns, num_keys, pos, key_sizes);
            const bool found = method.data.find(key) != method.data.end();

            if (negative != found)
            {
                matched = 1;
                break;
            }
        }

        vec_res[row] = matched;
        row_begin = row_end;
    }
}
/** Picks the set implementation that fits the given key columns.
  *
  * Decision order:
  *  1. a single numeric key                      -> key8 / key16 / key32 / key64 by field size;
  *  2. all keys fixed-size, at most 16 bytes     -> keys128;
  *  3. all keys fixed-size, at most 32 bytes     -> keys256;
  *  4. a single ColumnString / ColumnConstString -> key_string;
  *  5. a single ColumnFixedString                -> key_fixed_string;
  *  6. anything else                             -> hashed.
  *
  * @param key_columns  the key columns.
  * @param key_sizes    output; resized to the number of keys and filled with `sizeOfField()` of
  *                     each column up to (not including) the first non-fixed one. Entries from
  *                     that column onwards are not written here.
  * @return the chosen variant type.
  * @throws Exception (LOGICAL_ERROR) if a single numeric key reports a field size other than 1, 2, 4 or 8.
  */
SetVariants::Type SetVariants::chooseMethod(const ConstColumnPlainPtrs & key_columns, Sizes & key_sizes)
{
    const size_t num_keys = key_columns.size();
    key_sizes.resize(num_keys);

    /// Collect field sizes until the first column that is not fixed-size.
    bool every_key_fixed = true;
    size_t total_bytes = 0;
    for (size_t idx = 0; idx < num_keys && every_key_fixed; ++idx)
    {
        if (key_columns[idx]->isFixed())
        {
            key_sizes[idx] = key_columns[idx]->sizeOfField();
            total_bytes += key_sizes[idx];
        }
        else
            every_key_fixed = false;
    }

    const bool single_key = (num_keys == 1);

    /// A single numeric key that fits into 64 bits.
    if (single_key && key_columns[0]->isNumeric())
    {
        switch (key_columns[0]->sizeOfField())
        {
            case 1: return SetVariants::Type::key8;
            case 2: return SetVariants::Type::key16;
            case 4: return SetVariants::Type::key32;
            case 8: return SetVariants::Type::key64;
            default:
                throw Exception("Logical error: numeric column has sizeOfField not in 1, 2, 4, 8.", ErrorCodes::LOGICAL_ERROR);
        }
    }

    /// If the keys fit into N bits, use a hash table over keys packed into N bits.
    if (every_key_fixed)
    {
        if (total_bytes <= 16)
            return SetVariants::Type::keys128;
        if (total_bytes <= 32)
            return SetVariants::Type::keys256;
    }

    if (single_key)
    {
        const auto & only_column = key_columns[0];

        /// A single string key: use a hash table of its values.
        if (typeid_cast<const ColumnString *>(only_column) || typeid_cast<const ColumnConstString *>(only_column))
            return SetVariants::Type::key_string;

        if (typeid_cast<const ColumnFixedString *>(only_column))
            return SetVariants::Type::key_fixed_string;
    }

    /// Otherwise, use a set of cryptographic hashes of unambiguously serialized values.
    return SetVariants::Type::hashed;
}
void ExternalQueryBuilder::composeKeyTuple(const ConstColumnPlainPtrs & key_columns, const std::size_t row, WriteBuffer & out) const { writeString("(", out); const auto keys_size = key_columns.size(); auto first = true; for (const auto i : ext::range(0, keys_size)) { if (!first) writeString(", ", out); first = false; (*dict_struct.key)[i].type->serializeTextQuoted(*key_columns[i], row, out); } writeString(")", out); }
/** Evaluates set membership row by row.
  *
  * For every row in [0, rows) a key is built from `key_columns` and looked up in `method.data`;
  * `vec_res[i]` is set to `negative XOR (key is present)`.
  *
  * @param method       set variant whose `data` hash table is queried.
  * @param key_columns  columns the keys are built from.
  * @param vec_res      output, one value per row; must already have room for `rows` entries.
  * @param negative     inverts the membership result.
  * @param rows         number of rows to process.
  *
  * NOTE(review): `Method` is presumably a template parameter and `key_sizes` a class member,
  * both declared outside this fragment - confirm.
  */
void NO_INLINE Set::executeImpl(
    Method & method,
    const ConstColumnPlainPtrs & key_columns,
    ColumnUInt8::Container_t & vec_res,
    bool negative,
    size_t rows) const
{
    typename Method::State state;
    state.init(key_columns);

    const size_t num_keys = key_columns.size();

    /// NOTE: no optimization for runs of consecutive identical values is applied.

    /// For every row.
    for (size_t row = 0; row < rows; ++row)
    {
        /// Build the key.
        typename Method::Key key = state.getKey(key_columns, num_keys, row, key_sizes);

        const bool found = method.data.find(key) != method.data.end();
        vec_res[row] = (negative != found);
    }
}
void ExternalQueryBuilder::composeKeyCondition(const ConstColumnPlainPtrs & key_columns, const std::size_t row, WriteBuffer & out) const { writeString("(", out); const auto keys_size = key_columns.size(); auto first = true; for (const auto i : ext::range(0, keys_size)) { if (!first) writeString(" AND ", out); first = false; const auto & key_description = (*dict_struct.key)[i]; /// key_i=value_i writeString(key_description.name, out); writeString("=", out); key_description.type->serializeTextQuoted(*key_columns[i], row, out); } writeString(")", out); }
/** Inserts the keys of every row of a block into the set.
  *
  * For every row in [0, rows) a key is built from `key_columns` and inserted into `method.data`.
  * When the insertion created a new entry, `method.onNewKey` is called for it with
  * `variants.string_pool`.
  *
  * @param method       set variant holding the hash table (`method.data`); it is modified.
  * @param key_columns  columns the keys are built from.
  * @param rows         number of rows to process.
  * @param variants     supplies `string_pool`, which is handed to `onNewKey` for newly inserted keys.
  *
  * NOTE(review): `Method` is presumably a template parameter and `key_sizes` a class member,
  * both declared outside this fragment - confirm.
  */
void NO_INLINE Set::insertFromBlockImpl(
    Method & method,
    const ConstColumnPlainPtrs & key_columns,
    size_t rows,
    SetVariants & variants)
{
    typename Method::State state;
    state.init(key_columns);

    const size_t keys_size = key_columns.size();

    /// For every row.
    for (size_t i = 0; i < rows; ++i)
    {
        /// Build the key.
        typename Method::Key key = state.getKey(key_columns, keys_size, i, key_sizes);

        /// emplace() locates the slot itself and reports it through `it`, so a separate find()
        /// beforehand would only hash and probe the same key a second time.
        typename Method::Data::iterator it;
        bool inserted;
        method.data.emplace(key, it, inserted);

        if (inserted)
            method.onNewKey(*it, keys_size, i, variants.string_pool);
    }
}