/** Stores the key variant `type_` in `type` and initializes the map set that
  * corresponds to the join's kind and strictness:
  *  - maps_any / maps_all when getFullness(kind) is false,
  *  - maps_any_full / maps_all_full when it is true.
  * For CROSS joins only `type` is stored; no map set is initialized.
  */
void Join::init(Type type_)
{
    type = type_;

    if (kind == ASTTableJoin::Kind::Cross)
        return;

    const bool any_strictness = (strictness == ASTTableJoin::Strictness::Any);

    if (getFullness(kind))
    {
        if (any_strictness)
            initImpl(maps_any_full, type);
        else
            initImpl(maps_all_full, type);
        return;
    }

    if (any_strictness)
        initImpl(maps_any, type);
    else
        initImpl(maps_all, type);
}
void Join::joinBlockImpl(Block & block, const Maps & maps) const { size_t keys_size = key_names_left.size(); ColumnRawPtrs key_columns(keys_size); /// Rare case, when keys are constant. To avoid code bloat, simply materialize them. Columns materialized_columns; /// Memoize key columns to work with. for (size_t i = 0; i < keys_size; ++i) { key_columns[i] = block.getByName(key_names_left[i]).column.get(); if (ColumnPtr converted = key_columns[i]->convertToFullColumnIfConst()) { materialized_columns.emplace_back(converted); key_columns[i] = materialized_columns.back().get(); } } /// Keys with NULL value in any column won't join to anything. ColumnPtr null_map_holder; ConstNullMapPtr null_map{}; extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map); size_t existing_columns = block.columns(); /** If you use FULL or RIGHT JOIN, then the columns from the "left" table must be materialized. * Because if they are constants, then in the "not joined" rows, they may have different values * - default values, which can differ from the values of these constants. */ if (getFullness(kind)) { for (size_t i = 0; i < existing_columns; ++i) { auto & col = block.getByPosition(i).column; if (ColumnPtr converted = col->convertToFullColumnIfConst()) col = converted; /// If use_nulls, convert left columns (except keys) to Nullable. if (use_nulls) { if (std::end(key_names_left) == std::find(key_names_left.begin(), key_names_left.end(), block.getByPosition(i).name)) convertColumnToNullable(block.getByPosition(i)); } } } /** For LEFT/INNER JOIN, the saved blocks do not contain keys. * For FULL/RIGHT JOIN, the saved blocks contain keys; * but they will not be used at this stage of joining (and will be in `AdderNonJoined`), and they need to be skipped. */ size_t num_columns_to_skip = 0; if (getFullness(kind)) num_columns_to_skip = keys_size; /// Add new columns to the block. 
size_t num_columns_to_add = sample_block_with_columns_to_add.columns(); MutableColumns added_columns; added_columns.reserve(num_columns_to_add); std::vector<std::pair<decltype(ColumnWithTypeAndName::type), decltype(ColumnWithTypeAndName::name)>> added_type_name; added_type_name.reserve(num_columns_to_add); std::vector<size_t> right_indexes; right_indexes.reserve(num_columns_to_add); for (size_t i = 0; i < num_columns_to_add; ++i) { const ColumnWithTypeAndName & src_column = sample_block_with_columns_to_add.safeGetByPosition(i); /// Don't insert column if it's in left block. if (!block.has(src_column.name)) { added_columns.push_back(src_column.column->cloneEmpty()); added_columns.back()->reserve(src_column.column->size()); added_type_name.emplace_back(src_column.type, src_column.name); right_indexes.push_back(num_columns_to_skip + i); } } size_t rows = block.rows(); /// Used with ANY INNER JOIN std::unique_ptr<IColumn::Filter> filter; bool filter_left_keys = (kind == ASTTableJoin::Kind::Inner || kind == ASTTableJoin::Kind::Right) && strictness == ASTTableJoin::Strictness::Any; filter = std::make_unique<IColumn::Filter>(rows); /// Used with ALL ... 
JOIN IColumn::Offset current_offset = 0; std::unique_ptr<IColumn::Offsets> offsets_to_replicate; if (strictness == ASTTableJoin::Strictness::All) offsets_to_replicate = std::make_unique<IColumn::Offsets>(rows); switch (type) { #define M(TYPE) \ case Join::Type::TYPE: \ joinBlockImplType<KIND, STRICTNESS, typename KeyGetterForType<Join::Type::TYPE>::Type>(\ *maps.TYPE, rows, key_columns, key_sizes, added_columns, null_map, \ filter, current_offset, offsets_to_replicate, right_indexes); \ break; APPLY_FOR_JOIN_VARIANTS(M) #undef M default: throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT); } const auto added_columns_size = added_columns.size(); for (size_t i = 0; i < added_columns_size; ++i) block.insert(ColumnWithTypeAndName(std::move(added_columns[i]), added_type_name[i].first, added_type_name[i].second)); /// If ANY INNER | RIGHT JOIN - filter all the columns except the new ones. if (filter_left_keys) for (size_t i = 0; i < existing_columns; ++i) block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(*filter, -1); ColumnUInt64::Ptr mapping; /// Add join key columns from right block if they has different name. for (size_t i = 0; i < key_names_right.size(); ++i) { auto & right_name = key_names_right[i]; auto & left_name = key_names_left[i]; if (needed_key_names_right.count(right_name) && !block.has(right_name)) { const auto & col = block.getByName(left_name); auto column = col.column; if (!filter_left_keys) { if (!mapping) { auto mut_mapping = ColumnUInt64::create(column->size()); auto & data = mut_mapping->getData(); size_t size = column->size(); for (size_t j = 0; j < size; ++j) data[j] = (*filter)[j] ? j : size; mapping = std::move(mut_mapping); } auto mut_column = (*std::move(column)).mutate(); mut_column->insertDefault(); column = mut_column->index(*mapping, 0); } block.insert({column, col.type, right_name}); } } /// If ALL ... JOIN - we replicate all the columns except the new ones. 
if (offsets_to_replicate) { for (size_t i = 0; i < existing_columns; ++i) block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate); } }
bool Join::insertFromBlock(const Block & block) { std::unique_lock lock(rwlock); if (empty()) throw Exception("Logical error: Join was not initialized", ErrorCodes::LOGICAL_ERROR); size_t keys_size = key_names_right.size(); ColumnRawPtrs key_columns(keys_size); /// Rare case, when keys are constant. To avoid code bloat, simply materialize them. Columns materialized_columns; /// Memoize key columns to work. for (size_t i = 0; i < keys_size; ++i) { key_columns[i] = block.getByName(key_names_right[i]).column.get(); if (ColumnPtr converted = key_columns[i]->convertToFullColumnIfConst()) { materialized_columns.emplace_back(converted); key_columns[i] = materialized_columns.back().get(); } } /// We will insert to the map only keys, where all components are not NULL. ColumnPtr null_map_holder; ConstNullMapPtr null_map{}; extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map); size_t rows = block.rows(); blocks.push_back(block); Block * stored_block = &blocks.back(); if (getFullness(kind)) { /** Move the key columns to the beginning of the block. * This is where NonJoinedBlockInputStream will expect. */ size_t key_num = 0; for (const auto & name : key_names_right) { size_t pos = stored_block->getPositionByName(name); ColumnWithTypeAndName col = stored_block->safeGetByPosition(pos); stored_block->erase(pos); stored_block->insert(key_num, std::move(col)); ++key_num; } } else { /// Remove the key columns from stored_block, as they are not needed. for (const auto & name : key_names_right) stored_block->erase(stored_block->getPositionByName(name)); } size_t size = stored_block->columns(); /// Rare case, when joined columns are constant. To avoid code bloat, simply materialize them. 
for (size_t i = 0; i < size; ++i) { ColumnPtr col = stored_block->safeGetByPosition(i).column; if (ColumnPtr converted = col->convertToFullColumnIfConst()) stored_block->safeGetByPosition(i).column = converted; } /// In case of LEFT and FULL joins, if use_nulls, convert joined columns to Nullable. if (use_nulls && (kind == ASTTableJoin::Kind::Left || kind == ASTTableJoin::Kind::Full)) { for (size_t i = getFullness(kind) ? keys_size : 0; i < size; ++i) { convertColumnToNullable(stored_block->getByPosition(i)); } } if (kind != ASTTableJoin::Kind::Cross) { /// Fill the hash table. if (!getFullness(kind)) { if (strictness == ASTTableJoin::Strictness::Any) insertFromBlockImpl<ASTTableJoin::Strictness::Any>(type, maps_any, rows, key_columns, keys_size, key_sizes, stored_block, null_map, pool); else insertFromBlockImpl<ASTTableJoin::Strictness::All>(type, maps_all, rows, key_columns, keys_size, key_sizes, stored_block, null_map, pool); } else { if (strictness == ASTTableJoin::Strictness::Any) insertFromBlockImpl<ASTTableJoin::Strictness::Any>(type, maps_any_full, rows, key_columns, keys_size, key_sizes, stored_block, null_map, pool); else insertFromBlockImpl<ASTTableJoin::Strictness::All>(type, maps_all_full, rows, key_columns, keys_size, key_sizes, stored_block, null_map, pool); } } return limits.check(getTotalRowCount(), getTotalByteCount(), "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED); }
/*
 * Insert a superblock into the doubly linked superblock list of a size class.
 *
 * The new node is placed in front of the first existing node whose
 * getFullness() is not greater than its own, so a list that is already in
 * non-increasing fullness order stays that way. If every existing node is
 * fuller, the new node becomes the last one.
 *
 * sizeClass  - size class whose _SBlkList is updated; _length is incremented
 *              on every successful insert.
 * superBlock - node to insert; its _meta next/prev links are overwritten.
 *
 * When the list is empty (length 0 or NULL head) the superblock becomes the
 * only node and the size class also takes over its _sizeClassBytes; the
 * other paths leave _sizeClassBytes untouched.
 *
 * If the walk reaches the end of a non-empty list without ever having a
 * predecessor, a diagnostic is printed and the process exits with -1.
 */
void insertSuperBlock(size_class_t *sizeClass, superblock_t *superBlock) {
    superblock_t *pSb, *pPrevSb;

    /* first node - list is empty */
    if (sizeClass->_SBlkList._length == 0 || sizeClass->_SBlkList._first == NULL) {
        sizeClass->_SBlkList._length++;
        sizeClass->_SBlkList._first = superBlock;
        superBlock->_meta._pNxtSBlk = NULL;
        superBlock->_meta._pPrvSblk = NULL;
        sizeClass->_sizeClassBytes = superBlock->_meta._sizeClassBytes;
        return;
    }

    /* at least one superblock exists: skip every node fuller than the new one */
    pPrevSb = NULL;
    pSb = sizeClass->_SBlkList._first;

    while (pSb && getFullness(superBlock) < getFullness(pSb)) {
        pPrevSb = pSb;
        pSb = pSb->_meta._pNxtSBlk;
    }

    /* either pSb is NULL (end of list) or the new superblock is at least as full as pSb */
    if (!pSb) {
        /* end of list: the head was non-NULL, so the walk must have left a predecessor */
        if (pPrevSb == NULL) {
            printf("program bug! size_class.c cannot have prev==NULL \n ");
            exit(-1);
        }

        /* put superblock after pPrevSb as the last node */
        plantSuperBlock(pPrevSb, superBlock, NULL);
    } else if (pSb == sizeClass->_SBlkList._first) {
        /* new head: link it in front of the current first node */
        superBlock->_meta._pNxtSBlk = sizeClass->_SBlkList._first;
        superBlock->_meta._pPrvSblk = NULL;
        sizeClass->_SBlkList._first->_meta._pPrvSblk = superBlock;
        sizeClass->_SBlkList._first = superBlock;
    } else {
        /* mid list: put superblock after pPrevSb and before pSb */
        plantSuperBlock(pPrevSb, superBlock, pSb);
    }

    sizeClass->_SBlkList._length++;
}