// Collects the IDs of tuples whose compressed code for attr_id satisfies
// comp(code, stored_code), where comp is a default-constructed
// comparison_functor<uint32_t>.
//
// attr_id: attribute whose column stripe is scanned.
// code:    left-hand operand passed to the comparison functor.
// filter:  optional; when non-null, only tuples present in it can appear in
//          the result.
//
// Returns a sequence allocated with new; this function does not release it.
// A code width other than 1, 2, or 4 bytes triggers FATAL_ERROR.
TupleIdSequence* CompressedColumnStoreTupleStorageSubBlock::getCodesSatisfyingComparison(
    const attribute_id attr_id,
    const std::uint32_t code,
    const TupleIdSequence *filter) const {
#ifdef QUICKSTEP_ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT
  static constexpr bool short_circuit = true;
#else
  static constexpr bool short_circuit = false;
#endif
  comparison_functor<uint32_t> comp;

  // The tuple_id stored at the very start of the sub-block memory is used as
  // both the length of the result and the bound of a full scan.
  const tuple_id num_tuples = *static_cast<const tuple_id*>(sub_block_memory_);
  TupleIdSequence *matches = new TupleIdSequence(num_tuples);
  const void *attr_stripe = column_stripes_[attr_id];

  // With short-circuiting enabled and a filter supplied, only the tuples named
  // by the filter are examined. Otherwise every tuple is examined and the
  // filter (if any) is applied afterwards.
  const bool probe_filter_only = short_circuit && (filter != nullptr);

  // One scan routine for every code width; the element type is deduced from
  // the typed stripe pointer.
  const auto collect_matches = [&](const auto *codes) {
    if (probe_filter_only) {
      for (const auto filtered_tid : *filter) {
        if (comp(code, codes[filtered_tid])) {
          matches->set(filtered_tid);
        }
      }
    } else {
      for (tuple_id tid = 0; tid < num_tuples; ++tid) {
        if (comp(code, codes[tid])) {
          matches->set(tid);
        }
      }
      if (filter != nullptr) {
        matches->intersectWith(*filter);
      }
    }
  };

  switch (compression_info_.attribute_size(attr_id)) {
    case 1:
      collect_matches(static_cast<const uint8_t*>(attr_stripe));
      break;
    case 2:
      collect_matches(static_cast<const uint16_t*>(attr_stripe));
      break;
    case 4:
      collect_matches(static_cast<const uint32_t*>(attr_stripe));
      break;
    default:
      FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
                  "attribute ID " << attr_id
                  << " in CompressedColumnStoreTupleStorageSubBlock::getCodesSatisfyingComparison()");
  }

  return matches;
}
// Collects the IDs of tuples whose compressed code for attr_id lies in the
// half-open interval [range.first, range.second).
//
// attr_id: attribute whose codes are tested.
// range:   inclusive lower bound in .first, exclusive upper bound in .second.
// filter:  optional; when non-null, only tuples present in it can appear in
//          the result.
//
// Returns a sequence allocated with new; this function does not release it.
// A code width other than 1, 2, or 4 bytes triggers FATAL_ERROR on the
// non-sort-column path.
TupleIdSequence* CompressedColumnStoreTupleStorageSubBlock::getCodesInRange(
    const attribute_id attr_id,
    const std::pair<std::uint32_t, std::uint32_t> range,
    const TupleIdSequence *filter) const {
#ifdef QUICKSTEP_ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT
  static constexpr bool short_circuit = true;
#else
  static constexpr bool short_circuit = false;
#endif
  // The tuple_id stored at the very start of the sub-block memory is used as
  // both the length of the result and the bound of a full scan.
  const tuple_id num_tuples = *static_cast<const tuple_id*>(sub_block_memory_);
  TupleIdSequence *matches = new TupleIdSequence(num_tuples);

  if (attr_id == sort_column_id_) {
    // Fast path: the sort column is binary-searched, and the resulting
    // contiguous run of tuple IDs is marked in one call.
    const pair<tuple_id, tuple_id> sorted_span = getCompressedSortColumnRange(range);
    matches->setRange(sorted_span.first,
                      sorted_span.second - sorted_span.first,
                      true);
    if (filter != nullptr) {
      matches->intersectWith(*filter);
    }
    return matches;
  }

  const void *attr_stripe = column_stripes_[attr_id];

  // With short-circuiting enabled and a filter supplied, only the tuples named
  // by the filter are examined. Otherwise every tuple is examined and the
  // filter (if any) is applied afterwards.
  const bool probe_filter_only = short_circuit && (filter != nullptr);

  // One scan routine for every code width; the element type is deduced from
  // the typed stripe pointer.
  const auto collect_matches = [&](const auto *codes) {
    if (probe_filter_only) {
      for (const auto filtered_tid : *filter) {
        if ((range.first <= codes[filtered_tid])
            && (codes[filtered_tid] < range.second)) {
          matches->set(filtered_tid);
        }
      }
    } else {
      for (tuple_id tid = 0; tid < num_tuples; ++tid) {
        if ((range.first <= codes[tid]) && (codes[tid] < range.second)) {
          matches->set(tid);
        }
      }
      if (filter != nullptr) {
        matches->intersectWith(*filter);
      }
    }
  };

  switch (compression_info_.attribute_size(attr_id)) {
    case 1:
      collect_matches(static_cast<const uint8_t*>(attr_stripe));
      break;
    case 2:
      collect_matches(static_cast<const uint16_t*>(attr_stripe));
      break;
    case 4:
      collect_matches(static_cast<const uint32_t*>(attr_stripe));
      break;
    default:
      FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
                  "attribute ID " << attr_id
                  << " in CompressedColumnStoreTupleStorageSubBlock::getCodesInRange()");
  }

  return matches;
}
// Collects the IDs of tuples whose compressed code for attr_id is different
// from both 'code' and 'null_code'.
//
// attr_id:   attribute whose codes are tested.
// code:      code that must NOT match.
// null_code: code that is additionally excluded from the result.
// filter:    optional; when non-null, only tuples present in it can appear in
//            the result.
//
// Returns a sequence allocated with new; this function does not release it.
// A code width other than 1, 2, or 4 bytes triggers FATAL_ERROR on the
// non-sort-column path.
//
// Fixes relative to the previous revision: the 4-byte case used to read the
// stripe through a uint16_t pointer (comparing the wrong data), and its full
// scan iterated with 'tid <= count', touching one element past the end of the
// stripe and of the result sequence. All widths now share one scan routine so
// they cannot drift apart again.
TupleIdSequence* CompressedColumnStoreTupleStorageSubBlock::getNotEqualCodesExcludingNull(
    const attribute_id attr_id,
    const std::uint32_t code,
    const std::uint32_t null_code,
    const TupleIdSequence *filter) const {
#ifdef QUICKSTEP_ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT
  static constexpr bool short_circuit = true;
#else
  static constexpr bool short_circuit = false;
#endif
  // The tuple_id stored at the very start of the sub-block memory is used as
  // both the length of the result and the bound of a full scan.
  const tuple_id num_tuples = *static_cast<const tuple_id*>(sub_block_memory_);

  if (attr_id == sort_column_id_) {
    // Special (fast) case: do a binary search of the sort column.
    pair<uint32_t, uint32_t> code_range(code, code + 1);

    // Adjust the upper limit if doing so can avoid an extra binary search.
    if (dictionary_coded_attributes_[attr_id]) {
      if (code_range.second == compressedGetDictionary(attr_id).numberOfCodes()) {
        code_range.second = numeric_limits<uint32_t>::max();
      }
    } else if (code_range.first
               == GetMaxTruncatedValue(compression_info_.attribute_size(attr_id))) {
      code_range.second = numeric_limits<uint32_t>::max();
    }

    const pair<tuple_id, tuple_id> tuple_range = getCompressedSortColumnRange(code_range);

    // Search for the beginning of nulls in the sort column.
    const pair<uint32_t, uint32_t> null_range(null_code, numeric_limits<uint32_t>::max());
    const pair<tuple_id, tuple_id> limit_range = getCompressedSortColumnRange(null_range);

    // We searched for the range of equal codes, so return its complement,
    // stopping where the null codes begin.
    TupleIdSequence *matches = new TupleIdSequence(num_tuples);
    matches->setRange(0, tuple_range.first, true);
    matches->setRange(tuple_range.second,
                      limit_range.first - tuple_range.second,
                      true);
    if (filter != nullptr) {
      matches->intersectWith(*filter);
    }
    return matches;
  }

  TupleIdSequence *matches = new TupleIdSequence(num_tuples);
  const void *attr_stripe = column_stripes_[attr_id];

  // With short-circuiting enabled and a filter supplied, only the tuples named
  // by the filter are examined. Otherwise every tuple is examined and the
  // filter (if any) is applied afterwards.
  const bool probe_filter_only = short_circuit && (filter != nullptr);

  // One scan routine for every code width; the element type is deduced from
  // the typed stripe pointer, so each width reads codes of the correct size.
  const auto collect_matches = [&](const auto *codes) {
    if (probe_filter_only) {
      for (const auto filtered_tid : *filter) {
        if ((code != codes[filtered_tid]) && (null_code != codes[filtered_tid])) {
          matches->set(filtered_tid);
        }
      }
    } else {
      for (tuple_id tid = 0; tid < num_tuples; ++tid) {
        if ((code != codes[tid]) && (null_code != codes[tid])) {
          matches->set(tid);
        }
      }
      if (filter != nullptr) {
        matches->intersectWith(*filter);
      }
    }
  };

  switch (compression_info_.attribute_size(attr_id)) {
    case 1:
      collect_matches(static_cast<const uint8_t*>(attr_stripe));
      break;
    case 2:
      collect_matches(static_cast<const uint16_t*>(attr_stripe));
      break;
    case 4:
      collect_matches(static_cast<const uint32_t*>(attr_stripe));
      break;
    default:
      FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
                  "attribute ID " << attr_id
                  << " in CompressedColumnStoreTupleStorageSubBlock::getNotEqualCodesExcludingNull()");
  }

  return matches;
}
// Collects the IDs of tuples whose compressed code for attr_id lies in the
// half-open interval [range.first, range.second).
//
// The code of tuple 'tid' is read from
//   tuple_storage_ + attribute_offsets_[attr_id] + tid * tuple_length_bytes_.
//
// attr_id: attribute whose codes are tested.
// range:   inclusive lower bound in .first, exclusive upper bound in .second.
// filter:  optional; when non-null, only tuples present in it can appear in
//          the result.
//
// Returns a sequence allocated with new; this function does not release it.
// A code width other than 1, 2, or 4 bytes triggers FATAL_ERROR.
TupleIdSequence* CompressedPackedRowStoreTupleStorageSubBlock::getCodesInRange(
    const attribute_id attr_id,
    const std::pair<std::uint32_t, std::uint32_t> range,
    const TupleIdSequence *filter) const {
#ifdef QUICKSTEP_ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT
  static constexpr bool short_circuit = true;
#else
  static constexpr bool short_circuit = false;
#endif
  // The tuple_id stored at the very start of the sub-block memory is used as
  // both the length of the result and the bound of a full scan.
  const tuple_id num_tuples = *static_cast<const tuple_id*>(sub_block_memory_);
  TupleIdSequence *matches = new TupleIdSequence(num_tuples);

  // Address of this attribute inside tuple 0, and the distance between the
  // same attribute in consecutive tuples.
  const char *first_attr_location
      = static_cast<const char*>(tuple_storage_) + attribute_offsets_[attr_id];
  const auto tuple_stride = tuple_length_bytes_;

  // With short-circuiting enabled and a filter supplied, only the tuples named
  // by the filter are examined. Otherwise every tuple is examined and the
  // filter (if any) is applied afterwards.
  const bool probe_filter_only = short_circuit && (filter != nullptr);

  // One scan routine for every code width; 'load_code' reads a code of the
  // appropriate width from a byte address.
  const auto collect_matches = [&](const auto &load_code) {
    if (probe_filter_only) {
      for (const auto filtered_tid : *filter) {
        const std::uint32_t stored_code
            = load_code(first_attr_location + filtered_tid * tuple_stride);
        if ((range.first <= stored_code) && (stored_code < range.second)) {
          matches->set(filtered_tid);
        }
      }
    } else {
      for (tuple_id tid = 0; tid < num_tuples; ++tid) {
        const std::uint32_t stored_code
            = load_code(first_attr_location + tid * tuple_stride);
        if ((range.first <= stored_code) && (stored_code < range.second)) {
          matches->set(tid);
        }
      }
      if (filter != nullptr) {
        matches->intersectWith(*filter);
      }
    }
  };

  switch (compression_info_.attribute_size(attr_id)) {
    case 1:
      collect_matches([](const char *location) {
        return *reinterpret_cast<const uint8_t*>(location);
      });
      break;
    case 2:
      collect_matches([](const char *location) {
        return *reinterpret_cast<const uint16_t*>(location);
      });
      break;
    case 4:
      collect_matches([](const char *location) {
        return *reinterpret_cast<const uint32_t*>(location);
      });
      break;
    default:
      FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
                  "attribute ID " << attr_id
                  << " in CompressedPackedRowStoreTupleStorageSubBlock::getCodesInRange()");
  }

  return matches;
}
// Matches every string in a ColumnVector against one static pattern and
// returns the positions that match.
//
// The pattern is compiled into an RE2 object once, before any value is
// examined (after conversion through transformLikeToRegex() when
// is_like_pattern is set).
//
// column_vector:    values for the left operand.
// static_value:     the pattern; if right_nullable and it is NULL, the result
//                   is returned with nothing set.
// filter:           optional; positions absent from it never appear in the
//                   result.
// existence_bitmap: optional; when non-null, the i-th column vector entry is
//                   reported at the i-th position iterated from this sequence,
//                   and the result has this sequence's length.
//
// Returns a sequence allocated with new; this function does not release it.
TupleIdSequence* PatternMatchingUncheckedComparator<is_like_pattern, is_negation,
                                                    left_nullable, right_nullable>
    ::compareColumnVectorAndStaticValue(
        const ColumnVector &column_vector,
        const TypedValue &static_value,
        const TupleIdSequence *filter,
        const TupleIdSequence *existence_bitmap) const {
  return InvokeOnColumnVector(
      column_vector,
      [&](const auto &column_vector) -> TupleIdSequence* {  // NOLINT(build/c++11)
#ifdef QUICKSTEP_ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT
    static constexpr bool short_circuit = true;
#else
    static constexpr bool short_circuit = false;
#endif
    DCHECK((existence_bitmap == nullptr)
           || (existence_bitmap->numTuples() == column_vector.size()));
    DCHECK((filter == nullptr)
           || ((existence_bitmap == nullptr) ? (filter->length() == column_vector.size())
                                             : (filter->length() == existence_bitmap->length())));

    TupleIdSequence *result = new TupleIdSequence(
        (existence_bitmap == nullptr) ? column_vector.size()
                                      : existence_bitmap->length());
    if (right_nullable && static_value.isNull()) {
      // A NULL pattern leaves the freshly created result untouched.
      return result;
    }

    // Build the regular expression a single time, ahead of the scan.
    const char *raw_pattern = static_cast<const char *>(static_value.getDataPtr());
    const std::size_t raw_pattern_len = strnlen(raw_pattern, right_length_);
    std::string like_as_regex;
    re2::StringPiece regex_source;
    if (is_like_pattern) {
      like_as_regex = this->transformLikeToRegex(raw_pattern, raw_pattern_len);
      regex_source.set(like_as_regex.c_str(), like_as_regex.size());
    } else {
      regex_source.set(raw_pattern, raw_pattern_len);
    }
    const re2::RE2 re2_pattern(regex_source);

    if (short_circuit && (filter != nullptr)) {
      // Only values at positions admitted by the filter are evaluated.
      if (existence_bitmap == nullptr) {
        for (const auto filtered_pos : *filter) {
          const void *cv_value
              = column_vector.template getUntypedValue<left_nullable>(filtered_pos);
          result->set(filtered_pos,
                      !(left_nullable && (cv_value == nullptr))
                          && this->matchDataPtrWithPattern(cv_value, re2_pattern));
        }
      } else {
        TupleIdSequence::const_iterator existence_it = existence_bitmap->begin();
        for (std::size_t cv_pos = 0;
             cv_pos < column_vector.size();
             ++cv_pos, ++existence_it) {
          if (!filter->get(*existence_it)) {
            continue;
          }
          const void *cv_value
              = column_vector.template getUntypedValue<left_nullable>(cv_pos);
          result->set(*existence_it,
                      !(left_nullable && (cv_value == nullptr))
                          && this->matchDataPtrWithPattern(cv_value, re2_pattern));
        }
      }
      return result;
    }

    // Evaluate every value in the column vector.
    if (existence_bitmap == nullptr) {
      for (std::size_t pos = 0; pos < column_vector.size(); ++pos) {
        const void *cv_value
            = column_vector.template getUntypedValue<left_nullable>(pos);
        result->set(pos,
                    !(left_nullable && (cv_value == nullptr))
                        && this->matchDataPtrWithPattern(cv_value, re2_pattern));
      }
    } else {
      TupleIdSequence::const_iterator existence_it = existence_bitmap->begin();
      for (std::size_t cv_pos = 0;
           cv_pos < column_vector.size();
           ++cv_pos, ++existence_it) {
        const void *cv_value
            = column_vector.template getUntypedValue<left_nullable>(cv_pos);
        result->set(*existence_it,
                    !(left_nullable && (cv_value == nullptr))
                        && this->matchDataPtrWithPattern(cv_value, re2_pattern));
      }
    }

    // Without short-circuiting the filter is applied after the full scan.
    if (!short_circuit && (filter != nullptr)) {
      result->intersectWith(*filter);
    }
    return result;
  });
}
// Matches the string attribute of every tuple reachable through a
// ValueAccessor against one static pattern and returns the positions that
// match.
//
// The pattern is compiled into an RE2 object once, before any value is
// examined (after conversion through transformLikeToRegex() when
// is_like_pattern is set).
//
// accessor:               source of the left-operand values.
// value_accessor_attr_id: attribute to read from the accessor.
// static_value:           the pattern; if right_nullable and it is NULL, the
//                         result is returned with nothing set.
// filter:                 optional; positions absent from it never appear in
//                         the result.
//
// Returns a sequence allocated with new; this function does not release it.
//
// When left_nullable is set, a null value pointer is reported as "no match"
// without being handed to matchDataPtrWithPattern(), the same guard that
// compareColumnVectorAndStaticValue() applies.
// NOTE(review): matchDataPtrWithPattern() is not visible from here; if it
// already tolerates nullptr this guard is merely redundant - confirm.
TupleIdSequence* PatternMatchingUncheckedComparator<is_like_pattern, is_negation,
                                                    left_nullable, right_nullable>
    ::compareValueAccessorAndStaticValue(
        ValueAccessor *accessor,
        const attribute_id value_accessor_attr_id,
        const TypedValue &static_value,
        const TupleIdSequence *filter) const {
  return InvokeOnValueAccessorMaybeTupleIdSequenceAdapter(
      accessor,
      [&](auto *accessor) -> TupleIdSequence* {  // NOLINT(build/c++11)
#ifdef QUICKSTEP_ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT
    static constexpr bool short_circuit = true;
#else
    static constexpr bool short_circuit = false;
#endif
    TupleIdSequence *result = new TupleIdSequence(accessor->getEndPosition());
    if (right_nullable && static_value.isNull()) {
      // A NULL pattern leaves the freshly created result untouched.
      return result;
    }

    // Transform and compile pattern in advance before the loop.
    const char *pattern = static_cast<const char *>(static_value.getDataPtr());
    std::string regex_pattern;
    re2::StringPiece pattern_piece;
    const std::size_t orig_pattern_len = strnlen(pattern, right_length_);
    if (is_like_pattern) {
      regex_pattern = this->transformLikeToRegex(pattern, orig_pattern_len);
      pattern_piece.set(regex_pattern.c_str(), regex_pattern.size());
    } else {
      pattern_piece.set(pattern, orig_pattern_len);
    }
    const re2::RE2 re2_pattern(pattern_piece);

    if (short_circuit && (filter != nullptr)) {
      // Only values at positions admitted by the filter are evaluated.
      DCHECK_EQ(filter->length(), result->length());
      for (TupleIdSequence::const_iterator filter_it = filter->begin();
           filter_it != filter->end();
           ++filter_it) {
        const void *va_value
            = accessor->template getUntypedValueAtAbsolutePosition<left_nullable>(
                value_accessor_attr_id, *filter_it);
        result->set(*filter_it,
                    !(left_nullable && (va_value == nullptr))
                        && this->matchDataPtrWithPattern(va_value, re2_pattern));
      }
    } else {
      accessor->beginIteration();
      if (accessor->isColumnAccessorSupported()) {
        // If ColumnAccessor is supported on the underlying accessor, we have a fast strided
        // column accessor available for the iteration on the underlying block.
        std::unique_ptr<const ColumnAccessor<left_nullable>> column_accessor
            (accessor->template getColumnAccessor<left_nullable>(value_accessor_attr_id));
        DCHECK(column_accessor != nullptr);
        while (accessor->next()) {
          const void *va_value = column_accessor->getUntypedValue();
          result->set(accessor->getCurrentPosition(),
                      !(left_nullable && (va_value == nullptr))
                          && this->matchDataPtrWithPattern(va_value, re2_pattern));
        }
      } else {
        while (accessor->next()) {
          const void *va_value
              = accessor->template getUntypedValue<left_nullable>(value_accessor_attr_id);
          result->set(accessor->getCurrentPosition(),
                      !(left_nullable && (va_value == nullptr))
                          && this->matchDataPtrWithPattern(va_value, re2_pattern));
        }
      }
      // Without short-circuiting the filter is applied after the full scan.
      if (!short_circuit && (filter != nullptr)) {
        result->intersectWith(*filter);
      }
    }
    return result;
  });
}