TupleIdSequence* CompressedColumnStoreTupleStorageSubBlock::getMatchesForPredicate(
    const ComparisonPredicate &predicate,
    const TupleIdSequence *filter) const {
  if (dictionary_coded_attributes_[sort_column_id_] || truncated_attributes_[sort_column_id_]) {
    // NOTE(chasseur): The version from CompressedTupleStorageSubBlock will in
    // turn call getEqualCodes(), getNotEqualCodes(), or getCodesInRange() as
    // necessary for this block, which will use a fast binary search if
    // evaluating a predicate on the sort column.
    return CompressedTupleStorageSubBlock::getMatchesForPredicate(predicate, filter);
  } else {
    TupleIdSequence *matches =
        SortColumnPredicateEvaluator::EvaluatePredicateForUncompressedSortColumn(
            predicate,
            relation_,
            sort_column_id_,
            column_stripes_[sort_column_id_],
            *static_cast<const tuple_id*>(sub_block_memory_) - uncompressed_nulls_in_sort_column_);
    if (matches == nullptr) {
      // TODO(chasseur): There is considerable duplication of effort in
      // SortColumnPredicateEvaluator::EvaluatePredicateForUncompressedSortColumn()
      // and CompressedTupleStorageSubBlock::getMatchesForPredicate() which we
      // should try to eliminate.
      return CompressedTupleStorageSubBlock::getMatchesForPredicate(predicate, filter);
    } else {
      if (filter != nullptr) {
        matches->intersectWith(*filter);
      }
      return matches;
    }
  }
}
TupleIdSequence* CompressedColumnStoreTupleStorageSubBlock::getNotEqualCodes(
    const attribute_id attr_id,
    const std::uint32_t code,
    const TupleIdSequence *filter) const {
  if (attr_id == sort_column_id_) {
    // Special (fast) case: do a binary search of the sort column.
    pair<uint32_t, uint32_t> code_range(code, code + 1);

    // Adjust the upper limit if doing so can avoid an extra binary search.
    if (dictionary_coded_attributes_[attr_id]) {
      if (code_range.second == compressedGetDictionary(attr_id).numberOfCodes()) {
        code_range.second = numeric_limits<uint32_t>::max();
      }
    } else if (code_range.first == GetMaxTruncatedValue(compression_info_.attribute_size(attr_id))) {
      code_range.second = numeric_limits<uint32_t>::max();
    }

    pair<tuple_id, tuple_id> tuple_range = getCompressedSortColumnRange(code_range);

    // We searched for the range of equal codes, so return its complement.
    TupleIdSequence *matches = new TupleIdSequence(*static_cast<const tuple_id*>(sub_block_memory_));
    matches->setRange(0, tuple_range.first, true);
    matches->setRange(tuple_range.second,
                      *static_cast<const tuple_id*>(sub_block_memory_) - tuple_range.second,
                      true);
    if (filter != nullptr) {
      matches->intersectWith(*filter);
    }
    return matches;
  } else {
    return getCodesSatisfyingComparison<not_equal_to>(attr_id, code, filter);
  }
}
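// A minimal, self-contained sketch (standard C++ only, not the Quickstep API
// above) of the "complement of the equal range" technique that
// getNotEqualCodes() uses on the sort column: binary-search the sorted code
// column once for the run of codes equal to `code`; every position outside
// [run.first, run.second) is a not-equal match. EqualRun is a hypothetical
// name used only for illustration.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

std::pair<std::size_t, std::size_t> EqualRun(
    const std::vector<std::uint32_t> &sorted_codes,
    const std::uint32_t code) {
  // equal_range performs both binary searches (lower_bound and upper_bound).
  const auto run = std::equal_range(sorted_codes.begin(), sorted_codes.end(), code);
  return {static_cast<std::size_t>(run.first - sorted_codes.begin()),
          static_cast<std::size_t>(run.second - sorted_codes.begin())};
}

// The not-equal result is then the two flanking ranges, exactly as the
// setRange() calls above mark [0, run.first) and [run.second, num_tuples).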
TupleIdSequence* TupleStorageSubBlock::getMatchesForPredicate(const Predicate *pred) const {
  TupleIdSequence *matches = new TupleIdSequence();
  tuple_id max_tid = getMaxTupleID();

  if (pred == NULL) {
    if (isPacked()) {
      for (tuple_id tid = 0; tid <= max_tid; ++tid) {
        matches->append(tid);
      }
    } else {
      for (tuple_id tid = 0; tid <= max_tid; ++tid) {
        if (hasTupleWithID(tid)) {
          matches->append(tid);
        }
      }
    }
  } else {
    if (isPacked()) {
      for (tuple_id tid = 0; tid <= max_tid; ++tid) {
        if (pred->matchesForSingleTuple(*this, tid)) {
          matches->append(tid);
        }
      }
    } else {
      for (tuple_id tid = 0; tid <= max_tid; ++tid) {
        if (hasTupleWithID(tid) && pred->matchesForSingleTuple(*this, tid)) {
          matches->append(tid);
        }
      }
    }
  }

  return matches;
}
TupleIdSequence* CompressedColumnStoreTupleStorageSubBlock::getGreaterOrEqualCodes(
    const attribute_id attr_id,
    const std::uint32_t code,
    const TupleIdSequence *filter) const {
  if (attr_id == sort_column_id_) {
    // Special (fast) case: do a binary search of the sort column.
    TupleIdSequence *matches = new TupleIdSequence(*static_cast<const tuple_id*>(sub_block_memory_));
    pair<tuple_id, tuple_id> tuple_range = getCompressedSortColumnRange(
        pair<uint32_t, uint32_t>(code, numeric_limits<uint32_t>::max()));
    matches->setRange(tuple_range.first, tuple_range.second - tuple_range.first, true);
    if (filter != nullptr) {
      matches->intersectWith(*filter);
    }
    return matches;
  } else {
    return getCodesSatisfyingComparison<less_equal>(attr_id, code, filter);
  }
}
TupleIdSequence* CompressedPackedRowStoreTupleStorageSubBlock::getCodesInRange(
    const attribute_id attr_id,
    const std::pair<std::uint32_t, std::uint32_t> range) const {
  TupleIdSequence *matches = new TupleIdSequence();
  const char *attr_location = static_cast<const char*>(tuple_storage_)
                              + attribute_offsets_[attr_id];
  switch (compression_info_.attribute_size(attr_id)) {
    case 1:
      for (tuple_id tid = 0;
           tid < *static_cast<const tuple_id*>(sub_block_memory_);
           ++tid, attr_location += tuple_length_bytes_) {
        if (range.first <= (*reinterpret_cast<const uint8_t*>(attr_location))
            && (*reinterpret_cast<const uint8_t*>(attr_location) < range.second)) {
          matches->append(tid);
        }
      }
      break;
    case 2:
      for (tuple_id tid = 0;
           tid < *static_cast<const tuple_id*>(sub_block_memory_);
           ++tid, attr_location += tuple_length_bytes_) {
        if (range.first <= (*reinterpret_cast<const uint16_t*>(attr_location))
            && (*reinterpret_cast<const uint16_t*>(attr_location) < range.second)) {
          matches->append(tid);
        }
      }
      break;
    case 4:
      for (tuple_id tid = 0;
           tid < *static_cast<const tuple_id*>(sub_block_memory_);
           ++tid, attr_location += tuple_length_bytes_) {
        if (range.first <= (*reinterpret_cast<const uint32_t*>(attr_location))
            && (*reinterpret_cast<const uint32_t*>(attr_location) < range.second)) {
          matches->append(tid);
        }
      }
      break;
    default:
      FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
                  "attribute ID " << attr_id
                  << " in CompressedPackedRowStoreTupleStorageSubBlock::getCodesInRange()");
  }
  return matches;
}
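// A minimal sketch (standard C++ only, not the Quickstep API) of the strided
// pointer walk above: in a packed row store, successive values of a single
// attribute sit exactly tuple_length_bytes apart, so the scan advances one
// raw byte pointer per tuple instead of recomputing each tuple's address.
// StridedEqualScan is a hypothetical name; memcpy stands in for the
// reinterpret_cast above to stay safe on alignment-strict targets.
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

std::vector<std::size_t> StridedEqualScan(const char *tuple_storage,
                                          const std::size_t num_tuples,
                                          const std::size_t tuple_length_bytes,
                                          const std::size_t attribute_offset,
                                          const std::uint16_t target) {
  std::vector<std::size_t> matches;
  const char *attr_location = tuple_storage + attribute_offset;
  for (std::size_t tid = 0;
       tid < num_tuples;
       ++tid, attr_location += tuple_length_bytes) {
    std::uint16_t value;
    std::memcpy(&value, attr_location, sizeof(value));
    if (value == target) {
      matches.push_back(tid);
    }
  }
  return matches;
}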
template <template <typename T> class comparison_functor>
TupleIdSequence* CompressedColumnStoreTupleStorageSubBlock::getCodesSatisfyingComparison(
    const attribute_id attr_id,
    const std::uint32_t code,
    const TupleIdSequence *filter) const {
#ifdef QUICKSTEP_ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT
  static constexpr bool short_circuit = true;
#else
  static constexpr bool short_circuit = false;
#endif
  comparison_functor<uint32_t> comp;
  TupleIdSequence *matches = new TupleIdSequence(*static_cast<const tuple_id*>(sub_block_memory_));
  const void *attr_stripe = column_stripes_[attr_id];
  if (!short_circuit || (filter == nullptr)) {
    switch (compression_info_.attribute_size(attr_id)) {
      case 1:
        for (tuple_id tid = 0;
             tid < *static_cast<const tuple_id*>(sub_block_memory_);
             ++tid) {
          if (comp(code, static_cast<const uint8_t*>(attr_stripe)[tid])) {
            matches->set(tid);
          }
        }
        break;
      case 2:
        for (tuple_id tid = 0;
             tid < *static_cast<const tuple_id*>(sub_block_memory_);
             ++tid) {
          if (comp(code, static_cast<const uint16_t*>(attr_stripe)[tid])) {
            matches->set(tid);
          }
        }
        break;
      case 4:
        for (tuple_id tid = 0;
             tid < *static_cast<const tuple_id*>(sub_block_memory_);
             ++tid) {
          if (comp(code, static_cast<const uint32_t*>(attr_stripe)[tid])) {
            matches->set(tid);
          }
        }
        break;
      default:
        FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
                    "attribute ID " << attr_id
                    << " in CompressedColumnStoreTupleStorageSubBlock::getCodesSatisfyingComparison()");
    }
    if (filter != nullptr) {
      matches->intersectWith(*filter);
    }
  } else {
    switch (compression_info_.attribute_size(attr_id)) {
      case 1:
        for (TupleIdSequence::const_iterator filter_it = filter->begin();
             filter_it != filter->end();
             ++filter_it) {
          if (comp(code, static_cast<const uint8_t*>(attr_stripe)[*filter_it])) {
            matches->set(*filter_it);
          }
        }
        break;
      case 2:
        for (TupleIdSequence::const_iterator filter_it = filter->begin();
             filter_it != filter->end();
             ++filter_it) {
          if (comp(code, static_cast<const uint16_t*>(attr_stripe)[*filter_it])) {
            matches->set(*filter_it);
          }
        }
        break;
      case 4:
        for (TupleIdSequence::const_iterator filter_it = filter->begin();
             filter_it != filter->end();
             ++filter_it) {
          if (comp(code, static_cast<const uint32_t*>(attr_stripe)[*filter_it])) {
            matches->set(*filter_it);
          }
        }
        break;
      default:
        FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
                    "attribute ID " << attr_id
                    << " in CompressedColumnStoreTupleStorageSubBlock::getCodesSatisfyingComparison()");
    }
  }
  return matches;
}
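// A minimal sketch (standard C++ only, not the Quickstep API) of the
// comparator-functor dispatch above. The crucial detail is the argument
// order comp(code, value): instantiating with std::less_equal selects
// tuples where code <= value, i.e. value >= code, which is why
// getGreaterOrEqualCodes() above instantiates with less_equal rather than
// greater_equal. ScanWithFunctor is a hypothetical name for illustration.
#include <cstddef>
#include <cstdint>
#include <functional>
#include <vector>

template <template <typename T> class comparison_functor>
std::vector<std::size_t> ScanWithFunctor(const std::vector<std::uint32_t> &codes,
                                         const std::uint32_t code) {
  comparison_functor<std::uint32_t> comp;
  std::vector<std::size_t> matches;
  for (std::size_t tid = 0; tid < codes.size(); ++tid) {
    if (comp(code, codes[tid])) {  // note: code on the left, as above
      matches.push_back(tid);
    }
  }
  return matches;
}

// Example: ScanWithFunctor<std::less_equal>(codes, 7) returns the positions
// of all codes >= 7; std::not_equal_to yields the not-equal scan.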
TupleIdSequence* CompressedColumnStoreTupleStorageSubBlock::getCodesInRange(
    const attribute_id attr_id,
    const std::pair<std::uint32_t, std::uint32_t> range,
    const TupleIdSequence *filter) const {
#ifdef QUICKSTEP_ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT
  static constexpr bool short_circuit = true;
#else
  static constexpr bool short_circuit = false;
#endif
  TupleIdSequence *matches = new TupleIdSequence(*static_cast<const tuple_id*>(sub_block_memory_));
  if (attr_id == sort_column_id_) {
    // Special (fast) case: do a binary search of the sort column.
    pair<tuple_id, tuple_id> tuple_range = getCompressedSortColumnRange(range);
    matches->setRange(tuple_range.first, tuple_range.second - tuple_range.first, true);
    if (filter != nullptr) {
      matches->intersectWith(*filter);
    }
  } else {
    const void *attr_stripe = column_stripes_[attr_id];
    if (!short_circuit || (filter == nullptr)) {
      switch (compression_info_.attribute_size(attr_id)) {
        case 1:
          for (tuple_id tid = 0;
               tid < *static_cast<const tuple_id*>(sub_block_memory_);
               ++tid) {
            if (range.first <= (static_cast<const uint8_t*>(attr_stripe)[tid])
                && (static_cast<const uint8_t*>(attr_stripe)[tid] < range.second)) {
              matches->set(tid);
            }
          }
          break;
        case 2:
          for (tuple_id tid = 0;
               tid < *static_cast<const tuple_id*>(sub_block_memory_);
               ++tid) {
            if (range.first <= (static_cast<const uint16_t*>(attr_stripe)[tid])
                && (static_cast<const uint16_t*>(attr_stripe)[tid] < range.second)) {
              matches->set(tid);
            }
          }
          break;
        case 4:
          for (tuple_id tid = 0;
               tid < *static_cast<const tuple_id*>(sub_block_memory_);
               ++tid) {
            if (range.first <= (static_cast<const uint32_t*>(attr_stripe)[tid])
                && (static_cast<const uint32_t*>(attr_stripe)[tid] < range.second)) {
              matches->set(tid);
            }
          }
          break;
        default:
          FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
                      "attribute ID " << attr_id
                      << " in CompressedColumnStoreTupleStorageSubBlock::getCodesInRange()");
      }
      if (filter != nullptr) {
        matches->intersectWith(*filter);
      }
    } else {
      switch (compression_info_.attribute_size(attr_id)) {
        case 1:
          for (TupleIdSequence::const_iterator filter_it = filter->begin();
               filter_it != filter->end();
               ++filter_it) {
            if (range.first <= (static_cast<const uint8_t*>(attr_stripe)[*filter_it])
                && (static_cast<const uint8_t*>(attr_stripe)[*filter_it] < range.second)) {
              matches->set(*filter_it);
            }
          }
          break;
        case 2:
          for (TupleIdSequence::const_iterator filter_it = filter->begin();
               filter_it != filter->end();
               ++filter_it) {
            if (range.first <= (static_cast<const uint16_t*>(attr_stripe)[*filter_it])
                && (static_cast<const uint16_t*>(attr_stripe)[*filter_it] < range.second)) {
              matches->set(*filter_it);
            }
          }
          break;
        case 4:
          for (TupleIdSequence::const_iterator filter_it = filter->begin();
               filter_it != filter->end();
               ++filter_it) {
            if (range.first <= (static_cast<const uint32_t*>(attr_stripe)[*filter_it])
                && (static_cast<const uint32_t*>(attr_stripe)[*filter_it] < range.second)) {
              matches->set(*filter_it);
            }
          }
          break;
        default:
          FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
                      "attribute ID " << attr_id
                      << " in CompressedColumnStoreTupleStorageSubBlock::getCodesInRange()");
      }
    }
  }
  return matches;
}
TupleIdSequence* CompressedColumnStoreTupleStorageSubBlock::getNotEqualCodesExcludingNull(
    const attribute_id attr_id,
    const std::uint32_t code,
    const std::uint32_t null_code,
    const TupleIdSequence *filter) const {
#ifdef QUICKSTEP_ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT
  static constexpr bool short_circuit = true;
#else
  static constexpr bool short_circuit = false;
#endif
  if (attr_id == sort_column_id_) {
    // Special (fast) case: do a binary search of the sort column.
    pair<uint32_t, uint32_t> code_range(code, code + 1);

    // Adjust the upper limit if doing so can avoid an extra binary search.
    if (dictionary_coded_attributes_[attr_id]) {
      if (code_range.second == compressedGetDictionary(attr_id).numberOfCodes()) {
        code_range.second = numeric_limits<uint32_t>::max();
      }
    } else if (code_range.first == GetMaxTruncatedValue(compression_info_.attribute_size(attr_id))) {
      code_range.second = numeric_limits<uint32_t>::max();
    }

    pair<tuple_id, tuple_id> tuple_range = getCompressedSortColumnRange(code_range);

    // Search for the beginning of nulls in the sort column.
    pair<uint32_t, uint32_t> null_range(null_code, numeric_limits<uint32_t>::max());
    pair<tuple_id, tuple_id> limit_range = getCompressedSortColumnRange(null_range);

    // We searched for the range of equal codes, so return its complement.
    TupleIdSequence *matches = new TupleIdSequence(*static_cast<const tuple_id*>(sub_block_memory_));
    matches->setRange(0, tuple_range.first, true);
    matches->setRange(tuple_range.second, limit_range.first - tuple_range.second, true);
    if (filter != nullptr) {
      matches->intersectWith(*filter);
    }
    return matches;
  } else {
    TupleIdSequence *matches = new TupleIdSequence(*static_cast<const tuple_id*>(sub_block_memory_));
    const void *attr_stripe = column_stripes_[attr_id];
    if (!short_circuit || (filter == nullptr)) {
      switch (compression_info_.attribute_size(attr_id)) {
        case 1:
          for (tuple_id tid = 0;
               tid < *static_cast<const tuple_id*>(sub_block_memory_);
               ++tid) {
            if ((code != static_cast<const uint8_t*>(attr_stripe)[tid])
                && (null_code != static_cast<const uint8_t*>(attr_stripe)[tid])) {
              matches->set(tid);
            }
          }
          break;
        case 2:
          for (tuple_id tid = 0;
               tid < *static_cast<const tuple_id*>(sub_block_memory_);
               ++tid) {
            if ((code != static_cast<const uint16_t*>(attr_stripe)[tid])
                && (null_code != static_cast<const uint16_t*>(attr_stripe)[tid])) {
              matches->set(tid);
            }
          }
          break;
        case 4:
          for (tuple_id tid = 0;
               tid < *static_cast<const tuple_id*>(sub_block_memory_);
               ++tid) {
            if ((code != static_cast<const uint32_t*>(attr_stripe)[tid])
                && (null_code != static_cast<const uint32_t*>(attr_stripe)[tid])) {
              matches->set(tid);
            }
          }
          break;
        default:
          FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
                      "attribute ID " << attr_id
                      << " in CompressedColumnStoreTupleStorageSubBlock::getNotEqualCodesExcludingNull()");
      }
      if (filter != nullptr) {
        matches->intersectWith(*filter);
      }
    } else {
      switch (compression_info_.attribute_size(attr_id)) {
        case 1:
          for (TupleIdSequence::const_iterator filter_it = filter->begin();
               filter_it != filter->end();
               ++filter_it) {
            if ((code != static_cast<const uint8_t*>(attr_stripe)[*filter_it])
                && (null_code != static_cast<const uint8_t*>(attr_stripe)[*filter_it])) {
              matches->set(*filter_it);
            }
          }
          break;
        case 2:
          for (TupleIdSequence::const_iterator filter_it = filter->begin();
               filter_it != filter->end();
               ++filter_it) {
            if ((code != static_cast<const uint16_t*>(attr_stripe)[*filter_it])
                && (null_code != static_cast<const uint16_t*>(attr_stripe)[*filter_it])) {
              matches->set(*filter_it);
            }
          }
          break;
        case 4:
          for (TupleIdSequence::const_iterator filter_it = filter->begin();
               filter_it != filter->end();
               ++filter_it) {
            if ((code != static_cast<const uint32_t*>(attr_stripe)[*filter_it])
                && (null_code != static_cast<const uint32_t*>(attr_stripe)[*filter_it])) {
              matches->set(*filter_it);
            }
          }
          break;
        default:
          FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
                      "attribute ID " << attr_id
                      << " in CompressedColumnStoreTupleStorageSubBlock::getNotEqualCodesExcludingNull()");
      }
    }
    return matches;
  }
}
/**
 * @brief Take the intersection of this TupleIdSequence with another's
 *        complement (i.e. set difference), modifying this TupleIdSequence
 *        in-place.
 *
 * @warning This TupleIdSequence must be the same length as the other, and
 *          the set-difference only has semantic meaning if both
 *          TupleIdSequences refer to tuples in the same block.
 *
 * @param other Another TupleIdSequence to intersect with the complement of.
 **/
inline void intersectWithComplement(const TupleIdSequence &other) {
  DEBUG_ASSERT(length() == other.length());
  internal_bitvector_.unsetFrom(other.internal_bitvector_);
}
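// A minimal runnable sketch (std::bitset standing in for the internal bit
// vector; not the Quickstep API) of the set-difference semantics documented
// above: a &= ~b keeps exactly the positions set in a but not in b, which is
// what intersectWithComplement() computes in-place.
#include <bitset>
#include <iostream>

int main() {
  std::bitset<8> a("10110110");  // this sequence's matches
  std::bitset<8> b("00100010");  // tuples to subtract
  a &= ~b;                       // a.intersectWithComplement(b)
  std::cout << a << '\n';        // prints 10010100
  return 0;
}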
TupleIdSequence* CompressedPackedRowStoreTupleStorageSubBlock::getCodesInRange(
    const attribute_id attr_id,
    const std::pair<std::uint32_t, std::uint32_t> range,
    const TupleIdSequence *filter) const {
#ifdef QUICKSTEP_ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT
  static constexpr bool short_circuit = true;
#else
  static constexpr bool short_circuit = false;
#endif
  TupleIdSequence *matches = new TupleIdSequence(*static_cast<const tuple_id*>(sub_block_memory_));
  const char *attr_location = static_cast<const char*>(tuple_storage_)
                              + attribute_offsets_[attr_id];
  if (!short_circuit || (filter == nullptr)) {
    switch (compression_info_.attribute_size(attr_id)) {
      case 1:
        for (tuple_id tid = 0;
             tid < *static_cast<const tuple_id*>(sub_block_memory_);
             ++tid, attr_location += tuple_length_bytes_) {
          if (range.first <= (*reinterpret_cast<const uint8_t*>(attr_location))
              && (*reinterpret_cast<const uint8_t*>(attr_location) < range.second)) {
            matches->set(tid);
          }
        }
        break;
      case 2:
        for (tuple_id tid = 0;
             tid < *static_cast<const tuple_id*>(sub_block_memory_);
             ++tid, attr_location += tuple_length_bytes_) {
          if (range.first <= (*reinterpret_cast<const uint16_t*>(attr_location))
              && (*reinterpret_cast<const uint16_t*>(attr_location) < range.second)) {
            matches->set(tid);
          }
        }
        break;
      case 4:
        for (tuple_id tid = 0;
             tid < *static_cast<const tuple_id*>(sub_block_memory_);
             ++tid, attr_location += tuple_length_bytes_) {
          if (range.first <= (*reinterpret_cast<const uint32_t*>(attr_location))
              && (*reinterpret_cast<const uint32_t*>(attr_location) < range.second)) {
            matches->set(tid);
          }
        }
        break;
      default:
        FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
                    "attribute ID " << attr_id
                    << " in CompressedPackedRowStoreTupleStorageSubBlock::getCodesInRange()");
    }
    if (filter != nullptr) {
      matches->intersectWith(*filter);
    }
  } else {
    switch (compression_info_.attribute_size(attr_id)) {
      case 1:
        for (TupleIdSequence::const_iterator filter_it = filter->begin();
             filter_it != filter->end();
             ++filter_it) {
          const void *local_attr_location = attr_location + (*filter_it) * tuple_length_bytes_;
          if (range.first <= (*reinterpret_cast<const uint8_t*>(local_attr_location))
              && (*reinterpret_cast<const uint8_t*>(local_attr_location) < range.second)) {
            matches->set(*filter_it);
          }
        }
        break;
      case 2:
        for (TupleIdSequence::const_iterator filter_it = filter->begin();
             filter_it != filter->end();
             ++filter_it) {
          const void *local_attr_location = attr_location + (*filter_it) * tuple_length_bytes_;
          if (range.first <= (*reinterpret_cast<const uint16_t*>(local_attr_location))
              && (*reinterpret_cast<const uint16_t*>(local_attr_location) < range.second)) {
            matches->set(*filter_it);
          }
        }
        break;
      case 4:
        for (TupleIdSequence::const_iterator filter_it = filter->begin();
             filter_it != filter->end();
             ++filter_it) {
          const void *local_attr_location = attr_location + (*filter_it) * tuple_length_bytes_;
          if (range.first <= (*reinterpret_cast<const uint32_t*>(local_attr_location))
              && (*reinterpret_cast<const uint32_t*>(local_attr_location) < range.second)) {
            matches->set(*filter_it);
          }
        }
        break;
      default:
        FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
                    "attribute ID " << attr_id
                    << " in CompressedPackedRowStoreTupleStorageSubBlock::getCodesInRange()");
    }
  }
  return matches;
}
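// A minimal sketch (standard C++ only, not the Quickstep API) of the
// short-circuit strategy that the QUICKSTEP_ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT
// branches above select between: with short-circuiting, only the positions
// already set in the filter are probed; without it, every tuple is probed and
// the filter is intersected in afterwards. The filtered form wins when the
// filter is sparse; the full scan is friendlier to vectorization.
// FilteredRangeScan is a hypothetical name for illustration.
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<std::size_t> FilteredRangeScan(
    const std::vector<std::uint32_t> &codes,
    const std::vector<std::size_t> &filter_positions,  // already-matching tuples
    const std::uint32_t range_first,
    const std::uint32_t range_second) {  // half-open code range [first, second)
  std::vector<std::size_t> matches;
  for (const std::size_t pos : filter_positions) {
    if (range_first <= codes[pos] && codes[pos] < range_second) {
      matches.push_back(pos);
    }
  }
  return matches;
}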
template <bool is_like_pattern, bool is_negation,
          bool left_nullable, bool right_nullable>
TupleIdSequence* PatternMatchingUncheckedComparator<is_like_pattern, is_negation,
                                                    left_nullable, right_nullable>
    ::compareColumnVectorAndStaticValue(
        const ColumnVector &column_vector,
        const TypedValue &static_value,
        const TupleIdSequence *filter,
        const TupleIdSequence *existence_bitmap) const {
  // Specialized implementation for matching a ColumnVector of strings against
  // a single pattern. In this situation, the pattern is compiled only once in
  // advance and then matched against each of the strings in the ColumnVector.
  return InvokeOnColumnVector(
      column_vector,
      [&](const auto &column_vector) -> TupleIdSequence* {  // NOLINT(build/c++11)
#ifdef QUICKSTEP_ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT
    static constexpr bool short_circuit = true;
#else
    static constexpr bool short_circuit = false;
#endif
    DCHECK((existence_bitmap == nullptr)
           || (existence_bitmap->numTuples() == column_vector.size()));
    DCHECK((filter == nullptr)
           || ((existence_bitmap == nullptr) ? (filter->length() == column_vector.size())
                                             : (filter->length() == existence_bitmap->length())));

    TupleIdSequence *result = new TupleIdSequence(
        (existence_bitmap == nullptr) ? column_vector.size()
                                      : existence_bitmap->length());
    if (right_nullable && static_value.isNull()) {
      return result;
    }

    // Transform and compile the pattern once, in advance of the loop.
    const char *pattern = static_cast<const char*>(static_value.getDataPtr());
    std::string regex_pattern;
    re2::StringPiece pattern_piece;
    std::size_t orig_pattern_len = strnlen(pattern, right_length_);
    if (is_like_pattern) {
      regex_pattern = this->transformLikeToRegex(pattern, orig_pattern_len);
      pattern_piece.set(regex_pattern.c_str(), regex_pattern.size());
    } else {
      pattern_piece.set(pattern, orig_pattern_len);
    }
    const re2::RE2 re2_pattern(pattern_piece);

    if (short_circuit && (filter != nullptr)) {
      if (existence_bitmap != nullptr) {
        TupleIdSequence::const_iterator existence_it = existence_bitmap->begin();
        for (std::size_t cv_pos = 0; cv_pos < column_vector.size(); ++cv_pos) {
          if (filter->get(*existence_it)) {
            const void *cv_value =
                column_vector.template getUntypedValue<left_nullable>(cv_pos);
            result->set(*existence_it,
                        !(left_nullable && (cv_value == nullptr))
                            && this->matchDataPtrWithPattern(cv_value, re2_pattern));
          }
          ++existence_it;
        }
      } else {
        for (TupleIdSequence::const_iterator filter_it = filter->begin();
             filter_it != filter->end();
             ++filter_it) {
          const void *cv_value =
              column_vector.template getUntypedValue<left_nullable>(*filter_it);
          result->set(*filter_it,
                      !(left_nullable && (cv_value == nullptr))
                          && this->matchDataPtrWithPattern(cv_value, re2_pattern));
        }
      }
    } else {
      if (existence_bitmap != nullptr) {
        TupleIdSequence::const_iterator existence_it = existence_bitmap->begin();
        for (std::size_t cv_pos = 0; cv_pos < column_vector.size(); ++cv_pos) {
          const void *cv_value =
              column_vector.template getUntypedValue<left_nullable>(cv_pos);
          result->set(*existence_it,
                      !(left_nullable && (cv_value == nullptr))
                          && this->matchDataPtrWithPattern(cv_value, re2_pattern));
          ++existence_it;
        }
      } else {
        for (std::size_t pos = 0; pos < column_vector.size(); ++pos) {
          const void *cv_value =
              column_vector.template getUntypedValue<left_nullable>(pos);
          result->set(pos,
                      !(left_nullable && (cv_value == nullptr))
                          && this->matchDataPtrWithPattern(cv_value, re2_pattern));
        }
      }
      if (!short_circuit && (filter != nullptr)) {
        result->intersectWith(*filter);
      }
    }
    return result;
  });
}
template <bool is_like_pattern, bool is_negation,
          bool left_nullable, bool right_nullable>
TupleIdSequence* PatternMatchingUncheckedComparator<is_like_pattern, is_negation,
                                                    left_nullable, right_nullable>
    ::compareValueAccessorAndStaticValue(
        ValueAccessor *accessor,
        const attribute_id value_accessor_attr_id,
        const TypedValue &static_value,
        const TupleIdSequence *filter) const {
  // Specialized implementation for matching a ValueAccessor of strings against
  // a single pattern. In this situation, the pattern is compiled only once in
  // advance and then matched against each of the strings in the ValueAccessor.
  return InvokeOnValueAccessorMaybeTupleIdSequenceAdapter(
      accessor,
      [&](auto *accessor) -> TupleIdSequence* {  // NOLINT(build/c++11)
#ifdef QUICKSTEP_ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT
    static constexpr bool short_circuit = true;
#else
    static constexpr bool short_circuit = false;
#endif
    TupleIdSequence *result = new TupleIdSequence(accessor->getEndPosition());
    if (right_nullable && static_value.isNull()) {
      return result;
    }

    // Transform and compile the pattern once, in advance of the loop.
    const char *pattern = static_cast<const char*>(static_value.getDataPtr());
    std::string regex_pattern;
    re2::StringPiece pattern_piece;
    std::size_t orig_pattern_len = strnlen(pattern, right_length_);
    if (is_like_pattern) {
      regex_pattern = this->transformLikeToRegex(pattern, orig_pattern_len);
      pattern_piece.set(regex_pattern.c_str(), regex_pattern.size());
    } else {
      pattern_piece.set(pattern, orig_pattern_len);
    }
    const re2::RE2 re2_pattern(pattern_piece);

    if (short_circuit && (filter != nullptr)) {
      DCHECK_EQ(filter->length(), result->length());
      for (TupleIdSequence::const_iterator filter_it = filter->begin();
           filter_it != filter->end();
           ++filter_it) {
        const void *va_value =
            accessor->template getUntypedValueAtAbsolutePosition<left_nullable>(
                value_accessor_attr_id, *filter_it);
        result->set(*filter_it, this->matchDataPtrWithPattern(va_value, re2_pattern));
      }
    } else {
      accessor->beginIteration();
      if (accessor->isColumnAccessorSupported()) {
        // If ColumnAccessor is supported on the underlying accessor, we have a
        // fast strided column accessor available for the iteration on the
        // underlying block.
        std::unique_ptr<const ColumnAccessor<left_nullable>> column_accessor(
            accessor->template getColumnAccessor<left_nullable>(value_accessor_attr_id));
        DCHECK(column_accessor != nullptr);
        while (accessor->next()) {
          const void *va_value = column_accessor->getUntypedValue();
          result->set(accessor->getCurrentPosition(),
                      this->matchDataPtrWithPattern(va_value, re2_pattern));
        }
      } else {
        while (accessor->next()) {
          const void *va_value =
              accessor->template getUntypedValue<left_nullable>(value_accessor_attr_id);
          result->set(accessor->getCurrentPosition(),
                      this->matchDataPtrWithPattern(va_value, re2_pattern));
        }
      }
      if (!short_circuit && (filter != nullptr)) {
        result->intersectWith(*filter);
      }
    }
    return result;
  });
}
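// A minimal sketch of the pattern-precompilation strategy used by both
// comparators above, written against plain RE2 rather than the Quickstep API
// (MatchAll is a hypothetical name). Constructing re2::RE2 parses and
// compiles the pattern, so hoisting it out of the loop pays that cost once
// per predicate instead of once per tuple.
#include <string>
#include <vector>
#include "re2/re2.h"

std::vector<bool> MatchAll(const std::vector<std::string> &values,
                           const std::string &regex_pattern) {
  const re2::RE2 pattern(regex_pattern);  // compiled once, before the loop
  std::vector<bool> result;
  result.reserve(values.size());
  for (const std::string &value : values) {
    result.push_back(re2::RE2::FullMatch(value, pattern));
  }
  return result;
}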
TupleIdSequence* SortColumnPredicateEvaluator::EvaluatePredicateForUncompressedSortColumn(
    const Predicate &predicate,
    const CatalogRelation &relation,
    const attribute_id sort_attribute_id,
    void *sort_attribute_stripe,
    const tuple_id num_tuples) {
  // Determine if the predicate is a comparison of the sort column with a literal.
  if (predicate.isAttributeLiteralComparisonPredicate()) {
    const ComparisonPredicate &comparison_predicate =
        static_cast<const ComparisonPredicate&>(predicate);

    const CatalogAttribute *comparison_attribute = NULL;
    bool left_literal = false;
    if (comparison_predicate.getLeftOperand().hasStaticValue()) {
      DEBUG_ASSERT(comparison_predicate.getRightOperand().getDataSource() == Scalar::kAttribute);
      comparison_attribute =
          &(static_cast<const ScalarAttribute&>(comparison_predicate.getRightOperand()).getAttribute());
      left_literal = true;
    } else {
      DEBUG_ASSERT(comparison_predicate.getLeftOperand().getDataSource() == Scalar::kAttribute);
      comparison_attribute =
          &(static_cast<const ScalarAttribute&>(comparison_predicate.getLeftOperand()).getAttribute());
      left_literal = false;
    }

    DEBUG_ASSERT(comparison_attribute->getParent().getID() == relation.getID());
    if (comparison_attribute->getID() == sort_attribute_id) {
      const LiteralTypeInstance *comparison_literal;
      if (left_literal) {
        comparison_literal = &(comparison_predicate.getLeftOperand().getStaticValue());
      } else {
        comparison_literal = &(comparison_predicate.getRightOperand().getStaticValue());
      }

      // NOTE(chasseur): A standards-compliant implementation of lower_bound
      // always compares the iterator on the left with the literal on the
      // right, while upper_bound compares the literal on the left with the
      // iterator on the right. These will work even if comparison_attribute
      // and comparison_literal are different types.
      const Comparison &less_comparison = Comparison::GetComparison(Comparison::kLess);
      ScopedPtr<UncheckedComparator> fast_comparator_lower(
          less_comparison.makeUncheckedComparatorForTypes(comparison_attribute->getType(),
                                                          comparison_literal->getType()));
      STLUncheckedComparatorWrapper comp_lower(*fast_comparator_lower);
      ScopedPtr<UncheckedComparator> fast_comparator_upper(
          less_comparison.makeUncheckedComparatorForTypes(comparison_literal->getType(),
                                                          comparison_attribute->getType()));
      STLUncheckedComparatorWrapper comp_upper(*fast_comparator_upper);

      // Find the bounds on the range of matching tuples.
      tuple_id min_match = 0;
      tuple_id max_match_bound = num_tuples;
      ColumnStripeIterator begin_it(
          sort_attribute_stripe,
          relation.getAttributeById(sort_attribute_id).getType().maximumByteLength(),
          0);
      ColumnStripeIterator end_it(
          sort_attribute_stripe,
          relation.getAttributeById(sort_attribute_id).getType().maximumByteLength(),
          num_tuples);

      switch (comparison_predicate.getComparison().getComparisonID()) {
        case Comparison::kEqual:
          // Note: There is a special branch below for kNotEqual which takes
          // the complement of the matched range. Fall-through.
        case Comparison::kNotEqual:
          min_match = lower_bound(begin_it, end_it,
                                  comparison_literal->getDataPtr(),
                                  comp_lower).getTuplePosition();
          max_match_bound = upper_bound(begin_it, end_it,
                                        comparison_literal->getDataPtr(),
                                        comp_upper).getTuplePosition();
          break;
        case Comparison::kLess:
          if (left_literal) {
            min_match = upper_bound(begin_it, end_it,
                                    comparison_literal->getDataPtr(),
                                    comp_upper).getTuplePosition();
          } else {
            max_match_bound = lower_bound(begin_it, end_it,
                                          comparison_literal->getDataPtr(),
                                          comp_lower).getTuplePosition();
          }
          break;
        case Comparison::kLessOrEqual:
          if (left_literal) {
            min_match = lower_bound(begin_it, end_it,
                                    comparison_literal->getDataPtr(),
                                    comp_lower).getTuplePosition();
          } else {
            max_match_bound = upper_bound(begin_it, end_it,
                                          comparison_literal->getDataPtr(),
                                          comp_upper).getTuplePosition();
          }
          break;
        case Comparison::kGreater:
          if (left_literal) {
            max_match_bound = lower_bound(begin_it, end_it,
                                          comparison_literal->getDataPtr(),
                                          comp_lower).getTuplePosition();
          } else {
            min_match = upper_bound(begin_it, end_it,
                                    comparison_literal->getDataPtr(),
                                    comp_upper).getTuplePosition();
          }
          break;
        case Comparison::kGreaterOrEqual:
          if (left_literal) {
            max_match_bound = upper_bound(begin_it, end_it,
                                          comparison_literal->getDataPtr(),
                                          comp_upper).getTuplePosition();
          } else {
            min_match = lower_bound(begin_it, end_it,
                                    comparison_literal->getDataPtr(),
                                    comp_lower).getTuplePosition();
          }
          break;
        default:
          FATAL_ERROR("Unknown Comparison in SortColumnPredicateEvaluator::"
                      "EvaluatePredicateForUncompressedSortColumn()");
      }

      // Create and return the sequence of matches.
      TupleIdSequence *matches = new TupleIdSequence();
      if (comparison_predicate.getComparison().getComparisonID() == Comparison::kNotEqual) {
        // Special case: return all tuples NOT in the range for kEqual.
        for (tuple_id tid = 0; tid < min_match; ++tid) {
          matches->append(tid);
        }
        for (tuple_id tid = max_match_bound; tid < num_tuples; ++tid) {
          matches->append(tid);
        }
      } else {
        for (tuple_id tid = min_match; tid < max_match_bound; ++tid) {
          matches->append(tid);
        }
      }

      return matches;
    } else {
      return NULL;
    }
  } else {
    // Cannot evaluate a non-comparison predicate, so pass through.
    return NULL;
  }
}
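// A minimal sketch (standard algorithms, not the Quickstep API) of the bound
// selection in the switch above, for the simple case of a sorted column and a
// same-typed literal. Each comparison maps onto lower_bound and/or
// upper_bound over the sorted column: "attr < v" matches the prefix
// [0, lower_bound(v)) and "attr >= v" matches the suffix [lower_bound(v), n);
// kEqual uses both bounds, and kNotEqual takes the complement of that range.
// MatchRange is a hypothetical name for illustration.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

std::pair<std::size_t, std::size_t> MatchRange(
    const std::vector<std::int32_t> &sorted_column,
    const std::int32_t literal,
    const bool greater_or_equal) {  // true: attr >= v, false: attr < v
  const std::size_t split = static_cast<std::size_t>(
      std::lower_bound(sorted_column.begin(), sorted_column.end(), literal)
          - sorted_column.begin());
  return greater_or_equal ? std::make_pair(split, sorted_column.size())
                          : std::make_pair(static_cast<std::size_t>(0), split);
}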