TupleIdSequence* CompressedColumnStoreTupleStorageSubBlock::getMatchesForPredicate(
    const ComparisonPredicate &predicate,
    const TupleIdSequence *filter) const {
  if (dictionary_coded_attributes_[sort_column_id_] || truncated_attributes_[sort_column_id_]) {
    // NOTE(chasseur): The version from CompressedTupleStorageSubBlock will in
    // turn call getEqualCodes(), getNotEqualCodes(), or getCodesInRange() as
    // necessary for this block, which will use a fast binary search if
    // evaluating a predicate on the sort column.
    return CompressedTupleStorageSubBlock::getMatchesForPredicate(predicate, filter);
  } else {
    TupleIdSequence *matches = SortColumnPredicateEvaluator::EvaluatePredicateForUncompressedSortColumn(
        predicate,
        relation_,
        sort_column_id_,
        column_stripes_[sort_column_id_],
        *static_cast<const tuple_id*>(sub_block_memory_) - uncompressed_nulls_in_sort_column_);
    if (matches == nullptr) {
      // TODO(chasseur): There is considerable duplication of effort in
      // SortColumnPredicateEvaluator::EvaluatePredicateForUncompressedSortColumn()
      // and CompressedTupleStorageSubBlock::getMatchesForPredicate() which we
      // should try to eliminate.
      return CompressedTupleStorageSubBlock::getMatchesForPredicate(predicate, filter);
    } else {
      if (filter != nullptr) {
        matches->intersectWith(*filter);
      }
      return matches;
    }
  }
}
TupleIdSequence* CompressedColumnStoreTupleStorageSubBlock::getNotEqualCodes(
    const attribute_id attr_id,
    const std::uint32_t code,
    const TupleIdSequence *filter) const {
  if (attr_id != sort_column_id_) {
    // Generic path: scan the column stripe with a not-equal comparison.
    return getCodesSatisfyingComparison<not_equal_to>(attr_id, code, filter);
  }

  // Special (fast) case: binary-search the sort column for the run of tuples
  // whose code lies in [code, code + 1), then return that run's complement.
  pair<uint32_t, uint32_t> equal_code_range(code, code + 1);

  // Widen the upper limit to "infinity" when 'code' is already the largest
  // representable code, which avoids a second binary search.
  if (dictionary_coded_attributes_[attr_id]) {
    if (equal_code_range.second == compressedGetDictionary(attr_id).numberOfCodes()) {
      equal_code_range.second = numeric_limits<uint32_t>::max();
    }
  } else if (equal_code_range.first == GetMaxTruncatedValue(compression_info_.attribute_size(attr_id))) {
    equal_code_range.second = numeric_limits<uint32_t>::max();
  }

  const pair<tuple_id, tuple_id> equal_tuples = getCompressedSortColumnRange(equal_code_range);
  const tuple_id num_tuples = *static_cast<const tuple_id*>(sub_block_memory_);

  // Mark everything before and after the run of equal codes.
  TupleIdSequence *result = new TupleIdSequence(num_tuples);
  result->setRange(0, equal_tuples.first, true);
  result->setRange(equal_tuples.second,
                   num_tuples - equal_tuples.second,
                   true);
  if (filter != nullptr) {
    result->intersectWith(*filter);
  }
  return result;
}
TupleIdSequence* TupleStorageSubBlock::getMatchesForPredicate(const Predicate *pred) const {
  // Builds the sequence of IDs of all tuples in this sub-block which satisfy
  // 'pred'.  A null 'pred' matches every existing tuple.  The caller takes
  // ownership of the returned TupleIdSequence.
  //
  // NOTE(review): replaced 'NULL' with 'nullptr' for consistency with the
  // rest of this file; behavior is unchanged.
  TupleIdSequence *matches = new TupleIdSequence();

  const tuple_id max_tid = getMaxTupleID();

  if (pred == nullptr) {
    if (isPacked()) {
      // Packed block: every ID in [0, max_tid] is occupied.
      for (tuple_id tid = 0; tid <= max_tid; ++tid) {
        matches->append(tid);
      }
    } else {
      // Sparse block: skip holes.
      for (tuple_id tid = 0; tid <= max_tid; ++tid) {
        if (hasTupleWithID(tid)) {
          matches->append(tid);
        }
      }
    }
  } else {
    // The isPacked() check is hoisted out of the loops deliberately so that
    // the hot path does a single predicate evaluation per tuple.
    if (isPacked()) {
      for (tuple_id tid = 0; tid <= max_tid; ++tid) {
        if (pred->matchesForSingleTuple(*this, tid)) {
          matches->append(tid);
        }
      }
    } else {
      for (tuple_id tid = 0; tid <= max_tid; ++tid) {
        if (hasTupleWithID(tid) && (pred->matchesForSingleTuple(*this, tid))) {
          matches->append(tid);
        }
      }
    }
  }

  return matches;
}
TupleIdSequence* CompressedColumnStoreTupleStorageSubBlock::getGreaterOrEqualCodes(
    const attribute_id attr_id,
    const std::uint32_t code,
    const TupleIdSequence *filter) const {
  if (attr_id != sort_column_id_) {
    // Generic scan.  The comparison functor receives 'code' as its FIRST
    // argument, so less_equal selects stored codes >= 'code'.
    return getCodesSatisfyingComparison<less_equal>(attr_id, code, filter);
  }

  // Special (fast) case: binary-search the sort column for [code, +inf).
  const pair<tuple_id, tuple_id> matching_tuples = getCompressedSortColumnRange(
      pair<uint32_t, uint32_t>(code, numeric_limits<uint32_t>::max()));
  TupleIdSequence *result = new TupleIdSequence(*static_cast<const tuple_id*>(sub_block_memory_));
  result->setRange(matching_tuples.first,
                   matching_tuples.second - matching_tuples.first,
                   true);
  if (filter != nullptr) {
    result->intersectWith(*filter);
  }
  return result;
}
// Collects the IDs of all tuples whose compressed code for 'attr_id' lies in
// the half-open interval [range.first, range.second).  Caller takes ownership
// of the returned sequence.
TupleIdSequence* CompressedPackedRowStoreTupleStorageSubBlock::getCodesInRange(
    const attribute_id attr_id,
    const std::pair<std::uint32_t, std::uint32_t> range) const {
  TupleIdSequence *matches = new TupleIdSequence();
  // Point at this attribute's slot inside the first row; each iteration below
  // strides forward by the fixed row width (tuple_length_bytes_).
  const char *attr_location = static_cast<const char*>(tuple_storage_)
                              + attribute_offsets_[attr_id];
  // The loop bound reads the leading tuple_id at sub_block_memory_, which is
  // used throughout this file as the number of tuples in the sub-block.
  // Separate cases per code width so the read is a single aligned load.
  switch (compression_info_.attribute_size(attr_id)) {
    case 1:
      for (tuple_id tid = 0;
           tid < *static_cast<const tuple_id*>(sub_block_memory_);
           ++tid, attr_location += tuple_length_bytes_) {
        if (range.first <= (*reinterpret_cast<const uint8_t*>(attr_location))
            && (*reinterpret_cast<const uint8_t*>(attr_location) < range.second)) {
          matches->append(tid);
        }
      }
      break;
    case 2:
      for (tuple_id tid = 0;
           tid < *static_cast<const tuple_id*>(sub_block_memory_);
           ++tid, attr_location += tuple_length_bytes_) {
        if (range.first <= (*reinterpret_cast<const uint16_t*>(attr_location))
            && (*reinterpret_cast<const uint16_t*>(attr_location) < range.second)) {
          matches->append(tid);
        }
      }
      break;
    case 4:
      for (tuple_id tid = 0;
           tid < *static_cast<const tuple_id*>(sub_block_memory_);
           ++tid, attr_location += tuple_length_bytes_) {
        if (range.first <= (*reinterpret_cast<const uint32_t*>(attr_location))
            && (*reinterpret_cast<const uint32_t*>(attr_location) < range.second)) {
          matches->append(tid);
        }
      }
      break;
    default:
      // Compressed codes are always 1, 2, or 4 bytes; anything else indicates
      // corrupted compression metadata.
      FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
                  "attribute ID " << attr_id
                  << " in CompressedPackedRowStoreTupleStorageSubBlock::getCodesInRange()");
  }
  return matches;
}
// Scan-based evaluation over a column stripe: marks every tuple whose stored
// code satisfies comp(code, stored_code).  Note the argument order — 'code'
// (the literal) is the FIRST functor argument, so e.g. less_equal selects
// stored codes >= 'code'.  (The template header for 'comparison_functor' is
// declared outside this excerpt.)  Caller takes ownership of the result.
TupleIdSequence* CompressedColumnStoreTupleStorageSubBlock::getCodesSatisfyingComparison(
    const attribute_id attr_id,
    const std::uint32_t code,
    const TupleIdSequence *filter) const {
#ifdef QUICKSTEP_ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT
  static constexpr bool short_circuit = true;
#else
  static constexpr bool short_circuit = false;
#endif
  comparison_functor<uint32_t> comp;
  // The leading tuple_id at sub_block_memory_ is the tuple count; it sizes
  // the result bitmap and bounds the full scans below.
  TupleIdSequence *matches = new TupleIdSequence(*static_cast<const tuple_id*>(sub_block_memory_));
  const void *attr_stripe = column_stripes_[attr_id];
  if (!short_circuit || (filter == nullptr)) {
    // Full scan of the stripe; the filter (if any) is intersected afterwards.
    // One case per code width so each access is a simple typed array read.
    switch (compression_info_.attribute_size(attr_id)) {
      case 1:
        for (tuple_id tid = 0;
             tid < *static_cast<const tuple_id*>(sub_block_memory_);
             ++tid) {
          if (comp(code, static_cast<const uint8_t*>(attr_stripe)[tid])) {
            matches->set(tid);
          }
        }
        break;
      case 2:
        for (tuple_id tid = 0;
             tid < *static_cast<const tuple_id*>(sub_block_memory_);
             ++tid) {
          if (comp(code, static_cast<const uint16_t*>(attr_stripe)[tid])) {
            matches->set(tid);
          }
        }
        break;
      case 4:
        for (tuple_id tid = 0;
             tid < *static_cast<const tuple_id*>(sub_block_memory_);
             ++tid) {
          if (comp(code, static_cast<const uint32_t*>(attr_stripe)[tid])) {
            matches->set(tid);
          }
        }
        break;
      default:
        FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
                    "attribute ID " << attr_id
                    << " in CompressedColumnStoreTupleStorageSubBlock::getCodesSatisfyingComparison()");
    }
    if (filter != nullptr) {
      matches->intersectWith(*filter);
    }
  } else {
    // Short-circuit path: only visit positions already set in 'filter', so no
    // final intersection is needed.
    switch (compression_info_.attribute_size(attr_id)) {
      case 1:
        for (TupleIdSequence::const_iterator filter_it = filter->begin();
             filter_it != filter->end();
             ++filter_it) {
          if (comp(code, static_cast<const uint8_t*>(attr_stripe)[*filter_it])) {
            matches->set(*filter_it);
          }
        }
        break;
      case 2:
        for (TupleIdSequence::const_iterator filter_it = filter->begin();
             filter_it != filter->end();
             ++filter_it) {
          if (comp(code, static_cast<const uint16_t*>(attr_stripe)[*filter_it])) {
            matches->set(*filter_it);
          }
        }
        break;
      case 4:
        for (TupleIdSequence::const_iterator filter_it = filter->begin();
             filter_it != filter->end();
             ++filter_it) {
          if (comp(code, static_cast<const uint32_t*>(attr_stripe)[*filter_it])) {
            matches->set(*filter_it);
          }
        }
        break;
      default:
        FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
                    "attribute ID " << attr_id
                    << " in CompressedColumnStoreTupleStorageSubBlock::getCodesSatisfyingComparison()");
    }
  }

  return matches;
}
// Collects the IDs of all tuples whose compressed code for 'attr_id' lies in
// [range.first, range.second).  Uses a binary search when 'attr_id' is the
// sort column, and a stripe scan otherwise.  Caller takes ownership.
TupleIdSequence* CompressedColumnStoreTupleStorageSubBlock::getCodesInRange(
    const attribute_id attr_id,
    const std::pair<std::uint32_t, std::uint32_t> range,
    const TupleIdSequence *filter) const {
#ifdef QUICKSTEP_ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT
  static constexpr bool short_circuit = true;
#else
  static constexpr bool short_circuit = false;
#endif
  // Sized by the tuple count stored as the leading tuple_id in sub_block_memory_.
  TupleIdSequence *matches = new TupleIdSequence(*static_cast<const tuple_id*>(sub_block_memory_));
  if (attr_id == sort_column_id_) {
    // Special (fast) case: do a binary search of the sort column.
    pair<tuple_id, tuple_id> tuple_range = getCompressedSortColumnRange(range);
    matches->setRange(tuple_range.first, tuple_range.second - tuple_range.first, true);
    if (filter != nullptr) {
      matches->intersectWith(*filter);
    }
  } else {
    const void *attr_stripe = column_stripes_[attr_id];
    if (!short_circuit || (filter == nullptr)) {
      // Full stripe scan; intersect with the filter (if any) at the end.
      // One case per code width so each access is a simple typed array read.
      switch (compression_info_.attribute_size(attr_id)) {
        case 1:
          for (tuple_id tid = 0;
               tid < *static_cast<const tuple_id*>(sub_block_memory_);
               ++tid) {
            if (range.first <= (static_cast<const uint8_t*>(attr_stripe)[tid])
                && (static_cast<const uint8_t*>(attr_stripe)[tid] < range.second)) {
              matches->set(tid);
            }
          }
          break;
        case 2:
          for (tuple_id tid = 0;
               tid < *static_cast<const tuple_id*>(sub_block_memory_);
               ++tid) {
            if (range.first <= (static_cast<const uint16_t*>(attr_stripe)[tid])
                && (static_cast<const uint16_t*>(attr_stripe)[tid] < range.second)) {
              matches->set(tid);
            }
          }
          break;
        case 4:
          for (tuple_id tid = 0;
               tid < *static_cast<const tuple_id*>(sub_block_memory_);
               ++tid) {
            if (range.first <= (static_cast<const uint32_t*>(attr_stripe)[tid])
                && (static_cast<const uint32_t*>(attr_stripe)[tid] < range.second)) {
              matches->set(tid);
            }
          }
          break;
        default:
          FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
                      "attribute ID " << attr_id
                      << " in CompressedColumnStoreTupleStorageSubBlock::getCodesInRange()");
      }
      if (filter != nullptr) {
        matches->intersectWith(*filter);
      }
    } else {
      // Short-circuit path: only visit positions already set in 'filter', so
      // no final intersection is needed.
      switch (compression_info_.attribute_size(attr_id)) {
        case 1:
          for (TupleIdSequence::const_iterator filter_it = filter->begin();
               filter_it != filter->end();
               ++filter_it) {
            if (range.first <= (static_cast<const uint8_t*>(attr_stripe)[*filter_it])
                && (static_cast<const uint8_t*>(attr_stripe)[*filter_it] < range.second)) {
              matches->set(*filter_it);
            }
          }
          break;
        case 2:
          for (TupleIdSequence::const_iterator filter_it = filter->begin();
               filter_it != filter->end();
               ++filter_it) {
            if (range.first <= (static_cast<const uint16_t*>(attr_stripe)[*filter_it])
                && (static_cast<const uint16_t*>(attr_stripe)[*filter_it] < range.second)) {
              matches->set(*filter_it);
            }
          }
          break;
        case 4:
          for (TupleIdSequence::const_iterator filter_it = filter->begin();
               filter_it != filter->end();
               ++filter_it) {
            if (range.first <= (static_cast<const uint32_t*>(attr_stripe)[*filter_it])
                && (static_cast<const uint32_t*>(attr_stripe)[*filter_it] < range.second)) {
              matches->set(*filter_it);
            }
          }
          break;
        default:
          FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
                      "attribute ID " << attr_id
                      << " in CompressedColumnStoreTupleStorageSubBlock::getCodesInRange()");
      }
    }
  }

  return matches;
}
// Collects the IDs of all tuples whose code for 'attr_id' is neither 'code'
// nor 'null_code'.  Fast binary-search path for the sort column, stripe scan
// otherwise.  Caller takes ownership of the returned sequence.
//
// FIXES(review): in the scan path's case 4, (1) the loop bound used '<='
// and read one element past the last tuple, and (2) both case-4 branches
// read the stripe through 'const uint16_t*' instead of 'const uint32_t*',
// which would compare the wrong bytes for 4-byte codes.
TupleIdSequence* CompressedColumnStoreTupleStorageSubBlock::getNotEqualCodesExcludingNull(
    const attribute_id attr_id,
    const std::uint32_t code,
    const std::uint32_t null_code,
    const TupleIdSequence *filter) const {
#ifdef QUICKSTEP_ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT
  static constexpr bool short_circuit = true;
#else
  static constexpr bool short_circuit = false;
#endif
  if (attr_id == sort_column_id_) {
    // Special (fast) case: do a binary search of the sort column.
    pair<uint32_t, uint32_t> code_range(code, code + 1);

    // Adjust the upper limit if doing so can avoid an extra binary search.
    if (dictionary_coded_attributes_[attr_id]) {
      if (code_range.second == compressedGetDictionary(attr_id).numberOfCodes()) {
        code_range.second = numeric_limits<uint32_t>::max();
      }
    } else if (code_range.first == GetMaxTruncatedValue(compression_info_.attribute_size(attr_id))) {
      code_range.second = numeric_limits<uint32_t>::max();
    }

    pair<tuple_id, tuple_id> tuple_range = getCompressedSortColumnRange(code_range);

    // Search for the beginning of nulls in the sort column.
    pair<uint32_t, uint32_t> null_range(null_code, numeric_limits<uint32_t>::max());
    pair<tuple_id, tuple_id> limit_range = getCompressedSortColumnRange(null_range);

    // We searched for the range of equal codes, so return its complement,
    // stopping short of the run of nulls at the tail of the sorted column.
    TupleIdSequence *matches = new TupleIdSequence(*static_cast<const tuple_id*>(sub_block_memory_));
    matches->setRange(0, tuple_range.first, true);
    matches->setRange(tuple_range.second, limit_range.first - tuple_range.second, true);
    if (filter != nullptr) {
      matches->intersectWith(*filter);
    }
    return matches;
  } else {
    TupleIdSequence *matches = new TupleIdSequence(*static_cast<const tuple_id*>(sub_block_memory_));
    const void *attr_stripe = column_stripes_[attr_id];
    if (!short_circuit || (filter == nullptr)) {
      // Full stripe scan; intersect with the filter (if any) at the end.
      switch (compression_info_.attribute_size(attr_id)) {
        case 1:
          for (tuple_id tid = 0;
               tid < *static_cast<const tuple_id*>(sub_block_memory_);
               ++tid) {
            if ((code != static_cast<const uint8_t*>(attr_stripe)[tid])
                && (null_code != static_cast<const uint8_t*>(attr_stripe)[tid])) {
              matches->set(tid);
            }
          }
          break;
        case 2:
          for (tuple_id tid = 0;
               tid < *static_cast<const tuple_id*>(sub_block_memory_);
               ++tid) {
            if ((code != static_cast<const uint16_t*>(attr_stripe)[tid])
                && (null_code != static_cast<const uint16_t*>(attr_stripe)[tid])) {
              matches->set(tid);
            }
          }
          break;
        case 4:
          // Fixed: bound was 'tid <=' (off-by-one) and the stripe was read as
          // uint16_t instead of uint32_t.
          for (tuple_id tid = 0;
               tid < *static_cast<const tuple_id*>(sub_block_memory_);
               ++tid) {
            if ((code != static_cast<const uint32_t*>(attr_stripe)[tid])
                && (null_code != static_cast<const uint32_t*>(attr_stripe)[tid])) {
              matches->set(tid);
            }
          }
          break;
        default:
          FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
                      "attribute ID " << attr_id
                      << " in CompressedColumnStoreTupleStorageSubBlock::getNotEqualCodesExcludingNull()");
      }
      if (filter != nullptr) {
        matches->intersectWith(*filter);
      }
    } else {
      // Short-circuit path: only visit positions already set in 'filter'.
      switch (compression_info_.attribute_size(attr_id)) {
        case 1:
          for (TupleIdSequence::const_iterator filter_it = filter->begin();
               filter_it != filter->end();
               ++filter_it) {
            if ((code != static_cast<const uint8_t*>(attr_stripe)[*filter_it])
                && (null_code != static_cast<const uint8_t*>(attr_stripe)[*filter_it])) {
              matches->set(*filter_it);
            }
          }
          break;
        case 2:
          for (TupleIdSequence::const_iterator filter_it = filter->begin();
               filter_it != filter->end();
               ++filter_it) {
            if ((code != static_cast<const uint16_t*>(attr_stripe)[*filter_it])
                && (null_code != static_cast<const uint16_t*>(attr_stripe)[*filter_it])) {
              matches->set(*filter_it);
            }
          }
          break;
        case 4:
          // Fixed: the stripe was read as uint16_t instead of uint32_t.
          for (TupleIdSequence::const_iterator filter_it = filter->begin();
               filter_it != filter->end();
               ++filter_it) {
            if ((code != static_cast<const uint32_t*>(attr_stripe)[*filter_it])
                && (null_code != static_cast<const uint32_t*>(attr_stripe)[*filter_it])) {
              matches->set(*filter_it);
            }
          }
          break;
        default:
          FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
                      "attribute ID " << attr_id
                      << " in CompressedColumnStoreTupleStorageSubBlock::getNotEqualCodesExcludingNull()");
      }
    }
    return matches;
  }
}
 /**
  * @brief Take the intersection of this TupleIdSequence with another's
  *        complement (i.e. set difference), modifying this TupleIdSequence
  *        in-place.
  * @warning This TupleIdSequence must be the same length as the other, and
  *          the set-difference only has semantic meaning if both
  *          TupleIdSequences refer to tuples in the same block.
  *
  * @param other Another TupleIdSequence to intersect with the complement of.
  **/
 inline void intersectWithComplement(const TupleIdSequence &other) {
   // Positional bit operations are only valid on equal-length sequences.
   DEBUG_ASSERT(length() == other.length());
   // Clear every bit in this sequence that is set in 'other'.
   internal_bitvector_.unsetFrom(other.internal_bitvector_);
 }
// Filter-aware overload: collects IDs of tuples whose compressed code for
// 'attr_id' lies in [range.first, range.second).  Caller takes ownership.
TupleIdSequence* CompressedPackedRowStoreTupleStorageSubBlock::getCodesInRange(
    const attribute_id attr_id,
    const std::pair<std::uint32_t, std::uint32_t> range,
    const TupleIdSequence *filter) const {
#ifdef QUICKSTEP_ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT
  static constexpr bool short_circuit = true;
#else
  static constexpr bool short_circuit = false;
#endif
  // Sized by the tuple count stored as the leading tuple_id in sub_block_memory_.
  TupleIdSequence *matches = new TupleIdSequence(*static_cast<const tuple_id*>(sub_block_memory_));
  // Base address of this attribute's slot in the first row.
  const char *attr_location = static_cast<const char*>(tuple_storage_)
                              + attribute_offsets_[attr_id];
  if (!short_circuit || (filter == nullptr)) {
    // Full scan: stride row-by-row; intersect with the filter afterwards.
    switch (compression_info_.attribute_size(attr_id)) {
      case 1:
        for (tuple_id tid = 0;
             tid < *static_cast<const tuple_id*>(sub_block_memory_);
             ++tid, attr_location += tuple_length_bytes_) {
          if (range.first <= (*reinterpret_cast<const uint8_t*>(attr_location))
              && (*reinterpret_cast<const uint8_t*>(attr_location) < range.second)) {
            matches->set(tid);
          }
        }
        break;
      case 2:
        for (tuple_id tid = 0;
             tid < *static_cast<const tuple_id*>(sub_block_memory_);
             ++tid, attr_location += tuple_length_bytes_) {
          if (range.first <= (*reinterpret_cast<const uint16_t*>(attr_location))
              && (*reinterpret_cast<const uint16_t*>(attr_location) < range.second)) {
            matches->set(tid);
          }
        }
        break;
      case 4:
        for (tuple_id tid = 0;
             tid < *static_cast<const tuple_id*>(sub_block_memory_);
             ++tid, attr_location += tuple_length_bytes_) {
          if (range.first <= (*reinterpret_cast<const uint32_t*>(attr_location))
              && (*reinterpret_cast<const uint32_t*>(attr_location) < range.second)) {
            matches->set(tid);
          }
        }
        break;
      default:
        FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
                    "attribute ID " << attr_id
                    << " in CompressedPackedRowStoreTupleStorageSubBlock::getCodesInRange()");
    }
    if (filter != nullptr) {
      matches->intersectWith(*filter);
    }
  } else {
    // Short-circuit path: only the positions set in 'filter' are examined, so
    // the per-tuple address is computed from the base instead of strided.
    switch (compression_info_.attribute_size(attr_id)) {
      case 1:
        for (TupleIdSequence::const_iterator filter_it = filter->begin();
             filter_it != filter->end();
             ++filter_it) {
          const void *local_attr_location = attr_location + (*filter_it) * tuple_length_bytes_;
          if (range.first <= (*reinterpret_cast<const uint8_t*>(local_attr_location))
              && (*reinterpret_cast<const uint8_t*>(local_attr_location) < range.second)) {
            matches->set(*filter_it);
          }
        }
        break;
      case 2:
        for (TupleIdSequence::const_iterator filter_it = filter->begin();
             filter_it != filter->end();
             ++filter_it) {
          const void *local_attr_location = attr_location + (*filter_it) * tuple_length_bytes_;
          if (range.first <= (*reinterpret_cast<const uint16_t*>(local_attr_location))
              && (*reinterpret_cast<const uint16_t*>(local_attr_location) < range.second)) {
            matches->set(*filter_it);
          }
        }
        break;
      case 4:
        for (TupleIdSequence::const_iterator filter_it = filter->begin();
             filter_it != filter->end();
             ++filter_it) {
          const void *local_attr_location = attr_location + (*filter_it) * tuple_length_bytes_;
          if (range.first <= (*reinterpret_cast<const uint32_t*>(local_attr_location))
              && (*reinterpret_cast<const uint32_t*>(local_attr_location) < range.second)) {
            matches->set(*filter_it);
          }
        }
        break;
      default:
        FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
                    "attribute ID " << attr_id
                    << " in CompressedPackedRowStoreTupleStorageSubBlock::getCodesInRange()");
    }
  }
  return matches;
}
// Example 11
// Compares every string in 'column_vector' against the single LIKE/REGEX
// pattern in 'static_value'.  (The template<> header for this member is
// declared outside this excerpt.)  Caller takes ownership of the result.
TupleIdSequence* PatternMatchingUncheckedComparator<is_like_pattern, is_negation,
                                                    left_nullable, right_nullable>
    ::compareColumnVectorAndStaticValue(
        const ColumnVector &column_vector,
        const TypedValue &static_value,
        const TupleIdSequence *filter,
        const TupleIdSequence *existence_bitmap) const {
  // Specialized implementation for matching a ColumnVector of strings to a
  // single pattern. In this situation, the pattern will be compiled only once
  // in advance and then matched by each of the string in the ColumnVector.
  return InvokeOnColumnVector(
      column_vector,
      [&](const auto &column_vector) -> TupleIdSequence* {  // NOLINT(build/c++11)
#ifdef QUICKSTEP_ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT
    static constexpr bool short_circuit = true;
#else
    static constexpr bool short_circuit = false;
#endif
    // Sanity-check that the bitmap/filter sizes are mutually consistent.
    DCHECK((existence_bitmap == nullptr)
           || (existence_bitmap->numTuples() == column_vector.size()));
    DCHECK((filter == nullptr)
           || ((existence_bitmap == nullptr) ? (filter->length() == column_vector.size())
                                             : (filter->length() == existence_bitmap->length())));
    TupleIdSequence *result = new TupleIdSequence(
        (existence_bitmap == nullptr) ? column_vector.size()
                                      : existence_bitmap->length());
    // A NULL pattern matches nothing: return the all-false sequence.
    if (right_nullable && static_value.isNull()) {
      return result;
    }

    // Transform and compile pattern in advance before the loop.
    const char *pattern = static_cast<const char *>(static_value.getDataPtr());
    std::string regex_pattern;
    re2::StringPiece pattern_piece;
    std::size_t orig_pattern_len = strnlen(pattern, right_length_);
    if (is_like_pattern) {
      // LIKE patterns are rewritten to an equivalent regex first.
      regex_pattern = this->transformLikeToRegex(pattern, orig_pattern_len);
      pattern_piece.set(regex_pattern.c_str(), regex_pattern.size());
    } else {
      pattern_piece.set(pattern, orig_pattern_len);
    }
    const re2::RE2 re2_pattern(pattern_piece);

    if (short_circuit && (filter != nullptr)) {
      // Only evaluate positions that pass 'filter'.
      if (existence_bitmap != nullptr) {
        // ColumnVector positions map to tuple IDs via the existence bitmap.
        TupleIdSequence::const_iterator existence_it = existence_bitmap->begin();
        for (std::size_t cv_pos = 0;
             cv_pos < column_vector.size();
             ++cv_pos) {
          if (filter->get(*existence_it)) {
            const void *cv_value
                = column_vector.template getUntypedValue<left_nullable>(cv_pos);
            // NULL strings never match, regardless of pattern.
            result->set(*existence_it,
                        !(left_nullable && (cv_value == nullptr))
                            && this->matchDataPtrWithPattern(cv_value, re2_pattern));
          }
          ++existence_it;
        }
      } else {
        for (TupleIdSequence::const_iterator filter_it = filter->begin();
             filter_it != filter->end();
             ++filter_it) {
          const void *cv_value
              = column_vector.template getUntypedValue<left_nullable>(*filter_it);
          result->set(*filter_it,
                      !(left_nullable && (cv_value == nullptr))
                          && this->matchDataPtrWithPattern(cv_value, re2_pattern));
        }
      }
    } else {
      // Evaluate every position, then intersect with 'filter' at the end.
      if (existence_bitmap != nullptr) {
        TupleIdSequence::const_iterator existence_it = existence_bitmap->begin();
        for (std::size_t cv_pos = 0;
             cv_pos < column_vector.size();
             ++cv_pos) {
          const void *cv_value
              = column_vector.template getUntypedValue<left_nullable>(cv_pos);
          result->set(*existence_it,
                      !(left_nullable && (cv_value == nullptr))
                          && this->matchDataPtrWithPattern(cv_value, re2_pattern));
          ++existence_it;
        }
      } else {
        for (std::size_t pos = 0;
             pos < column_vector.size();
             ++pos) {
          const void *cv_value
              = column_vector.template getUntypedValue<left_nullable>(pos);
          result->set(pos,
                      !(left_nullable && (cv_value == nullptr))
                          && this->matchDataPtrWithPattern(cv_value, re2_pattern));
        }
      }
      if (!short_circuit && (filter != nullptr)) {
        result->intersectWith(*filter);
      }
    }

    return result;
  });
}
// Example 12
TupleIdSequence* PatternMatchingUncheckedComparator<is_like_pattern, is_negation,
                                                    left_nullable, right_nullable>
    ::compareValueAccessorAndStaticValue(
        ValueAccessor *accessor,
        const attribute_id value_accessor_attr_id,
        const TypedValue &static_value,
        const TupleIdSequence *filter) const {
  // Specialized implementation for matching a ValueAccessor of strings to a
  // single pattern. In this situation, the pattern will be compiled only once
  // in advance and then matched by each of the string in the ValueAccessor.
  return InvokeOnValueAccessorMaybeTupleIdSequenceAdapter(
      accessor,
      [&](auto *accessor) -> TupleIdSequence* {  // NOLINT(build/c++11)
#ifdef QUICKSTEP_ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT
    static constexpr bool short_circuit = true;
#else
    static constexpr bool short_circuit = false;
#endif
    TupleIdSequence *result = new TupleIdSequence(accessor->getEndPosition());
    if (right_nullable && static_value.isNull()) {
      return result;
    }

    // Transform and compile pattern in advance before the loop.
    const char *pattern = static_cast<const char *>(static_value.getDataPtr());
    std::string regex_pattern;
    re2::StringPiece pattern_piece;
    std::size_t orig_pattern_len = strnlen(pattern, right_length_);
    if (is_like_pattern) {
      regex_pattern = this->transformLikeToRegex(pattern, orig_pattern_len);
      pattern_piece.set(regex_pattern.c_str(), regex_pattern.size());
    } else {
      pattern_piece.set(pattern, orig_pattern_len);
    }
    const re2::RE2 re2_pattern(pattern_piece);

    if (short_circuit && (filter != nullptr)) {
      DCHECK_EQ(filter->length(), result->length());
      for (TupleIdSequence::const_iterator filter_it = filter->begin();
           filter_it != filter->end();
           ++filter_it) {
        const void *va_value
            = accessor->template getUntypedValueAtAbsolutePosition<left_nullable>(value_accessor_attr_id,
                                                                                  *filter_it);
        result->set(*filter_it,
                    this->matchDataPtrWithPattern(va_value, re2_pattern));
      }
    } else {
      accessor->beginIteration();
      if (accessor->isColumnAccessorSupported()) {
        // If ColumnAccessor is supported on the underlying accessor, we have a fast strided
        // column accessor available for the iteration on the underlying block.
        std::unique_ptr<const ColumnAccessor<left_nullable>>
            column_accessor
            (accessor->template getColumnAccessor<left_nullable>(value_accessor_attr_id));
        DCHECK(column_accessor != nullptr);
        while (accessor->next()) {
          const void *va_value = column_accessor->getUntypedValue();
          result->set(accessor->getCurrentPosition(),
                      this->matchDataPtrWithPattern(va_value, re2_pattern));
        }
      } else {
        while (accessor->next()) {
          const void *va_value
              = accessor->template getUntypedValue<left_nullable>(value_accessor_attr_id);
          result->set(accessor->getCurrentPosition(),
                      this->matchDataPtrWithPattern(va_value, re2_pattern));
        }
      }
      if (!short_circuit && (filter != nullptr)) {
        result->intersectWith(*filter);
      }
    }

    return result;
  });
}
TupleIdSequence* SortColumnPredicateEvaluator::EvaluatePredicateForUncompressedSortColumn(
    const Predicate &predicate,
    const CatalogRelation &relation,
    const attribute_id sort_attribute_id,
    void *sort_attribute_stripe,
    const tuple_id num_tuples) {
  // Only a comparison between an attribute and a literal can be evaluated by
  // binary-searching the sorted stripe; any other predicate passes through
  // (signalled by returning NULL).
  if (!predicate.isAttributeLiteralComparisonPredicate()) {
    return NULL;
  }

  const ComparisonPredicate &comp_pred = static_cast<const ComparisonPredicate&>(predicate);

  // Work out which side of the comparison is the attribute and which is the
  // static literal.
  const CatalogAttribute *compared_attribute = NULL;
  bool literal_on_left = false;
  if (comp_pred.getLeftOperand().hasStaticValue()) {
    DEBUG_ASSERT(comp_pred.getRightOperand().getDataSource() == Scalar::kAttribute);
    compared_attribute
        = &(static_cast<const ScalarAttribute&>(comp_pred.getRightOperand()).getAttribute());
    literal_on_left = true;
  } else {
    DEBUG_ASSERT(comp_pred.getLeftOperand().getDataSource() == Scalar::kAttribute);
    compared_attribute
        = &(static_cast<const ScalarAttribute&>(comp_pred.getLeftOperand()).getAttribute());
  }

  DEBUG_ASSERT(compared_attribute->getParent().getID() == relation.getID());
  // The fast path only applies when the compared attribute is the sort column.
  if (compared_attribute->getID() != sort_attribute_id) {
    return NULL;
  }

  const LiteralTypeInstance *literal = literal_on_left
      ? &(comp_pred.getLeftOperand().getStaticValue())
      : &(comp_pred.getRightOperand().getStaticValue());

  // NOTE(chasseur): A standards-compliant implementation of lower_bound
  // always compares the iterator on the left with the literal on the right,
  // while upper_bound compares the literal on the left with the iterator
  // on the right. Two separate comparators are therefore built, one for each
  // argument order, which also works when the attribute and the literal have
  // different types.
  const Comparison &less_comparison = Comparison::GetComparison(Comparison::kLess);
  ScopedPtr<UncheckedComparator> attr_less_literal(
      less_comparison.makeUncheckedComparatorForTypes(compared_attribute->getType(),
                                                      literal->getType()));
  STLUncheckedComparatorWrapper comp_for_lower(*attr_less_literal);
  ScopedPtr<UncheckedComparator> literal_less_attr(
      less_comparison.makeUncheckedComparatorForTypes(literal->getType(),
                                                      compared_attribute->getType()));
  STLUncheckedComparatorWrapper comp_for_upper(*literal_less_attr);

  // Iterators over the column stripe holding the (sorted) attribute values.
  const auto value_length
      = relation.getAttributeById(sort_attribute_id).getType().maximumByteLength();
  ColumnStripeIterator stripe_begin(sort_attribute_stripe, value_length, 0);
  ColumnStripeIterator stripe_end(sort_attribute_stripe, value_length, num_tuples);

  // Binary-search helpers: position of the first value >= literal
  // (lower_bound) and of the first value > literal (upper_bound).
  const auto first_ge_literal = [&]() -> tuple_id {
    return lower_bound(stripe_begin,
                       stripe_end,
                       literal->getDataPtr(),
                       comp_for_lower).getTuplePosition();
  };
  const auto first_gt_literal = [&]() -> tuple_id {
    return upper_bound(stripe_begin,
                       stripe_end,
                       literal->getDataPtr(),
                       comp_for_upper).getTuplePosition();
  };

  // Compute the half-open range [range_begin, range_end) of matching tuples.
  tuple_id range_begin = 0;
  tuple_id range_end = num_tuples;
  switch (comp_pred.getComparison().getComparisonID()) {
    case Comparison::kEqual:
    // Note: kNotEqual computes the same range as kEqual here; its complement
    // is taken when the result sequence is built below.
    case Comparison::kNotEqual:
      range_begin = first_ge_literal();
      range_end = first_gt_literal();
      break;
    case Comparison::kLess:
      if (literal_on_left) {
        range_begin = first_gt_literal();  // literal < attr
      } else {
        range_end = first_ge_literal();    // attr < literal
      }
      break;
    case Comparison::kLessOrEqual:
      if (literal_on_left) {
        range_begin = first_ge_literal();  // literal <= attr
      } else {
        range_end = first_gt_literal();    // attr <= literal
      }
      break;
    case Comparison::kGreater:
      if (literal_on_left) {
        range_end = first_ge_literal();    // literal > attr
      } else {
        range_begin = first_gt_literal();  // attr > literal
      }
      break;
    case Comparison::kGreaterOrEqual:
      if (literal_on_left) {
        range_end = first_gt_literal();    // literal >= attr
      } else {
        range_begin = first_ge_literal();  // attr >= literal
      }
      break;
    default:
      FATAL_ERROR("Unknown Comparison in SortColumnPredicateEvaluator::"
                  "EvaluatePredicateForUncompressedSortColumn()");
  }

  // Materialize the sequence of matches (caller takes ownership).
  TupleIdSequence *matches = new TupleIdSequence();
  const auto append_range = [matches](const tuple_id from, const tuple_id limit) {
    for (tuple_id tid = from; tid < limit; ++tid) {
      matches->append(tid);
    }
  };
  if (comp_pred.getComparison().getComparisonID() == Comparison::kNotEqual) {
    // Special case: everything OUTSIDE the range that kEqual would match.
    append_range(0, range_begin);
    append_range(range_end, num_tuples);
  } else {
    append_range(range_begin, range_end);
  }

  return matches;
}