// Scans the compressed column stripe of 'attr_id' and returns a TupleIdSequence
// (allocated with new; this function does not delete it) in which a bit is set
// for every tuple whose stored code makes comparison_functor<uint32_t>()(code,
// stored_code) true. When 'filter' is non-NULL, only tuples that are also in
// 'filter' can be set in the result.
//
// The stored codes are 1, 2, or 4 bytes wide according to
// compression_info_.attribute_size(attr_id); any other width is a fatal error.
TupleIdSequence* CompressedColumnStoreTupleStorageSubBlock::getCodesSatisfyingComparison(
    const attribute_id attr_id,
    const std::uint32_t code,
    const TupleIdSequence *filter) const {
#ifdef QUICKSTEP_ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT
  static constexpr bool short_circuit = true;
#else
  static constexpr bool short_circuit = false;
#endif
  // The tuple_id stored at the head of sub_block_memory_ both sizes the result
  // and bounds the full scan.
  const tuple_id num_tuples = *static_cast<const tuple_id*>(sub_block_memory_);
  TupleIdSequence *matches = new TupleIdSequence(num_tuples);
  const void *attr_stripe = column_stripes_[attr_id];
  comparison_functor<uint32_t> comp;

  // With short-circuiting enabled and a filter present, only the tuples named
  // by the filter are examined. Otherwise everything is scanned and the filter
  // (if any) is applied afterwards.
  const bool probe_filter_only = short_circuit && (filter != nullptr);

  // One scan routine shared by all code widths; 'codes' is the stripe viewed
  // with the element type matching the attribute's byte width.
  const auto scan_codes = [&](const auto *codes) {
    if (probe_filter_only) {
      for (const tuple_id tid : *filter) {
        if (comp(code, codes[tid])) {
          matches->set(tid);
        }
      }
    } else {
      for (tuple_id tid = 0; tid < num_tuples; ++tid) {
        if (comp(code, codes[tid])) {
          matches->set(tid);
        }
      }
    }
  };

  switch (compression_info_.attribute_size(attr_id)) {
    case 1:
      scan_codes(static_cast<const uint8_t*>(attr_stripe));
      break;
    case 2:
      scan_codes(static_cast<const uint16_t*>(attr_stripe));
      break;
    case 4:
      scan_codes(static_cast<const uint32_t*>(attr_stripe));
      break;
    default:
      FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
                  "attribute ID " << attr_id
                  << " in CompressedColumnStoreTupleStorageSubBlock::getCodesSatisfyingComparison()");
  }

  if (!probe_filter_only && (filter != nullptr)) {
    matches->intersectWith(*filter);
  }

  return matches;
}
// Returns a TupleIdSequence (allocated with new; this function does not delete
// it) with a bit set for every tuple whose compressed code for 'attr_id' lies
// in the half-open interval [range.first, range.second). When 'filter' is
// non-NULL, only tuples that are also in 'filter' can be set in the result.
//
// If 'attr_id' is the sort column, the matching tuples are located via
// getCompressedSortColumnRange() instead of scanning. Otherwise the column
// stripe is scanned; codes are 1, 2, or 4 bytes wide according to
// compression_info_.attribute_size(attr_id), and any other width is a fatal
// error.
TupleIdSequence* CompressedColumnStoreTupleStorageSubBlock::getCodesInRange(
    const attribute_id attr_id,
    const std::pair<std::uint32_t, std::uint32_t> range,
    const TupleIdSequence *filter) const {
#ifdef QUICKSTEP_ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT
  static constexpr bool short_circuit = true;
#else
  static constexpr bool short_circuit = false;
#endif
  // The tuple_id stored at the head of sub_block_memory_ both sizes the result
  // and bounds the full scan.
  const tuple_id num_tuples = *static_cast<const tuple_id*>(sub_block_memory_);
  TupleIdSequence *matches = new TupleIdSequence(num_tuples);

  if (attr_id == sort_column_id_) {
    // Fast path: the sort column yields one contiguous run of tuples, so no
    // per-tuple scan is needed.
    const pair<tuple_id, tuple_id> sorted_run = getCompressedSortColumnRange(range);
    matches->setRange(sorted_run.first, sorted_run.second - sorted_run.first, true);
    if (filter != nullptr) {
      matches->intersectWith(*filter);
    }
    return matches;
  }

  const void *attr_stripe = column_stripes_[attr_id];

  // With short-circuiting enabled and a filter present, only the tuples named
  // by the filter are examined. Otherwise everything is scanned and the filter
  // (if any) is applied afterwards.
  const bool probe_filter_only = short_circuit && (filter != nullptr);

  // One scan routine shared by all code widths; 'codes' is the stripe viewed
  // with the element type matching the attribute's byte width.
  const auto scan_codes = [&](const auto *codes) {
    if (probe_filter_only) {
      for (const tuple_id tid : *filter) {
        if ((range.first <= codes[tid]) && (codes[tid] < range.second)) {
          matches->set(tid);
        }
      }
    } else {
      for (tuple_id tid = 0; tid < num_tuples; ++tid) {
        if ((range.first <= codes[tid]) && (codes[tid] < range.second)) {
          matches->set(tid);
        }
      }
    }
  };

  switch (compression_info_.attribute_size(attr_id)) {
    case 1:
      scan_codes(static_cast<const uint8_t*>(attr_stripe));
      break;
    case 2:
      scan_codes(static_cast<const uint16_t*>(attr_stripe));
      break;
    case 4:
      scan_codes(static_cast<const uint32_t*>(attr_stripe));
      break;
    default:
      FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
                  "attribute ID " << attr_id
                  << " in CompressedColumnStoreTupleStorageSubBlock::getCodesInRange()");
  }

  if (!probe_filter_only && (filter != nullptr)) {
    matches->intersectWith(*filter);
  }

  return matches;
}
// Returns a TupleIdSequence (allocated with new; this function does not delete
// it) with a bit set for every tuple whose compressed code for 'attr_id' is
// neither 'code' nor 'null_code'. When 'filter' is non-NULL, only tuples that
// are also in 'filter' can be set in the result.
//
// If 'attr_id' is the sort column, the run of tuples equal to 'code' and the
// start of the null run are located via getCompressedSortColumnRange() and the
// complement is returned. Otherwise the column stripe is scanned; codes are
// 1, 2, or 4 bytes wide according to compression_info_.attribute_size(attr_id),
// and any other width is a fatal error.
TupleIdSequence* CompressedColumnStoreTupleStorageSubBlock::getNotEqualCodesExcludingNull(
    const attribute_id attr_id,
    const std::uint32_t code,
    const std::uint32_t null_code,
    const TupleIdSequence *filter) const {
#ifdef QUICKSTEP_ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT
  static constexpr bool short_circuit = true;
#else
  static constexpr bool short_circuit = false;
#endif
  // The tuple_id stored at the head of sub_block_memory_ both sizes the result
  // and bounds the full scan.
  const tuple_id num_tuples = *static_cast<const tuple_id*>(sub_block_memory_);

  if (attr_id == sort_column_id_) {
    // Special (fast) case: do a binary search of the sort column.
    pair<uint32_t, uint32_t> code_range(code, code + 1);

    // Adjust the upper limit if doing so can avoid an extra binary search.
    if (dictionary_coded_attributes_[attr_id]) {
      if (code_range.second == compressedGetDictionary(attr_id).numberOfCodes()) {
        code_range.second = numeric_limits<uint32_t>::max();
      }
    } else if (code_range.first == GetMaxTruncatedValue(compression_info_.attribute_size(attr_id))) {
      code_range.second = numeric_limits<uint32_t>::max();
    }

    const pair<tuple_id, tuple_id> tuple_range = getCompressedSortColumnRange(code_range);

    // Search for the beginning of nulls in the sort column.
    const pair<uint32_t, uint32_t> null_range(null_code, numeric_limits<uint32_t>::max());
    const pair<tuple_id, tuple_id> limit_range = getCompressedSortColumnRange(null_range);

    // We searched for the range of equal codes, so return its complement:
    // everything before the equal run, plus everything after it up to the
    // first null.
    // NOTE(review): the second setRange() length assumes the equal run ends at
    // or before the start of the null run (i.e. 'code' is not 'null_code') --
    // confirm against callers.
    TupleIdSequence *matches = new TupleIdSequence(num_tuples);
    matches->setRange(0, tuple_range.first, true);
    matches->setRange(tuple_range.second, limit_range.first - tuple_range.second, true);
    if (filter != nullptr) {
      matches->intersectWith(*filter);
    }
    return matches;
  }

  TupleIdSequence *matches = new TupleIdSequence(num_tuples);
  const void *attr_stripe = column_stripes_[attr_id];

  // With short-circuiting enabled and a filter present, only the tuples named
  // by the filter are examined. Otherwise everything is scanned and the filter
  // (if any) is applied afterwards.
  const bool probe_filter_only = short_circuit && (filter != nullptr);

  // All code widths go through this single scan so that the element type used
  // to index the stripe is fixed in exactly one place per width (the switch
  // below) and the loop bound is written only once.
  const auto scan_codes = [&](const auto *codes) {
    if (probe_filter_only) {
      for (const tuple_id tid : *filter) {
        if ((code != codes[tid]) && (null_code != codes[tid])) {
          matches->set(tid);
        }
      }
    } else {
      // Valid tuple IDs are [0, num_tuples); the bound must be exclusive.
      for (tuple_id tid = 0; tid < num_tuples; ++tid) {
        if ((code != codes[tid]) && (null_code != codes[tid])) {
          matches->set(tid);
        }
      }
    }
  };

  switch (compression_info_.attribute_size(attr_id)) {
    case 1:
      scan_codes(static_cast<const uint8_t*>(attr_stripe));
      break;
    case 2:
      scan_codes(static_cast<const uint16_t*>(attr_stripe));
      break;
    case 4:
      // 4-byte codes must be read as uint32_t; a narrower element type would
      // index the wrong bytes of the stripe.
      scan_codes(static_cast<const uint32_t*>(attr_stripe));
      break;
    default:
      FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
                  "attribute ID " << attr_id
                  << " in CompressedColumnStoreTupleStorageSubBlock::getNotEqualCodesExcludingNull()");
  }

  if (!probe_filter_only && (filter != nullptr)) {
    matches->intersectWith(*filter);
  }

  return matches;
}
// Returns a TupleIdSequence (allocated with new; this function does not delete
// it) with a bit set for every tuple whose compressed code for 'attr_id' lies
// in the half-open interval [range.first, range.second). When 'filter' is
// non-NULL, only tuples that are also in 'filter' can be set in the result.
//
// The code of tuple N is read from
//   tuple_storage_ + attribute_offsets_[attr_id] + N * tuple_length_bytes_
// and is 1, 2, or 4 bytes wide according to
// compression_info_.attribute_size(attr_id); any other width is a fatal error.
TupleIdSequence* CompressedPackedRowStoreTupleStorageSubBlock::getCodesInRange(
    const attribute_id attr_id,
    const std::pair<std::uint32_t, std::uint32_t> range,
    const TupleIdSequence *filter) const {
#ifdef QUICKSTEP_ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT
  static constexpr bool short_circuit = true;
#else
  static constexpr bool short_circuit = false;
#endif
  // The tuple_id stored at the head of sub_block_memory_ both sizes the result
  // and bounds the full scan.
  const tuple_id num_tuples = *static_cast<const tuple_id*>(sub_block_memory_);
  TupleIdSequence *matches = new TupleIdSequence(num_tuples);

  // Location of this attribute's code within the first tuple.
  const char *first_code_location = static_cast<const char*>(tuple_storage_)
                                    + attribute_offsets_[attr_id];

  // With short-circuiting enabled and a filter present, only the tuples named
  // by the filter are examined. Otherwise everything is scanned and the filter
  // (if any) is applied afterwards.
  const bool probe_filter_only = short_circuit && (filter != nullptr);

  // One scan routine shared by all code widths; 'read_code' loads a code of
  // the appropriate width from a byte address.
  const auto scan_rows = [&](const auto read_code) {
    if (probe_filter_only) {
      for (const tuple_id tid : *filter) {
        const auto stored_code = read_code(first_code_location + tid * tuple_length_bytes_);
        if ((range.first <= stored_code) && (stored_code < range.second)) {
          matches->set(tid);
        }
      }
    } else {
      // Walk the rows in order, stepping one tuple length at a time.
      const char *cursor = first_code_location;
      for (tuple_id tid = 0; tid < num_tuples; ++tid, cursor += tuple_length_bytes_) {
        const auto stored_code = read_code(cursor);
        if ((range.first <= stored_code) && (stored_code < range.second)) {
          matches->set(tid);
        }
      }
    }
  };

  switch (compression_info_.attribute_size(attr_id)) {
    case 1:
      scan_rows([](const char *location) {
        return *reinterpret_cast<const uint8_t*>(location);
      });
      break;
    case 2:
      scan_rows([](const char *location) {
        return *reinterpret_cast<const uint16_t*>(location);
      });
      break;
    case 4:
      scan_rows([](const char *location) {
        return *reinterpret_cast<const uint32_t*>(location);
      });
      break;
    default:
      FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
                  "attribute ID " << attr_id
                  << " in CompressedPackedRowStoreTupleStorageSubBlock::getCodesInRange()");
  }

  if (!probe_filter_only && (filter != nullptr)) {
    matches->intersectWith(*filter);
  }
  return matches;
}
// Example no. 5
// 0
// Matches every string in 'column_vector' against the single pattern held in
// 'static_value'. The pattern is converted (for LIKE patterns) and compiled
// into an re2::RE2 exactly once, before any value is examined.
//
// Returns a TupleIdSequence (allocated with new; this function does not delete
// it). If 'existence_bitmap' is non-NULL, the result has the bitmap's length
// and the i-th value of the column vector is recorded at the position of the
// i-th tuple in the bitmap; otherwise the result has column_vector.size() bits
// and values are recorded at their own positions. If 'filter' is non-NULL,
// positions outside 'filter' are never set. If the pattern is NULL (only
// checked when right_nullable), the freshly constructed result is returned
// without evaluating any value.
TupleIdSequence* PatternMatchingUncheckedComparator<is_like_pattern, is_negation,
                                                    left_nullable, right_nullable>
    ::compareColumnVectorAndStaticValue(
        const ColumnVector &column_vector,
        const TypedValue &static_value,
        const TupleIdSequence *filter,
        const TupleIdSequence *existence_bitmap) const {
  return InvokeOnColumnVector(
      column_vector,
      [&](const auto &typed_cv) -> TupleIdSequence* {  // NOLINT(build/c++11)
#ifdef QUICKSTEP_ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT
    static constexpr bool short_circuit = true;
#else
    static constexpr bool short_circuit = false;
#endif
    DCHECK((existence_bitmap == nullptr)
           || (existence_bitmap->numTuples() == typed_cv.size()));
    DCHECK((filter == nullptr)
           || ((existence_bitmap == nullptr) ? (filter->length() == typed_cv.size())
                                             : (filter->length() == existence_bitmap->length())));

    const bool has_existence_bitmap = (existence_bitmap != nullptr);
    TupleIdSequence *result = new TupleIdSequence(
        has_existence_bitmap ? existence_bitmap->length()
                             : typed_cv.size());
    if (right_nullable && static_value.isNull()) {
      return result;
    }

    // Build the regex once, up front. 'like_as_regex' must outlive
    // 'regex_source' because the StringPiece only points into it.
    const char *raw_pattern = static_cast<const char *>(static_value.getDataPtr());
    const std::size_t raw_pattern_len = strnlen(raw_pattern, right_length_);
    std::string like_as_regex;
    re2::StringPiece regex_source;
    if (is_like_pattern) {
      like_as_regex = this->transformLikeToRegex(raw_pattern, raw_pattern_len);
      regex_source.set(like_as_regex.c_str(), like_as_regex.size());
    } else {
      regex_source.set(raw_pattern, raw_pattern_len);
    }
    const re2::RE2 re2_pattern(regex_source);

    // With short-circuiting enabled and a filter present, values outside the
    // filter are never matched. Otherwise every value is matched and the
    // filter (if any) is applied to the result afterwards.
    const bool probe_filter_only = short_circuit && (filter != nullptr);

    if (has_existence_bitmap) {
      // Walk the column vector and the existence bitmap in lockstep: the
      // value at 'cv_pos' belongs to the tuple the iterator currently names.
      TupleIdSequence::const_iterator existence_it = existence_bitmap->begin();
      for (std::size_t cv_pos = 0;
           cv_pos < typed_cv.size();
           ++cv_pos, ++existence_it) {
        const auto tid = *existence_it;
        if (probe_filter_only && !filter->get(tid)) {
          continue;
        }
        const void *cv_value
            = typed_cv.template getUntypedValue<left_nullable>(cv_pos);
        const bool is_match
            = !(left_nullable && (cv_value == nullptr))
                  && this->matchDataPtrWithPattern(cv_value, re2_pattern);
        result->set(tid, is_match);
      }
    } else if (probe_filter_only) {
      for (const auto tid : *filter) {
        const void *cv_value
            = typed_cv.template getUntypedValue<left_nullable>(tid);
        const bool is_match
            = !(left_nullable && (cv_value == nullptr))
                  && this->matchDataPtrWithPattern(cv_value, re2_pattern);
        result->set(tid, is_match);
      }
    } else {
      for (std::size_t pos = 0; pos < typed_cv.size(); ++pos) {
        const void *cv_value
            = typed_cv.template getUntypedValue<left_nullable>(pos);
        const bool is_match
            = !(left_nullable && (cv_value == nullptr))
                  && this->matchDataPtrWithPattern(cv_value, re2_pattern);
        result->set(pos, is_match);
      }
    }

    if (!probe_filter_only && (filter != nullptr)) {
      result->intersectWith(*filter);
    }

    return result;
  });
}
// Example no. 6
// 0
// Matches the string attribute 'value_accessor_attr_id' of every tuple
// reachable through 'accessor' against the single pattern held in
// 'static_value'. The pattern is converted (for LIKE patterns) and compiled
// into an re2::RE2 exactly once, before any value is examined.
//
// Returns a TupleIdSequence (allocated with new; this function does not delete
// it) of length accessor->getEndPosition(), with each examined position set to
// the outcome of matchDataPtrWithPattern(). If 'filter' is non-NULL, positions
// outside 'filter' are never set. If the pattern is NULL (only checked when
// right_nullable), the freshly constructed result is returned without
// evaluating any value.
//
// NOTE(review): values are handed to matchDataPtrWithPattern() without a
// separate null test here -- presumably that helper tolerates nullptr when
// left_nullable; confirm.
TupleIdSequence* PatternMatchingUncheckedComparator<is_like_pattern, is_negation,
                                                    left_nullable, right_nullable>
    ::compareValueAccessorAndStaticValue(
        ValueAccessor *accessor,
        const attribute_id value_accessor_attr_id,
        const TypedValue &static_value,
        const TupleIdSequence *filter) const {
  return InvokeOnValueAccessorMaybeTupleIdSequenceAdapter(
      accessor,
      [&](auto *typed_accessor) -> TupleIdSequence* {  // NOLINT(build/c++11)
#ifdef QUICKSTEP_ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT
    static constexpr bool short_circuit = true;
#else
    static constexpr bool short_circuit = false;
#endif
    TupleIdSequence *result = new TupleIdSequence(typed_accessor->getEndPosition());
    if (right_nullable && static_value.isNull()) {
      return result;
    }

    // Build the regex once, up front. 'like_as_regex' must outlive
    // 'regex_source' because the StringPiece only points into it.
    const char *raw_pattern = static_cast<const char *>(static_value.getDataPtr());
    const std::size_t raw_pattern_len = strnlen(raw_pattern, right_length_);
    std::string like_as_regex;
    re2::StringPiece regex_source;
    if (is_like_pattern) {
      like_as_regex = this->transformLikeToRegex(raw_pattern, raw_pattern_len);
      regex_source.set(like_as_regex.c_str(), like_as_regex.size());
    } else {
      regex_source.set(raw_pattern, raw_pattern_len);
    }
    const re2::RE2 re2_pattern(regex_source);

    if (short_circuit && (filter != nullptr)) {
      // Short-circuit path: fetch and match only the positions the filter names.
      DCHECK_EQ(filter->length(), result->length());
      for (const auto tid : *filter) {
        const void *va_value
            = typed_accessor->template getUntypedValueAtAbsolutePosition<left_nullable>(
                value_accessor_attr_id,
                tid);
        result->set(tid,
                    this->matchDataPtrWithPattern(va_value, re2_pattern));
      }
      return result;
    }

    // Full-iteration path: match every tuple, then apply the filter (if any).
    typed_accessor->beginIteration();
    if (typed_accessor->isColumnAccessorSupported()) {
      // If ColumnAccessor is supported on the underlying accessor, we have a fast strided
      // column accessor available for the iteration on the underlying block.
      std::unique_ptr<const ColumnAccessor<left_nullable>> column_accessor(
          typed_accessor->template getColumnAccessor<left_nullable>(value_accessor_attr_id));
      DCHECK(column_accessor != nullptr);
      while (typed_accessor->next()) {
        const void *va_value = column_accessor->getUntypedValue();
        result->set(typed_accessor->getCurrentPosition(),
                    this->matchDataPtrWithPattern(va_value, re2_pattern));
      }
    } else {
      while (typed_accessor->next()) {
        const void *va_value
            = typed_accessor->template getUntypedValue<left_nullable>(value_accessor_attr_id);
        result->set(typed_accessor->getCurrentPosition(),
                    this->matchDataPtrWithPattern(va_value, re2_pattern));
      }
    }
    if (!short_circuit && (filter != nullptr)) {
      result->intersectWith(*filter);
    }

    return result;
  });
}