Пример #1
0
// Returns the index of the first byte of `haystack` that equals any byte of
// `needles`, or std::string::npos when there is no such byte (including when
// either input is empty).
//
// Dispatch order:
//   1. needles.size() <= 16      -> qfind_first_byte_of_needles16()
//   2. haystack not SSE-loadable -> scalar fallbacks (std / byteset)
//   3. otherwise                 -> block-wise scan via scanHaystackBlock()
size_t qfind_first_byte_of_sse42(const StringPieceLite haystack,
                                 const StringPieceLite needles) {
  if (UNLIKELY(needles.empty() || haystack.empty())) {
    return std::string::npos;
  }

  // Common case: all needles fit into one SSE register, which lets the helper
  // avoid re-loading needles for every haystack block.
  if (needles.size() <= 16) {
    return qfind_first_byte_of_needles16(haystack, needles);
  }

  // A 16-byte load from a haystack shorter than 16 bytes reads past its end;
  // that is only attempted when the probe address shares a page with the last
  // haystack byte.
  // NOTE(review): the probe is data() + 16 here while the needles16 helper
  // uses data() + 15; this looks merely more conservative -- confirm.
  if (haystack.size() < 16 &&
      page_for(haystack.end() - 1) != page_for(haystack.data() + 16)) {
    return haystack.size() <= 2
        ? qfind_first_byte_of_std(haystack, needles)
        : qfind_first_byte_of_byteset(haystack, needles);
  }

  // First block: the haystack start has no alignment guarantee, so scan it
  // with the unaligned variant.
  const size_t firstHit = scanHaystackBlock<false>(haystack, needles, 0);
  if (firstHit != std::string::npos) {
    return firstHit;
  }

  // Remaining blocks: continue from nextAlignedIndex() in 16-byte steps using
  // the aligned variant.
  for (size_t offset = nextAlignedIndex(haystack.data());
       offset < haystack.size();
       offset += 16) {
    const size_t hit = scanHaystackBlock<true>(haystack, needles, offset);
    if (hit != std::string::npos) {
      return hit;
    }
  }

  return std::string::npos;
}
Пример #2
0
size_t qfind_first_byte_of_byteset(const StringPieceLite haystack,
                                   const StringPieceLite needles) {
  SparseByteSet s;
  for (auto needle: needles) {
    s.add(needle);
  }
  for (size_t index = 0; index < haystack.size(); ++index) {
    if (s.contains(haystack[index])) {
      return index;
    }
  }
  return std::string::npos;
}
Пример #3
0
// Scalar search backed by a 256-entry bitset: one bit per possible byte value
// is set for each byte in `needles`, then `haystack` is scanned front to back.
// Returns the index of the first haystack byte whose bit is set, or
// std::string::npos if there is none.
size_t qfind_first_byte_of_bitset(const StringPieceLite haystack,
                                  const StringPieceLite needles) {
  std::bitset<256> present;
  for (const auto needle : needles) {
    // Convert to uint8_t first so the index is always in [0, 255], even when
    // the element type is a signed char.
    present.set(static_cast<uint8_t>(needle));
  }

  const size_t len = haystack.size();
  for (size_t pos = 0; pos != len; ++pos) {
    const auto byte = static_cast<uint8_t>(haystack[pos]);
    if (present[byte]) {
      return pos;
    }
  }
  return std::string::npos;
}
Пример #4
0
// Helper for the case where needles.size() <= 16, i.e. all needles fit into a
// single SSE register.
//
// Returns the index of the first byte of `haystack` that equals any byte of
// `needles`, or std::string::npos if there is none. Both inputs must be
// non-empty (checked with DCHECKs only).
//
// Falls back to detail::qfind_first_byte_of_nosse() when
//   - there are at most 2 needles and the haystack has at least 256 bytes, or
//   - a 16-byte load of a short haystack could touch a different page, or
//   - a 16-byte load of needles could touch a different page.
size_t qfind_first_byte_of_needles16(const StringPieceLite haystack,
                                     const StringPieceLite needles) {
  DCHECK_GT(haystack.size(), 0);
  DCHECK_GT(needles.size(), 0);
  DCHECK_LE(needles.size(), 16);
  if ((needles.size() <= 2 && haystack.size() >= 256) ||
      // must bail if we can't even SSE-load a single segment of haystack
      (haystack.size() < 16 &&
       page_for(haystack.end() - 1) != page_for(haystack.data() + 15)) ||
      // can't load needles into SSE register if it could cross page boundary
      page_for(needles.end() - 1) != page_for(needles.data() + 15)) {
    return detail::qfind_first_byte_of_nosse(haystack, needles);
  }

  // _mm_cmpestri takes its explicit lengths as `int` and saturates them at 16.
  // Clamp in size_t *before* narrowing: a plain size_t -> int conversion of a
  // length >= 2^31 can wrap to a small value, which would make the instruction
  // ignore valid bytes of a full block.
  const auto sseLen = [](size_t n) {
    return static_cast<int>(n < 16 ? n : 16);
  };
  const int needleLen = sseLen(needles.size());

  auto arr2 = _mm_loadu_si128(
      reinterpret_cast<const __m128i*>(needles.data()));
  // do an unaligned load for first block of haystack
  auto arr1 = _mm_loadu_si128(
      reinterpret_cast<const __m128i*>(haystack.data()));
  // Mode 0: unsigned bytes, "equal any", least-significant index. The result
  // is the position in arr1 of the first byte found in arr2, or 16 if none.
  auto index = _mm_cmpestri(arr2, needleLen,
                            arr1, sseLen(haystack.size()), 0);
  if (index < 16) {
    return index;
  }

  // Now, we can do aligned loads hereafter...
  // (The first aligned block may overlap bytes already covered by the
  // unaligned load above; those bytes are known not to match.)
  size_t i = nextAlignedIndex(haystack.data());
  for (; i < haystack.size(); i += 16) {
    arr1 =
        _mm_load_si128(reinterpret_cast<const __m128i*>(haystack.data() + i));
    index = _mm_cmpestri(arr2, needleLen,
                         arr1, sseLen(haystack.size() - i), 0);
    if (index < 16) {
      return i + index;
    }
  }
  return std::string::npos;
}
Пример #5
0
size_t scanHaystackBlock(const StringPieceLite haystack,
                         const StringPieceLite needles,
                         uint64_t blockStartIdx) {
  DCHECK_GT(needles.size(), 16);  // should handled by *needles16() method
  DCHECK(blockStartIdx + 16 <= haystack.size() ||
         (page_for(haystack.data() + blockStartIdx) ==
          page_for(haystack.data() + blockStartIdx + 15)));

  __m128i arr1;
  if (HAYSTACK_ALIGNED) {
    arr1 = _mm_load_si128(
        reinterpret_cast<const __m128i*>(haystack.data() + blockStartIdx));
  } else {
    arr1 = _mm_loadu_si128(
        reinterpret_cast<const __m128i*>(haystack.data() + blockStartIdx));
  }

  // This load is safe because needles.size() >= 16
  auto arr2 = _mm_loadu_si128(
      reinterpret_cast<const __m128i*>(needles.data()));
  size_t b = _mm_cmpestri(
      arr2, 16, arr1, haystack.size() - blockStartIdx, 0);

  size_t j = nextAlignedIndex(needles.data());
  for (; j < needles.size(); j += 16) {
    arr2 = _mm_load_si128(
        reinterpret_cast<const __m128i*>(needles.data() + j));

    auto index = _mm_cmpestri(
      arr2, needles.size() - j,
      arr1, haystack.size() - blockStartIdx, 0);
    b = std::min<size_t>(index, b);
  }

  if (b < 16) {
    return blockStartIdx + b;
  }
  return std::string::npos;
}