size_t qfind_first_byte_of_sse42(const StringPiece& haystack, const StringPiece& needles) { if (UNLIKELY(needles.empty() || haystack.empty())) { return StringPiece::npos; } else if (needles.size() <= 16) { // we can save some unnecessary load instructions by optimizing for // the common case of needles.size() <= 16 return qfind_first_byte_of_needles16(haystack, needles); } if (haystack.size() < 16 && PAGE_FOR(haystack.end() - 1) != PAGE_FOR(haystack.data() + 16)) { // We can't safely SSE-load haystack. Use a different approach. if (haystack.size() <= 2) { return qfind_first_of(haystack, needles, asciiCaseSensitive); } return qfind_first_byte_of_byteset(haystack, needles); } auto ret = scanHaystackBlock<false>(haystack, needles, 0); if (ret != StringPiece::npos) { return ret; } size_t i = nextAlignedIndex(haystack.data()); for (; i < haystack.size(); i += 16) { auto ret = scanHaystackBlock<true>(haystack, needles, i); if (ret != StringPiece::npos) { return ret; } } return StringPiece::npos; }
size_t qfind_first_byte_of_sse42(const StringPiece& haystack, const StringPiece& needles) { if (UNLIKELY(needles.empty() || haystack.empty())) { return StringPiece::npos; } else if (needles.size() <= 16) { // we can save some unnecessary load instructions by optimizing for // the common case of needles.size() <= 16 return qfind_first_byte_of_needles16(haystack, needles); } size_t index = haystack.size(); for (size_t i = 0; i < haystack.size(); i += 16) { size_t b = 16; auto arr1 = __builtin_ia32_loaddqu(haystack.data() + i); for (size_t j = 0; j < needles.size(); j += 16) { auto arr2 = __builtin_ia32_loaddqu(needles.data() + j); auto index = __builtin_ia32_pcmpestri128(arr2, needles.size() - j, arr1, haystack.size() - i, 0); b = std::min<size_t>(index, b); } if (b < 16) { return i + b; } }; return StringPiece::npos; }