bool checkKanaStringsEqual(const UChar* firstData, unsigned firstLength, const UChar* secondData, unsigned secondLength) { const UChar* a = firstData; const UChar* aEnd = firstData + firstLength; const UChar* b = secondData; const UChar* bEnd = secondData + secondLength; while (true) { // Check for non-kana-letter characters. while (a != aEnd && !isKanaLetter(*a) && b != bEnd && !isKanaLetter(*b)) { if (*a++ != *b++) return false; } // If we reached the end of either the target or the match, we should have // reached the end of both; both should have the same number of kana letters. if (a == aEnd || b == bEnd) { return a == aEnd && b == bEnd; } if (isKanaLetter(*a) != isKanaLetter(*b)) return false; // Check that single Kana letters in |a| and |b| are the same. const size_t offset = compareKanaLetterAndComposedVoicedSoundMarks(a, aEnd, b, bEnd); if (offset == kNotFound) return false; // Update values of |a| and |b| after comparing. a += offset; b += offset; } }
bool checkOnlyKanaLettersInStrings(const UChar* firstData, unsigned firstLength, const UChar* secondData, unsigned secondLength) { const UChar* a = firstData; const UChar* aEnd = firstData + firstLength; const UChar* b = secondData; const UChar* bEnd = secondData + secondLength; while (true) { // Skip runs of non-kana-letter characters. This is necessary so we can // correctly handle strings where the |firstData| and |secondData| have different-length // runs of characters that match, while still double checking the correctness // of matches of kana letters with other kana letters. while (a != aEnd && !isKanaLetter(*a)) ++a; while (b != bEnd && !isKanaLetter(*b)) ++b; // If we reached the end of either the target or the match, we should have // reached the end of both; both should have the same number of kana letters. if (a == aEnd || b == bEnd) { return a == aEnd && b == bEnd; } // Check that single Kana letters in |a| and |b| are the same. const size_t offset = compareKanaLetterAndComposedVoicedSoundMarks(a, aEnd, b, bEnd); if (offset == kNotFound) return false; // Update values of |a| and |b| after comparing. a += offset; b += offset; } }
// Returns true if at least one character of |pattern| satisfies
// isKanaLetter(); returns false otherwise, including for an empty pattern.
bool containsKanaLetters(const String& pattern)
{
    const unsigned end = pattern.length();
    unsigned index = 0;
    while (index < end) {
        if (isKanaLetter(pattern[index]))
            return true;
        ++index;
    }
    return false;
}
// Returns true if |character| is one of the small kana letters listed below.
// The caller must pass a kana letter (checked by the ASSERT).
bool isSmallKanaLetter(UChar character)
{
    ASSERT(isKanaLetter(character));

    // U+31F0..U+31FF: KATAKANA LETTER SMALL KU, SI, SU, TO, NU, HA, HI, HU,
    // HE, HO, MU, RA, RI, RU, RE, RO.
    if (character >= 0x31F0 && character <= 0x31FF)
        return true;

    // U+FF67..U+FF6F: HALFWIDTH KATAKANA LETTER SMALL A, I, U, E, O, YA, YU,
    // YO, TU.
    if (character >= 0xFF67 && character <= 0xFF6F)
        return true;

    // Remaining small letters, listed as HIRAGANA / KATAKANA pairs.
    switch (character) {
    case 0x3041: case 0x30A1: // LETTER SMALL A
    case 0x3043: case 0x30A3: // LETTER SMALL I
    case 0x3045: case 0x30A5: // LETTER SMALL U
    case 0x3047: case 0x30A7: // LETTER SMALL E
    case 0x3049: case 0x30A9: // LETTER SMALL O
    case 0x3063: case 0x30C3: // LETTER SMALL TU
    case 0x3083: case 0x30E3: // LETTER SMALL YA
    case 0x3085: case 0x30E5: // LETTER SMALL YU
    case 0x3087: case 0x30E7: // LETTER SMALL YO
    case 0x308E: case 0x30EE: // LETTER SMALL WA
    case 0x3095: case 0x30F5: // LETTER SMALL KA
    case 0x3096: case 0x30F6: // LETTER SMALL KE
        return true;
    default:
        return false;
    }
}
// Classifies a kana letter by the sound mark that is already part of the
// precomposed character: VoicedSoundMark for the G/Z/D/B/V letters listed
// below, SemiVoicedSoundMark for the P letters, and NoVoicedSoundMark for
// everything else. The caller must pass a kana letter (checked by the ASSERT).
static inline VoicedSoundMarkType composedVoicedSoundMark(UChar character)
{
    ASSERT(isKanaLetter(character));

    switch (character) {
    // Semi-voiced letters, listed as HIRAGANA / KATAKANA pairs.
    case 0x3071: case 0x30D1: // LETTER PA
    case 0x3074: case 0x30D4: // LETTER PI
    case 0x3077: case 0x30D7: // LETTER PU
    case 0x307A: case 0x30DA: // LETTER PE
    case 0x307D: case 0x30DD: // LETTER PO
        return SemiVoicedSoundMark;

    // Voiced letters, listed as HIRAGANA / KATAKANA pairs.
    case 0x304C: case 0x30AC: // LETTER GA
    case 0x304E: case 0x30AE: // LETTER GI
    case 0x3050: case 0x30B0: // LETTER GU
    case 0x3052: case 0x30B2: // LETTER GE
    case 0x3054: case 0x30B4: // LETTER GO
    case 0x3056: case 0x30B6: // LETTER ZA
    case 0x3058: case 0x30B8: // LETTER ZI
    case 0x305A: case 0x30BA: // LETTER ZU
    case 0x305C: case 0x30BC: // LETTER ZE
    case 0x305E: case 0x30BE: // LETTER ZO
    case 0x3060: case 0x30C0: // LETTER DA
    case 0x3062: case 0x30C2: // LETTER DI
    case 0x3065: case 0x30C5: // LETTER DU
    case 0x3067: case 0x30C7: // LETTER DE
    case 0x3069: case 0x30C9: // LETTER DO
    case 0x3070: case 0x30D0: // LETTER BA
    case 0x3073: case 0x30D3: // LETTER BI
    case 0x3076: case 0x30D6: // LETTER BU
    case 0x3079: case 0x30D9: // LETTER BE
    case 0x307C: case 0x30DC: // LETTER BO
    case 0x3094: case 0x30F4: // LETTER VU
    // Voiced letters listed for katakana only.
    case 0x30F7: // KATAKANA LETTER VA
    case 0x30F8: // KATAKANA LETTER VI
    case 0x30F9: // KATAKANA LETTER VE
    case 0x30FA: // KATAKANA LETTER VO
        return VoicedSoundMark;

    default:
        return NoVoicedSoundMark;
    }
}