// Returns the shared word-break iterator after handing `string` to it via
// setTextForIterator(), or nullptr when the iterator could not be created.
//
// The iterator is a function-local static built with
// initializeIterator(UBRK_WORD) on the first call, so every caller receives
// the same underlying object.
TextBreakIterator* wordBreakIterator(StringView string)
{
    static TextBreakIterator* sharedWordIterator = initializeIterator(UBRK_WORD);

    if (sharedWordIterator)
        return setTextForIterator(*sharedWordIterator, string);

    // Creation failed on the first call; the static stays null afterwards.
    return nullptr;
}
// Returns the shared sentence-break iterator after handing `string` to it via
// setTextForIterator(), or nullptr when the iterator could not be created.
//
// The iterator is a function-local static built with
// initializeIterator(UBRK_SENTENCE) on the first call, so every caller
// receives the same underlying object.
TextBreakIterator* sentenceBreakIterator(StringView string)
{
    static TextBreakIterator* sharedSentenceIterator = initializeIterator(UBRK_SENTENCE);

    if (sharedSentenceIterator)
        return setTextForIterator(*sharedSentenceIterator, string);

    // Creation failed on the first call; the static stays null afterwards.
    return nullptr;
}
// Obtains a character break iterator for this object and hands `string` to it
// via setTextForIterator().
//
// The iterator currently stored in nonSharedCharacterBreakIterator is reused
// when compareAndSwapNonSharedCharacterBreakIterator() reports success;
// otherwise a new UBRK_CHARACTER iterator is created. m_iterator is left null
// if that creation fails.
//
// NOTE(review): this file contains a second definition of this constructor
// (one built on getNonSharedCharacterBreakIterator()); confirm which one is
// meant to be compiled.
NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator(StringView string)
{
    m_iterator = nonSharedCharacterBreakIterator;

    // Presumably swaps the global slot to null only if it still holds
    // m_iterator, so that no other object can pick up the same iterator --
    // confirm against the helper's definition.
    bool claimedCachedIterator = m_iterator && compareAndSwapNonSharedCharacterBreakIterator(m_iterator, nullptr);

    // Nothing cached, or the claim failed: fall back to a fresh iterator.
    if (!claimedCachedIterator)
        m_iterator = initializeIterator(UBRK_CHARACTER);
    if (!m_iterator)
        return;

    m_iterator = setTextForIterator(*m_iterator, string);
}
TextBreakIterator* cursorMovementIterator(StringView string) { #if !PLATFORM(IOS) // This rule set is based on character-break iterator rules of ICU 4.0 // <http://source.icu-project.org/repos/icu/icu/tags/release-4-0/source/data/brkitr/char.txt>. // The major differences from the original ones are listed below: // * Replaced '[\p{Grapheme_Cluster_Break = SpacingMark}]' with '[\p{General_Category = Spacing Mark} - $Extend]' for ICU 3.8 or earlier; // * Removed rules that prevent a cursor from moving after prepend characters (Bug 24342); // * Added rules that prevent a cursor from moving after virama signs of Indic languages except Tamil (Bug 15790), and; // * Added rules that prevent a cursor from moving before Japanese half-width katakara voiced marks. // * Added rules for regional indicator symbols. static const char* kRules = "$CR = [\\p{Grapheme_Cluster_Break = CR}];" "$LF = [\\p{Grapheme_Cluster_Break = LF}];" "$Control = [\\p{Grapheme_Cluster_Break = Control}];" "$VoiceMarks = [\\uFF9E\\uFF9F];" // Japanese half-width katakana voiced marks "$Extend = [\\p{Grapheme_Cluster_Break = Extend} $VoiceMarks - [\\u0E30 \\u0E32 \\u0E45 \\u0EB0 \\u0EB2]];" "$SpacingMark = [[\\p{General_Category = Spacing Mark}] - $Extend];" "$L = [\\p{Grapheme_Cluster_Break = L}];" "$V = [\\p{Grapheme_Cluster_Break = V}];" "$T = [\\p{Grapheme_Cluster_Break = T}];" "$LV = [\\p{Grapheme_Cluster_Break = LV}];" "$LVT = [\\p{Grapheme_Cluster_Break = LVT}];" "$Hin0 = [\\u0905-\\u0939];" // Devanagari Letter A,...,Ha "$HinV = \\u094D;" // Devanagari Sign Virama "$Hin1 = [\\u0915-\\u0939];" // Devanagari Letter Ka,...,Ha "$Ben0 = [\\u0985-\\u09B9];" // Bengali Letter A,...,Ha "$BenV = \\u09CD;" // Bengali Sign Virama "$Ben1 = [\\u0995-\\u09B9];" // Bengali Letter Ka,...,Ha "$Pan0 = [\\u0A05-\\u0A39];" // Gurmukhi Letter A,...,Ha "$PanV = \\u0A4D;" // Gurmukhi Sign Virama "$Pan1 = [\\u0A15-\\u0A39];" // Gurmukhi Letter Ka,...,Ha "$Guj0 = [\\u0A85-\\u0AB9];" // Gujarati Letter A,...,Ha "$GujV = 
\\u0ACD;" // Gujarati Sign Virama "$Guj1 = [\\u0A95-\\u0AB9];" // Gujarati Letter Ka,...,Ha "$Ori0 = [\\u0B05-\\u0B39];" // Oriya Letter A,...,Ha "$OriV = \\u0B4D;" // Oriya Sign Virama "$Ori1 = [\\u0B15-\\u0B39];" // Oriya Letter Ka,...,Ha "$Tel0 = [\\u0C05-\\u0C39];" // Telugu Letter A,...,Ha "$TelV = \\u0C4D;" // Telugu Sign Virama "$Tel1 = [\\u0C14-\\u0C39];" // Telugu Letter Ka,...,Ha "$Kan0 = [\\u0C85-\\u0CB9];" // Kannada Letter A,...,Ha "$KanV = \\u0CCD;" // Kannada Sign Virama "$Kan1 = [\\u0C95-\\u0CB9];" // Kannada Letter A,...,Ha "$Mal0 = [\\u0D05-\\u0D39];" // Malayalam Letter A,...,Ha "$MalV = \\u0D4D;" // Malayalam Sign Virama "$Mal1 = [\\u0D15-\\u0D39];" // Malayalam Letter A,...,Ha "$RI = [\\U0001F1E6-\\U0001F1FF];" // Emoji regional indicators "!!chain;" "!!forward;" "$CR $LF;" "$L ($L | $V | $LV | $LVT);" "($LV | $V) ($V | $T);" "($LVT | $T) $T;" "[^$Control $CR $LF] $Extend;" "[^$Control $CR $LF] $SpacingMark;" "$RI $RI / $RI;" "$RI $RI;" "$Hin0 $HinV $Hin1;" // Devanagari Virama (forward) "$Ben0 $BenV $Ben1;" // Bengali Virama (forward) "$Pan0 $PanV $Pan1;" // Gurmukhi Virama (forward) "$Guj0 $GujV $Guj1;" // Gujarati Virama (forward) "$Ori0 $OriV $Ori1;" // Oriya Virama (forward) "$Tel0 $TelV $Tel1;" // Telugu Virama (forward) "$Kan0 $KanV $Kan1;" // Kannada Virama (forward) "$Mal0 $MalV $Mal1;" // Malayalam Virama (forward) "!!reverse;" "$LF $CR;" "($L | $V | $LV | $LVT) $L;" "($V | $T) ($LV | $V);" "$T ($LVT | $T);" "$Extend [^$Control $CR $LF];" "$SpacingMark [^$Control $CR $LF];" "$RI $RI / $RI $RI;" "$RI $RI;" "$Hin1 $HinV $Hin0;" // Devanagari Virama (backward) "$Ben1 $BenV $Ben0;" // Bengali Virama (backward) "$Pan1 $PanV $Pan0;" // Gurmukhi Virama (backward) "$Guj1 $GujV $Guj0;" // Gujarati Virama (backward) "$Ori1 $OriV $Ori0;" // Gujarati Virama (backward) "$Tel1 $TelV $Tel0;" // Telugu Virama (backward) "$Kan1 $KanV $Kan0;" // Kannada Virama (backward) "$Mal1 $MalV $Mal0;" // Malayalam Virama (backward) "!!safe_reverse;" 
"!!safe_forward;"; static TextBreakIterator* staticCursorMovementIterator = initializeIteratorWithRules(kRules); #else // PLATFORM(IOS) // Use the special Thai character break iterator for all locales static TextBreakIterator* staticCursorMovementIterator = initializeIterator(UBRK_CHARACTER, "th"); #endif // !PLATFORM(IOS) if (!staticCursorMovementIterator) return nullptr; return setTextForIterator(*staticCursorMovementIterator, string); }
// Obtains an iterator from getNonSharedCharacterBreakIterator() and, when one
// is returned, hands `string` to it via setTextForIterator(). m_iterator is
// left null when no iterator is available.
//
// NOTE(review): this file contains a second definition of this constructor
// (one that goes through compareAndSwapNonSharedCharacterBreakIterator());
// confirm which one is meant to be compiled.
NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator(StringView string)
{
    m_iterator = getNonSharedCharacterBreakIterator();
    if (!m_iterator)
        return;

    m_iterator = setTextForIterator(*m_iterator, string);
}