/**
 * Reads the next UTF-16 code unit in the forward direction and looks up its CE32.
 *
 * Where the unit comes from depends on `state`:
 *  - ITER_CHECK_FWD: read from `iter`; if the unit fails the quick FCD test below,
 *    step back and call nextSegment(), then retry.
 *  - ITER_IN_FCD_SEGMENT (with pos != limit): read from `iter` and advance `pos`.
 *  - state >= IN_NORM_ITER_AT_LIMIT (with pos != normalized.length()): read from
 *    the `normalized` buffer.
 *  - otherwise: call switchToForward() and retry.
 *
 * @param c          Output. The code unit that was read. Left at the negative value
 *                   returned by iter.next() when that call yields no unit; set to
 *                   U_SENTINEL when nextSegment() reports failure.
 * @param errorCode  Forwarded to nextSegment().
 * @return Collation::FALLBACK_CE32 if no unit could be read or nextSegment() failed;
 *         otherwise UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c).
 */
uint32_t FCDUIterCollationIterator::handleNextCE32(UChar32 &c, UErrorCode &errorCode) {
    for(;;) {
        if(state == ITER_CHECK_FWD) {
            c = iter.next(&iter);
            if(c < 0) {
                return Collation::FALLBACK_CE32;
            }
            // Quick FCD test: only a unit with a trailing combining class can be a
            // problem, and then only if it may be a Tibetan composite vowel or the
            // following unit has a leading combining class.
            const bool needsSegmentCheck =
                CollationFCD::hasTccc(c) &&
                (CollationFCD::maybeTibetanCompositeVowel(c) ||
                 CollationFCD::hasLccc(iter.current(&iter)));
            if(!needsSegmentCheck) {
                break;
            }
            // Un-read the unit so that nextSegment() starts from it.
            iter.previous(&iter);
            if(!nextSegment(errorCode)) {
                c = U_SENTINEL;
                return Collation::FALLBACK_CE32;
            }
            continue;
        }
        if(state == ITER_IN_FCD_SEGMENT && pos != limit) {
            c = iter.next(&iter);
            ++pos;
            U_ASSERT(c >= 0);
            break;
        }
        if(state >= IN_NORM_ITER_AT_LIMIT && pos != normalized.length()) {
            c = normalized[pos++];
            break;
        }
        // Nothing left to read in the current state.
        switchToForward();
    }
    return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c);
}
/**
 * Returns the next code point in the forward direction.
 *
 * Where the code point comes from depends on `state`:
 *  - ITER_CHECK_FWD: read a code unit from `iter`; if it fails the quick FCD test,
 *    step back, call nextSegment() and retry. Otherwise, a lead surrogate followed
 *    by a trail surrogate is combined into a supplementary code point.
 *  - ITER_IN_FCD_SEGMENT (with pos != limit): read via uiter_next32() and advance
 *    `pos` by the code point's UTF-16 length.
 *  - state >= IN_NORM_ITER_AT_LIMIT (with pos != normalized.length()): read from
 *    the `normalized` buffer.
 *  - otherwise: call switchToForward() and retry.
 *
 * @param errorCode  Forwarded to nextSegment().
 * @return The code point; the negative value from iter.next() when that call yields
 *         no unit; or U_SENTINEL when nextSegment() reports failure.
 */
UChar32 FCDUIterCollationIterator::nextCodePoint(UErrorCode &errorCode) {
    for(;;) {
        if(state == ITER_CHECK_FWD) {
            const UChar32 unit = iter.next(&iter);
            if(unit < 0) {
                return unit;
            }
            // Quick FCD test, same short-circuit order as in handleNextCE32().
            const bool needsSegmentCheck =
                CollationFCD::hasTccc(unit) &&
                (CollationFCD::maybeTibetanCompositeVowel(unit) ||
                 CollationFCD::hasLccc(iter.current(&iter)));
            if(needsSegmentCheck) {
                // Un-read the unit so that nextSegment() starts from it.
                iter.previous(&iter);
                if(!nextSegment(errorCode)) {
                    return U_SENTINEL;
                }
                continue;
            }
            if(!U16_IS_LEAD(unit)) {
                return unit;
            }
            // Try to pair the lead surrogate with the following unit.
            const UChar32 following = iter.next(&iter);
            if(U16_IS_TRAIL(following)) {
                return U16_GET_SUPPLEMENTARY(unit, following);
            }
            if(following >= 0) {
                // Not a trail surrogate: put the unit back; the lead stands alone.
                iter.previous(&iter);
            }
            return unit;
        }
        if(state == ITER_IN_FCD_SEGMENT && pos != limit) {
            const UChar32 cp = uiter_next32(&iter);
            pos += U16_LENGTH(cp);
            U_ASSERT(cp >= 0);
            return cp;
        }
        if(state >= IN_NORM_ITER_AT_LIMIT && pos != normalized.length()) {
            const UChar32 cp = normalized.char32At(pos);
            pos += U16_LENGTH(cp);
            return cp;
        }
        // Nothing left to read in the current state.
        switchToForward();
    }
}
/**
 * Returns the next code point in the forward direction from the UTF-8 text.
 *
 * Where the code point comes from depends on `state`:
 *  - CHECK_FWD: decode from `u8` at `pos`. Bytes below 0x80 are returned directly.
 *    Other sequences are decoded with U8_NEXT_OR_FFFD; if the result fails the
 *    quick FCD test, `pos` is moved back, nextSegment() is called and the read is
 *    retried.
 *  - IN_FCD_SEGMENT (with pos != limit): decode with U8_NEXT_OR_FFFD.
 *  - IN_NORMALIZED (with pos != normalized.length()): read from the `normalized`
 *    buffer.
 *  - otherwise: call switchToForward() and retry.
 *
 * @param errorCode  Forwarded to nextSegment().
 * @return The code point, or U_SENTINEL when `pos == length`, when a zero byte is
 *         reached while `length` is negative, or when nextSegment() reports failure.
 */
UChar32 FCDUTF8CollationIterator::nextCodePoint(UErrorCode &errorCode) {
    for(;;) {
        if(state == CHECK_FWD) {
            if(pos == length) {
                return U_SENTINEL;
            }
            UChar32 cp = u8[pos];
            if(cp == 0 && length < 0) {
                // Zero byte with a negative length: treated as end of text.
                return U_SENTINEL;
            }
            if(cp < 0x80) {
                ++pos;
                return cp;
            }
            U8_NEXT_OR_FFFD(u8, pos, length, cp);
            // The FCD data is queried with a UTF-16 unit: the code point itself for
            // the BMP, else its lead surrogate.
            const UChar32 fcdUnit = cp <= 0xffff ? cp : U16_LEAD(cp);
            const bool needsSegmentCheck =
                CollationFCD::hasTccc(fcdUnit) &&
                (CollationFCD::maybeTibetanCompositeVowel(cp) ||
                 (pos != length && nextHasLccc()));
            if(!needsSegmentCheck) {
                return cp;
            }
            // cp is not FCD-inert, therefore it is not U+FFFD and it has a valid byte sequence
            // and we can use U8_LENGTH() rather than a previous-position variable.
            pos -= U8_LENGTH(cp);
            if(!nextSegment(errorCode)) {
                return U_SENTINEL;
            }
            continue;
        }
        if(state == IN_FCD_SEGMENT && pos != limit) {
            UChar32 cp;
            U8_NEXT_OR_FFFD(u8, pos, length, cp);
            return cp;
        }
        if(state == IN_NORMALIZED && pos != normalized.length()) {
            const UChar32 cp = normalized.char32At(pos);
            pos += U16_LENGTH(cp);
            return cp;
        }
        // Nothing left to read in the current state.
        switchToForward();
    }
}
uint32_t FCDUTF8CollationIterator::handleNextCE32(UChar32 &c, UErrorCode &errorCode) { for(;;) { if(state == CHECK_FWD) { // Combination of UTF8CollationIterator::handleNextCE32() with FCD check fastpath. if(pos == length) { c = U_SENTINEL; return Collation::FALLBACK_CE32; } c = u8[pos++]; if(c < 0xc0) { // ASCII 00..7F; trail bytes 80..BF map to error values. return trie->data32[c]; } uint8_t t1, t2; if(c < 0xe0 && pos != length && (t1 = (u8[pos] - 0x80)) <= 0x3f) { // U+0080..U+07FF; 00..7F map to error values. uint32_t ce32 = trie->data32[trie->index[(UTRIE2_UTF8_2B_INDEX_2_OFFSET - 0xc0) + c] + t1]; c = ((c & 0x1f) << 6) | t1; ++pos; if(CollationFCD::hasTccc(c) && pos != length && nextHasLccc()) { pos -= 2; } else { return ce32; } } else if(c <= 0xef && ((pos + 1) < length || length < 0) && (t1 = (u8[pos] - 0x80)) <= 0x3f && (c != 0xe0 || t1 >= 0x20) && (t2 = (u8[pos + 1] - 0x80)) <= 0x3f ) { // U+0800..U+FFFF; caller maps surrogates to error values. c = (UChar)((c << 12) | (t1 << 6) | t2); pos += 2; if(CollationFCD::hasTccc(c) && (CollationFCD::maybeTibetanCompositeVowel(c) || (pos != length && nextHasLccc()))) { pos -= 3; } else { break; // return CE32(BMP) } } else { // Function call for supplementary code points and error cases. // Illegal byte sequences yield U+FFFD. c = utf8_nextCharSafeBody(u8, &pos, length, c, -3); if(c == 0xfffd) { return Collation::FFFD_CE32; } else { U_ASSERT(c > 0xffff); if(CollationFCD::hasTccc(U16_LEAD(c)) && pos != length && nextHasLccc()) { pos -= 4; } else { return data->getCE32FromSupplementary(c); } } } if(!nextSegment(errorCode)) { c = U_SENTINEL; return Collation::FALLBACK_CE32; } continue; } else if(state == IN_FCD_SEGMENT && pos != limit) { return UTF8CollationIterator::handleNextCE32(c, errorCode); } else if(state == IN_NORMALIZED && pos != normalized.length()) { c = normalized[pos++]; break; } else { switchToForward(); } } return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c); }