// Converts the UTF-8 bytes in [*sourceStart, sourceEnd) into UTF-16 code units
// stored in [*targetStart, targetEnd).
//
// On return *sourceStart and *targetStart are advanced past everything that was
// consumed and produced. The result is conversionOK when all input was
// converted, sourceExhausted when the input ends inside a sequence,
// sourceIllegal for an ill-formed sequence (or, in strict mode, a surrogate or
// out-of-range value), and targetExhausted when the output buffer is full.
// In lenient mode surrogates and out-of-range values become replacementCharacter.
ConversionResult convertUTF8ToUTF16(
    const char** sourceStart, const char* sourceEnd,
    UChar** targetStart, UChar* targetEnd, bool strict)
{
    const char* in = *sourceStart;
    UChar* out = *targetStart;
    ConversionResult status = conversionOK;

    for (; in < sourceEnd; ) {
        int sequenceLength = inlineUTF8SequenceLength(*in);
        if (sourceEnd - in < sequenceLength) {
            status = sourceExhausted;
            break;
        }

        // Legality is verified in both lenient and strict mode.
        if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(in), sequenceLength)) {
            status = sourceIllegal;
            break;
        }

        UChar32 codePoint = readUTF8Sequence(in, sequenceLength);

        if (out >= targetEnd) {
            in -= sequenceLength; // Rewind to the start of the unconverted sequence.
            status = targetExhausted;
            break;
        }

        const bool inBMP = U_IS_BMP(codePoint);

        if (inBMP && !U_IS_SURROGATE(codePoint)) {
            *out++ = codePoint; // Common case: one code unit.
            continue;
        }

        if (!inBMP && U_IS_SUPPLEMENTARY(codePoint)) {
            // Needs a surrogate pair, i.e. room for two code units.
            if (out + 1 >= targetEnd) {
                in -= sequenceLength; // Rewind to the start of the unconverted sequence.
                status = targetExhausted;
                break;
            }
            *out++ = U16_LEAD(codePoint);
            *out++ = U16_TRAIL(codePoint);
            continue;
        }

        // Either a UTF-16 surrogate value (illegal in UTF-32) or a value that is
        // neither BMP nor supplementary.
        if (strict) {
            in -= sequenceLength; // Point back at the offending sequence.
            status = sourceIllegal;
            break;
        }
        *out++ = replacementCharacter;
    }

    *sourceStart = in;
    *targetStart = out;
    return status;
}
/*
 * Pushes the code point ch back onto the UFILE's string buffer by moving the
 * buffer position (str->fPos) backwards.
 *
 * The push-back only succeeds when the code units immediately before the
 * current position are exactly the UTF-16 encoding of ch: one unit for a BMP
 * value, a lead/trail surrogate pair for a supplementary code point.
 *
 * Returns ch on success and U_EOF when the position is at the start of the
 * buffer, when there is no room for a surrogate pair, or when the buffer
 * contents do not match ch. As before, the position may already have been
 * moved back when a mismatch is detected.
 */
U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fungetc(UChar32 ch, UFILE *f)
{
    u_localized_string *str;
    int needsTwoUnits;

    str = &f->str;

    /*
     * ch is a code point, not a code unit: it occupies two UTF-16 units exactly
     * when it is supplementary. (Testing U_IS_LEAD(ch) here would never be true
     * for a supplementary value, and U16_LEAD/U16_TRAIL are only meaningful for
     * supplementary code points.)
     */
    needsTwoUnits = (ch > 0xFFFF && ch <= 0x10FFFF);

    /* if we're at the beginning of the buffer, sorry! */
    if (str->fPos == str->fBuffer
        || (needsTwoUnits && (str->fPos - 1) == str->fBuffer))
    {
        ch = U_EOF;
    }
    else {
        /* otherwise, put the character back */
        /* Remember, read them back on in the reverse order. */
        if (needsTwoUnits) {
            if (*--(str->fPos) != U16_TRAIL(ch)
                || *--(str->fPos) != U16_LEAD(ch))
            {
                ch = U_EOF;
            }
        }
        else if (*--(str->fPos) != ch) {
            ch = U_EOF;
        }
    }
    return ch;
}
void XMLTreeBuilder::processHTMLEntity(const AtomicXMLToken& token) { HTMLEntitySearch search; const AtomicString& name = token.name(); for (size_t i = 0; i < name.length(); ++i) { search.advance(name[i]); if (!search.isEntityPrefix()) { m_parser->stopParsing(); return; } } search.advance(';'); if (!search.isEntityPrefix()) { m_parser->stopParsing(); return; } UChar32 entityValue = search.mostRecentMatch()->firstValue; // FIXME: We need to account for secondValue if any XML entities are longer // than one unicode character. ASSERT_NOT_REACHED(); // Darin Adler writes: // You can see given the code above that this else is dead code. This code is in a strange state. // And the reinterpret_cast to UChar* makes the code little-endian-specific. That is not good! if (entityValue <= 0xFFFF) appendToText(reinterpret_cast<UChar*>(&entityValue), 1); else { UChar utf16Pair[2] = { U16_LEAD(entityValue), U16_TRAIL(entityValue) }; appendToText(utf16Pair, 2); } }
/*
 * "previous" callback of the lenient UTF-8 UCharIterator: steps one UTF-16
 * code unit backwards and returns it.
 *
 * Returns U_SENTINEL when iter->start is already 0 (and no half-delivered
 * supplementary code point is pending), 0xfffd when L8_PREV reports a negative
 * value, and otherwise a BMP code point or one surrogate of a supplementary
 * code point. A supplementary code point is delivered over two calls: first
 * its trail surrogate, then (via iter->reservedField) its lead surrogate.
 */
static UChar32 U_CALLCONV
lenient8IteratorPrevious(UCharIterator *iter) {
    int32_t index;

    if(iter->reservedField!=0) {
        /* the trail surrogate of this code point was returned by the previous call; deliver the lead now */
        UChar lead=U16_LEAD(iter->reservedField);
        iter->reservedField=0;
        iter->start-=4; /* we stayed behind the supplementary code point; go before it now */
        if((index=iter->index)>0) {
            iter->index=index-1;
        }
        return lead;
    } else if(iter->start>0) {
        const uint8_t *s=(const uint8_t *)iter->context;
        UChar32 c;

        /* read the code point that ends at iter->start; this moves iter->start backwards */
        L8_PREV(s, 0, iter->start, c);
        if((index=iter->index)>0) {
            iter->index=index-1;
        } else if(iter->start<=1) {
            /* NOTE(review): presumably re-derives the UTF-16 index near the start of the text - confirm */
            iter->index= c<=0xffff ? iter->start : iter->start+1;
        }
        if(c<0) {
            return 0xfffd;
        } else if(c<=0xffff) {
            return c;
        } else {
            iter->start+=4; /* back to behind this supplementary code point for consistent state */
            iter->reservedField=c; /* remember the code point so the next call can return its lead surrogate */
            return U16_TRAIL(c);
        }
    } else {
        return U_SENTINEL;
    }
}
// Returns the system fallback font for |character|, consulting and filling the
// per-font fallback cache keyed by (locale, character, isForPlatformFont).
//
// A null character is never cached: it is looked up directly as a single zero
// code unit. Fonts stored in the cache are flagged with
// m_isUsedInSystemFallbackCache.
RefPtr<Font> Font::systemFallbackFontForCharacter(UChar32 character, const FontDescription& description, bool isForPlatformFont) const
{
    auto fontAddResult = systemFallbackCache().add(this, CharacterFallbackMap());

    if (!character) {
        UChar codeUnit = 0;
        return FontCache::singleton().systemFallbackForCharacters(description, this, isForPlatformFont, &codeUnit, 1);
    }

    auto key = CharacterFallbackMapKey(description.locale(), character, isForPlatformFont);
    auto characterAddResult = fontAddResult.iterator->value.add(WTF::move(key), nullptr);

    Font*& fallbackFont = characterAddResult.iterator->value;
    if (fallbackFont)
        return fallbackFont;

    // Cache miss: encode the character as UTF-16 and ask the font cache.
    UChar codeUnits[2];
    unsigned codeUnitsLength = 1;
    if (U_IS_BMP(character))
        codeUnits[0] = FontCascade::normalizeSpaces(character);
    else {
        codeUnits[0] = U16_LEAD(character);
        codeUnits[1] = U16_TRAIL(character);
        codeUnitsLength = 2;
    }

    fallbackFont = FontCache::singleton().systemFallbackForCharacters(description, this, isForPlatformFont, codeUnits, codeUnitsLength).get();
    if (fallbackFont)
        fallbackFont->m_isUsedInSystemFallbackCache = true;

    return fallbackFont;
}
/* Explain <xxxxx> tag to a native value
 *
 * Each <xxxxx> tag (hexadecimal digits between '<' and '>') is replaced in
 * place by the code point it names, encoded as UTF-16.
 *
 * Since <xxxxx> is always larger than the native value,
 * the operation will replace the tag directly in the buffer,
 * and, of course, will shift tail elements.
 *
 * Characters inside a tag that are not hexadecimal digits are ignored.
 * A tag with no closing '>' ends at the end of the string instead of
 * scanning past the terminator.
 */
void IdnaConfTest::ExplainCodePointTag(UnicodeString& buf){
    buf.append((UChar)0);                // add a terminal NULL
    UChar* bufBase = buf.getBuffer(buf.length());
    if (!bufBase){
        return;                          // no writable buffer available; leave buf as is
    }
    UChar* p = bufBase;
    while (*p != 0){
        if ( *p != 0x3C){    // <
            *bufBase++ = *p++;
        } else {
            p++;    // skip <
            UChar32 cp = 0;
            // stop at '>' or at the terminator, whichever comes first
            for ( ;*p != 0x3E && *p != 0; p++){   // >
                if (0x30 <= *p && *p <= 0x39){        // 0-9
                    cp = (cp * 16) + (*p - 0x30);
                } else if (0x61 <= *p && *p <= 0x66){ // a-f
                    cp = (cp * 16) + (*p - 0x61) + 10;
                } else if (0x41 <= *p && *p <= 0x46) {// A-F
                    cp = (cp * 16) + (*p - 0x41) + 10;
                }
                // no else. hope everything is good.
            }
            if (*p == 0x3E){
                p++;    // skip >
            }
            if (U_IS_BMP(cp)){
                *bufBase++ = cp;
            } else {
                *bufBase++ = U16_LEAD(cp);
                *bufBase++ = U16_TRAIL(cp);
            }
        }
    }
    *bufBase = 0;  // close our buffer
    buf.releaseBuffer();
}
// Advances the trie by one code point. A BMP value is a single next() step;
// a supplementary value is stepped as lead surrogate then trail surrogate,
// and yields USTRINGTRIE_NO_MATCH if the trie cannot continue after the lead.
UStringTrieResult
UCharsTrie::nextForCodePoint(UChar32 cp) {
    if(cp<=0xffff) {
        return next(cp);
    }
    if(!USTRINGTRIE_HAS_NEXT(next(U16_LEAD(cp)))) {
        return USTRINGTRIE_NO_MATCH;
    }
    return next(U16_TRAIL(cp));
}
// Appends one code point as UTF-16: a single code unit for values up to
// 0xffff, otherwise a lead/trail surrogate pair. The trail unit is only
// attempted if appending the lead unit succeeded.
UBool
Appendable::appendCodePoint(UChar32 c) {
    if(c>0xffff) {
        return appendCodeUnit(U16_LEAD(c)) && appendCodeUnit(U16_TRAIL(c));
    }
    return appendCodeUnit((UChar)c);
}
/*
 * Converts the NUL-terminated UTF-8 string `src` to UTF-16 in `dest`.
 *
 * `length` is the capacity of `dest` in wchar_t units, including the
 * terminating zero, and must be non-zero. Conversion stops at the end of
 * `src` or when the remaining capacity is used up; the output is always
 * zero-terminated. A multi-byte sequence that is cut short by the end of the
 * string is replaced by U+FFFD and ends the conversion. Continuation bytes
 * are not validated.
 */
void git__utf8_to_16(wchar_t *dest, size_t length, const char *src)
{
	wchar_t *pDest = dest;
	uint32_t ch;
	const uint8_t* pSrc = (const uint8_t*) src;

	assert(dest && src && length);

	length--; /* keep one unit for the terminator */

	while(*pSrc && length > 0) {
		ch = *pSrc++;
		length--;

		if(ch < 0xc0) {
			/*
			 * ASCII, or a trail byte in lead position which is treated like
			 * a single-byte sequence for better character boundary
			 * resynchronization after illegal sequences.
			 */
			*pDest++ = (wchar_t)ch;
			continue;
		} else if(ch < 0xe0) { /* U+0080..U+07FF */
			if (pSrc[0]) {
				/* 0x3080 = (0xc0 << 6) + 0x80 */
				/* masked to 16 bits so the value does not depend on the width of wchar_t */
				*pDest++ = (wchar_t)(((ch << 6) + *pSrc++ - 0x3080) & 0xffff);
				continue;
			}
		} else if(ch < 0xf0) { /* U+0800..U+FFFF */
			if (pSrc[0] && pSrc[1]) {
				/*
				 * The lead byte's marker bits end up above bit 15 after <<12.
				 * They are masked off explicitly rather than relying on a
				 * 16-bit wchar_t to truncate them.
				 */
				/* 0x2080 = (0x80 << 6) + 0x80 */
				ch = (ch << 12) + (*pSrc++ << 6);
				*pDest++ = (wchar_t)((ch + *pSrc++ - 0x2080) & 0xffff);
				continue;
			}
		} else /* f0..f4 */ { /* U+10000..U+10FFFF */
			/* a surrogate pair needs one more free unit than was reserved above */
			if (length >= 1 && pSrc[0] && pSrc[1] && pSrc[2]) {
				/* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
				ch = (ch << 18) + (*pSrc++ << 12);
				ch += *pSrc++ << 6;
				ch += *pSrc++ - 0x3c82080;
				*(pDest++) = U16_LEAD(ch);
				*(pDest++) = U16_TRAIL(ch);
				length--; /* two bytes for this character */
				continue;
			}
		}

		/* truncated character at the end */
		*pDest++ = 0xfffd;
		break;
	}

	*pDest++ = 0x0;
}
// Reports whether CollationFCD::hasTccc() is true for the code point that ends
// just before pos. Must only be called in state CHECK_BWD with pos != 0.
// A preceding byte below 0x80 answers FALSE without decoding; supplementary
// code points are tested via their lead surrogate. pos is not modified.
UBool
FCDUTF8CollationIterator::previousHasTccc() const {
    U_ASSERT(state == CHECK_BWD && pos != 0);
    if(u8[pos - 1] < 0x80) {
        return FALSE;
    }
    UChar32 c;
    int32_t before = pos;
    U8_PREV_OR_FFFD(u8, 0, before, c);
    return CollationFCD::hasTccc(c > 0xffff ? U16_LEAD(c) : c);
}
// Reports whether CollationFCD::hasLccc() is true for the code point that
// starts at pos. Must only be called in state CHECK_FWD with pos != length.
// Supplementary code points are tested via their lead surrogate. pos is not
// modified.
UBool
FCDUTF8CollationIterator::nextHasLccc() const {
    U_ASSERT(state == CHECK_FWD && pos != length);
    // The lowest code point with ccc!=0 is U+0300 which is CC 80 in UTF-8.
    // CJK U+4000..U+DFFF except U+Axxx are also FCD-inert. (Lead bytes E4..ED except EA.)
    const UChar32 leadByte = u8[pos];
    if(leadByte < 0xcc) {
        return FALSE;
    }
    if(leadByte != 0xea && 0xe4 <= leadByte && leadByte <= 0xed) {
        return FALSE;
    }
    UChar32 c;
    int32_t after = pos;
    U8_NEXT_OR_FFFD(u8, after, length, c);
    return CollationFCD::hasLccc(c > 0xffff ? U16_LEAD(c) : c);
}
// Writes |character| as UTF-16 at |destination| (one code unit for BMP values,
// a surrogate pair otherwise) and returns the position just past what was
// written. |character| must not be nonCharacter or a surrogate.
static inline UChar* appendCharacter(UChar* destination, int character)
{
    ASSERT(character != nonCharacter);
    ASSERT(!U_IS_SURROGATE(character));

    if (!U_IS_BMP(character)) {
        destination[0] = U16_LEAD(character);
        destination[1] = U16_TRAIL(character);
        return destination + 2;
    }

    *destination = character;
    return destination + 1;
}
// Stores |value| as UTF-16 in |result| and returns the number of code units
// written: 1 for a BMP value, 2 (lead + trail surrogate) otherwise.
static size_t appendUChar32ToUCharArray(UChar32 value, UChar* result)
{
    if (!U_IS_BMP(value)) {
        result[0] = U16_LEAD(value);
        result[1] = U16_TRAIL(value);
        return 2;
    }

    UChar codeUnit = static_cast<UChar>(value);
    ASSERT(codeUnit == value);
    result[0] = codeUnit;
    return 1;
}
/*
 * Appends the code point c to dst as UTF-16: one code unit for values up to
 * 0xFFFF, otherwise a lead/trail surrogate pair (two ustr_ucat calls).
 *
 * Values outside 0..0x10FFFF, including negative ones, are not code points:
 * *status is set to U_ILLEGAL_CHAR_FOUND and dst is left untouched.
 */
U_CFUNC void
ustr_u32cat(struct UString *dst, UChar32 c, UErrorCode *status){
    /* the unsigned comparison also rejects negative values */
    if((uint32_t)c > 0x10FFFF){
        *status = U_ILLEGAL_CHAR_FOUND;
        return;
    }
    if(c >0xFFFF){
        ustr_ucat(dst, U16_LEAD(c), status);
        ustr_ucat(dst, U16_TRAIL(c), status);
    }else{
        ustr_ucat(dst, (UChar) c, status);
    }
}
// Builds a String holding the single code point |c| in UTF-16.
// A zero code point yields a default-constructed String.
static inline String singleCharacterString(UChar32 c)
{
    if (!c)
        return String();

    if (c <= 0xffff) {
        UChar codeUnit = (UChar)c;
        return String(&codeUnit, 1);
    }

    UChar surrogatePair[2] = { U16_LEAD(c), U16_TRAIL(c) };
    return String(surrogatePair, 2);
}
unsigned calculateStringHashAndLengthFromUTF8MaskingTop8Bits(const char* data, const char* dataEnd, unsigned& dataLength, unsigned& utf16Length) { if (!data) return 0; StringHasher stringHasher; dataLength = 0; utf16Length = 0; while (data < dataEnd || (!dataEnd && *data)) { if (isASCII(*data)) { stringHasher.addCharacter(*data++); dataLength++; utf16Length++; continue; } int utf8SequenceLength = inlineUTF8SequenceLengthNonASCII(*data); dataLength += utf8SequenceLength; if (!dataEnd) { for (int i = 1; i < utf8SequenceLength; ++i) { if (!data[i]) return 0; } } else if (dataEnd - data < utf8SequenceLength) { return 0; } if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(data), utf8SequenceLength)) return 0; UChar32 character = readUTF8Sequence(data, utf8SequenceLength); ASSERT(!isASCII(character)); if (U_IS_BMP(character)) { // UTF-16 surrogate values are illegal in UTF-32 if (U_IS_SURROGATE(character)) return 0; stringHasher.addCharacter(static_cast<UChar>(character)); // normal case utf16Length++; } else if (U_IS_SUPPLEMENTARY(character)) { stringHasher.addCharacters(static_cast<UChar>(U16_LEAD(character)), static_cast<UChar>(U16_TRAIL(character))); utf16Length += 2; } else { return 0; } } return stringHasher.hashWithTop8BitsMasked(); }
// Converts UTF-8 |text| to a vector of UTF-16 code units.
//
// BMP code points become one unit, supplementary code points a lead/trail
// surrogate pair. Each ill-formed sequence (which U8_NEXT reports as a
// negative value) becomes U+FFFD rather than a bogus surrogate pair.
std::vector<uint16_t> utf8ToUtf16(const std::string& text) {
    std::vector<uint16_t> result;
    // A UTF-8 string never decodes to more UTF-16 units than it has bytes.
    result.reserve(text.size());

    const char* const bytes = text.c_str();
    const int32_t textLength = static_cast<int32_t>(text.size());
    int32_t i = 0;
    while (i < textLength) {
        // Must be signed: U8_NEXT signals an error by setting a negative value.
        int32_t c = 0;
        U8_NEXT(bytes, i, textLength, c);
        if (c < 0) {
            result.push_back(0xFFFD);
            continue;
        }
        if (U16_LENGTH(c) == 1) {
            result.push_back(static_cast<uint16_t>(c));
        } else {
            result.push_back(U16_LEAD(c));
            result.push_back(U16_TRAIL(c));
        }
    }
    return result;
}
unsigned calculateStringHashFromUTF8(const char* data, const char* dataEnd, unsigned& utf16Length) { if (!data) return 0; WTF::StringHasher stringHasher; utf16Length = 0; while (data < dataEnd) { if (isASCII(*data)) { stringHasher.addCharacter(*data++); utf16Length++; continue; } int utf8SequenceLength = inlineUTF8SequenceLengthNonASCII(*data); if (dataEnd - data < utf8SequenceLength) return false; if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(data), utf8SequenceLength)) return 0; UChar32 character = readUTF8Sequence(data, utf8SequenceLength); ASSERT(!isASCII(character)); if (U_IS_BMP(character)) { // UTF-16 surrogate values are illegal in UTF-32 if (U_IS_SURROGATE(character)) return 0; stringHasher.addCharacter(static_cast<UChar>(character)); // normal case utf16Length++; } else if (U_IS_SUPPLEMENTARY(character)) { stringHasher.addCharacters(static_cast<UChar>(U16_LEAD(character)), static_cast<UChar>(U16_TRAIL(character))); utf16Length += 2; } else return 0; } return stringHasher.hash(); }
/*
 * Finds the first occurrence of the code point c in the NUL-terminated
 * UTF-16 string s.
 *
 * BMP values are delegated to u_strchr(); supplementary values are searched
 * for as an adjacent lead/trail surrogate pair. Returns a pointer to the
 * match, or NULL when there is none or c is above UCHAR_MAX_VALUE.
 */
U_CAPI UChar* U_EXPORT2
u_strchr32(const UChar* s, UChar32 c) {
    UChar lead, trail, unit;

    if ((uint32_t) c <= U_BMP_MAX) {
        /* find BMP code point */
        return u_strchr(s, (UChar) c);
    }
    if ((uint32_t) c > UCHAR_MAX_VALUE) {
        /* not a Unicode code point, not findable */
        return NULL;
    }

    /* find supplementary code point as surrogate pair */
    lead = U16_LEAD(c);
    trail = U16_TRAIL(c);
    for (; (unit = *s) != 0; ++s) {
        if (unit == lead && s[1] == trail) {
            return (UChar*) s;
        }
    }
    return NULL;
}
void XMLTreeBuilder::processHTMLEntity(const AtomicXMLToken& token) { HTMLEntitySearch search; const AtomicString& name = token.name(); for (size_t i = 0; i < name.length(); ++i) { search.advance(name[i]); if (!search.isEntityPrefix()) { m_parser->stopParsing(); return; } } search.advance(';'); UChar32 entityValue = search.currentValue(); if (entityValue <= 0xFFFF) appendToText(reinterpret_cast<UChar*>(&entityValue), 1); else { UChar utf16Pair[2] = { U16_LEAD(entityValue), U16_TRAIL(entityValue) }; appendToText(utf16Pair, 2); } }
/*
 * "current" callback of the lenient UTF-8 UCharIterator: returns the UTF-16
 * code unit at the current position without changing the iterator.
 *
 * If a supplementary code point is pending in iter->reservedField, its trail
 * surrogate is returned. Otherwise the code point at iter->start is decoded
 * from a copy of the offset: a negative result yields 0xfffd, a BMP value is
 * returned as is, and a supplementary value yields its lead surrogate.
 * Returns U_SENTINEL when iter->start has reached iter->limit.
 */
static UChar32 U_CALLCONV
lenient8IteratorCurrent(UCharIterator *iter) {
    const uint8_t *bytes;
    UChar32 cp;
    int32_t offset;

    if(iter->reservedField!=0) {
        return U16_TRAIL(iter->reservedField);
    }
    if(iter->start>=iter->limit) {
        return U_SENTINEL;
    }

    bytes=(const uint8_t *)iter->context;
    offset=iter->start;
    L8_NEXT(bytes, offset, iter->limit, cp);

    if(cp<0) {
        return 0xfffd;
    }
    if(cp<=0xffff) {
        return cp;
    }
    return U16_LEAD(cp);
}
static void TestSurrogate(){ static UChar32 s[] = {0x10000, 0x10ffff, 0x50000, 0x100000, 0x1abcd}; int i = 0; while (i < 5) { UChar first = UTF_FIRST_SURROGATE(s[i]); UChar second = UTF_SECOND_SURROGATE(s[i]); /* algorithm from the Unicode consortium */ UChar firstresult = (UChar)(((s[i] - 0x10000) / 0x400) + 0xD800); UChar secondresult = (UChar)(((s[i] - 0x10000) % 0x400) + 0xDC00); if (first != UTF16_LEAD(s[i]) || first != U16_LEAD(s[i]) || first != firstresult) { log_err("Failure in first surrogate in 0x%x expected to be 0x%x\n", s[i], firstresult); } if (second != UTF16_TRAIL(s[i]) || second != U16_TRAIL(s[i]) || second != secondresult) { log_err("Failure in second surrogate in 0x%x expected to be 0x%x\n", s[i], secondresult); } i ++; } }
// Returns a copy of the UTF-16 text in which every code point has been passed
// through mirroredChar(). Code points are decoded with U16_NEXT and re-encoded
// as one code unit or a surrogate pair, depending on the mirrored value.
String SVGFontData::createStringWithMirroredCharacters(const UChar* characters, unsigned length) const
{
    StringBuilder mirroredCharacters;
    mirroredCharacters.reserveCapacity(length);

    for (unsigned offset = 0; offset < length; ) {
        UChar32 codePoint;
        U16_NEXT(characters, offset, length, codePoint);
        codePoint = mirroredChar(codePoint);

        if (U16_LENGTH(codePoint) != 1) {
            mirroredCharacters.append(U16_LEAD(codePoint));
            mirroredCharacters.append(U16_TRAIL(codePoint));
            continue;
        }
        mirroredCharacters.append(static_cast<UChar>(codePoint));
    }

    return mirroredCharacters.toString();
}
// Exercises the StringBuilder::append() overloads: String, C string, C string
// with explicit length, empty string, single character, raw 8-bit buffers,
// and UChar32 code points.
TEST(StringBuilderTest, Append)
{
    StringBuilder builder;
    builder.append(String("0123456789"));
    expectBuilderContent("0123456789", builder);
    builder.append("abcd");
    expectBuilderContent("0123456789abcd", builder);
    // Only the first 3 characters of "efgh" are appended.
    builder.append("efgh", 3);
    expectBuilderContent("0123456789abcdefg", builder);
    // Appending an empty string leaves the content unchanged.
    builder.append("");
    expectBuilderContent("0123456789abcdefg", builder);
    builder.append('#');
    expectBuilderContent("0123456789abcdefg#", builder);

    builder.toString(); // Test after reifyString().
    StringBuilder builder1;
    // A zero-length append must also leave the content unchanged.
    builder.append("", 0);
    expectBuilderContent("0123456789abcdefg#", builder);
    // Copy builder's content into builder1, extend it, then append it back to builder.
    builder1.append(builder.characters8(), builder.length());
    builder1.append("XYZ");
    builder.append(builder1.characters8(), builder1.length());
    expectBuilderContent("0123456789abcdefg#0123456789abcdefg#XYZ", builder);

    // With capacity reserved up front, a later append is expected to keep the
    // same characters8() pointer.
    StringBuilder builder2;
    builder2.reserveCapacity(100);
    builder2.append("xyz");
    const LChar* characters = builder2.characters8();
    builder2.append("0123456789");
    EXPECT_EQ(characters, builder2.characters8());

    // Test appending UChar32 characters to StringBuilder.
    StringBuilder builderForUChar32Append;
    UChar32 frakturAChar = 0x1D504;
    builderForUChar32Append.append(frakturAChar);
    // The fraktur A is not in the BMP, so it's two UTF-16 code units long.
    EXPECT_EQ(2U, builderForUChar32Append.length());
    builderForUChar32Append.append(static_cast<UChar32>('A'));
    EXPECT_EQ(3U, builderForUChar32Append.length());
    // Expected content: the surrogate pair for the fraktur A followed by 'A'.
    const UChar resultArray[] = { U16_LEAD(frakturAChar), U16_TRAIL(frakturAChar), 'A' };
    expectBuilderContent(String(resultArray, WTF_ARRAY_LENGTH(resultArray)), builderForUChar32Append);
}
// Compares the code units in [a, aEnd) with the UTF-8 text in [b, bEnd),
// decoding |b| on the fly as UTF-16.
//
// Returns true only when |b| is well-formed UTF-8 (no surrogates, nothing out
// of range) and decodes to exactly the units in [a, aEnd). |a| is never read
// at or beyond |aEnd|: running out of |a| while |b| still has input means the
// texts differ.
ALWAYS_INLINE bool equalWithUTF8Internal(const CharType* a, const CharType* aEnd, const char* b, const char* bEnd)
{
    while (b < bEnd) {
        // Every remaining step consumes at least one unit of |a|.
        if (a == aEnd)
            return false;

        if (isASCII(*b)) {
            if (*a++ != *b++)
                return false;
            continue;
        }

        int utf8SequenceLength = inlineUTF8SequenceLengthNonASCII(*b);

        if (bEnd - b < utf8SequenceLength)
            return false;

        if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(b), utf8SequenceLength))
            return false;

        UChar32 character = readUTF8Sequence(b, utf8SequenceLength);
        ASSERT(!isASCII(character));

        if (U_IS_BMP(character)) {
            // UTF-16 surrogate values are illegal in UTF-32
            if (U_IS_SURROGATE(character))
                return false;
            if (*a++ != character)
                return false;
        } else if (U_IS_SUPPLEMENTARY(character)) {
            if (*a++ != U16_LEAD(character))
                return false;
            // The trail surrogate needs a second unit of |a|.
            if (a == aEnd)
                return false;
            if (*a++ != U16_TRAIL(character))
                return false;
        } else {
            return false;
        }
    }

    return a == aEnd;
}
// Returns the next code point and advances the iterator, or U_SENTINEL at the
// end of the text or when nextSegment() fails.
//
// The loop dispatches on the iterator state and repeats after a state change:
// - CHECK_FWD: read directly from u8, diverting to nextSegment() when the
//   code point just read fails the hasTccc/hasLccc checks below;
// - IN_FCD_SEGMENT: read from u8 until pos reaches limit;
// - IN_NORMALIZED: read from the normalized string until its end;
// - otherwise: switchToForward() and try again.
UChar32
FCDUTF8CollationIterator::nextCodePoint(UErrorCode &errorCode) {
    UChar32 c;
    for(;;) {
        if(state == CHECK_FWD) {
            // End of text: pos reached length, or, when length is negative, a zero byte was found.
            if(pos == length || ((c = u8[pos]) == 0 && length < 0)) {
                return U_SENTINEL;
            }
            // Bytes below 0x80 are returned immediately, without any FCD check.
            if(c < 0x80) {
                ++pos;
                return c;
            }
            U8_NEXT_OR_FFFD(u8, pos, length, c);
            // Supplementary code points are checked via their lead surrogate.
            if(CollationFCD::hasTccc(c <= 0xffff ? c : U16_LEAD(c)) &&
                    (CollationFCD::maybeTibetanCompositeVowel(c) ||
                        (pos != length && nextHasLccc()))) {
                // c is not FCD-inert, therefore it is not U+FFFD and it has a valid byte sequence
                // and we can use U8_LENGTH() rather than a previous-position variable.
                pos -= U8_LENGTH(c);
                if(!nextSegment(errorCode)) {
                    return U_SENTINEL;
                }
                // Re-dispatch on whatever state nextSegment() left the iterator in.
                continue;
            }
            return c;
        } else if(state == IN_FCD_SEGMENT && pos != limit) {
            U8_NEXT_OR_FFFD(u8, pos, length, c);
            return c;
        } else if(state == IN_NORMALIZED && pos != normalized.length()) {
            // Here pos indexes the normalized UTF-16 string rather than u8.
            c = normalized.char32At(pos);
            pos += U16_LENGTH(c);
            return c;
        } else {
            switchToForward();
        }
    }
}
// Returns the code point before the current position and moves the iterator
// back, or U_SENTINEL at the start of the text or when previousSegment() fails.
//
// The loop dispatches on the iterator state and repeats after a state change:
// - CHECK_BWD: read directly from u8, diverting to previousSegment() when the
//   code point just read fails the hasLccc/hasTccc checks below;
// - IN_FCD_SEGMENT: read backwards from u8 until pos reaches start;
// - state >= IN_NORMALIZED: read backwards from the normalized string;
// - otherwise: switchToBackward() and try again.
UChar32
FCDUTF8CollationIterator::previousCodePoint(UErrorCode &errorCode) {
    UChar32 c;
    for(;;) {
        if(state == CHECK_BWD) {
            if(pos == 0) {
                return U_SENTINEL;
            }
            // Bytes below 0x80 are returned immediately, without any FCD check.
            if((c = u8[pos - 1]) < 0x80) {
                --pos;
                return c;
            }
            U8_PREV_OR_FFFD(u8, 0, pos, c);
            // Supplementary code points are checked via their lead surrogate.
            if(CollationFCD::hasLccc(c <= 0xffff ? c : U16_LEAD(c)) &&
                    (CollationFCD::maybeTibetanCompositeVowel(c) ||
                        (pos != 0 && previousHasTccc()))) {
                // c is not FCD-inert, therefore it is not U+FFFD and it has a valid byte sequence
                // and we can use U8_LENGTH() rather than a previous-position variable.
                pos += U8_LENGTH(c);
                if(!previousSegment(errorCode)) {
                    return U_SENTINEL;
                }
                // Re-dispatch on whatever state previousSegment() left the iterator in.
                continue;
            }
            return c;
        } else if(state == IN_FCD_SEGMENT && pos != start) {
            U8_PREV_OR_FFFD(u8, 0, pos, c);
            return c;
        } else if(state >= IN_NORMALIZED && pos != 0) {
            // Here pos indexes the normalized UTF-16 string rather than u8.
            c = normalized.char32At(pos - 1);
            pos -= U16_LENGTH(c);
            return c;
        } else {
            switchToBackward();
        }
    }
}
// Returns |string| with HTML entities replaced by the characters they denote.
//
// Each '&' is handed to PreloadScanner::consumeEntity(). A value above 0xFFFF
// is emitted as a surrogate pair and any other usable value as one code unit.
// When no entity is decoded the '&' is kept; with
// |leaveUndecodableEntitiesUntouched| set, an 0xFFFD result is also treated as
// undecodable and the input is rewound so the original entity text is copied
// through unchanged.
String XSSAuditor::decodeHTMLEntities(const String& string, bool leaveUndecodableEntitiesUntouched)
{
    SegmentedString source(string);
    SegmentedString sourceShadow;
    Vector<UChar> result;

    while (!source.isEmpty()) {
        UChar cc = *source;
        source.advance();

        if (cc != '&') {
            result.append(cc);
            continue;
        }

        // Remember where the entity text starts so it can be replayed verbatim.
        if (leaveUndecodableEntitiesUntouched)
            sourceShadow = source;

        bool notEnoughCharacters = false;
        unsigned entity = PreloadScanner::consumeEntity(source, notEnoughCharacters);
        // We ignore notEnoughCharacters because we might as well use this loop
        // to copy the remaining characters into |result|.

        if (entity > 0xFFFF) {
            result.append(U16_LEAD(entity));
            result.append(U16_TRAIL(entity));
            continue;
        }

        const bool decodable = entity && (!leaveUndecodableEntitiesUntouched || entity != 0xFFFD);
        if (decodable) {
            result.append(entity);
            continue;
        }

        result.append('&');
        if (leaveUndecodableEntitiesUntouched)
            source = sourceShadow;
    }

    return String::adopt(result);
}
/*
 * Finds the last occurrence of the code point c in the first count UTF-16
 * code units of s.
 *
 * BMP values are delegated to u_memrchr(); supplementary values are searched
 * for, from the end, as an adjacent lead/trail surrogate pair. Returns a
 * pointer to the match, or NULL when there is none, when count is too small
 * to hold a pair, or when c is above UCHAR_MAX_VALUE.
 */
U_CAPI UChar* U_EXPORT2
u_memrchr32(const UChar* s, UChar32 c, int32_t count) {
    UChar lead, trail;
    const UChar* p;

    if ((uint32_t) c <= U_BMP_MAX) {
        /* find BMP code point */
        return u_memrchr(s, (UChar) c, count);
    }
    if (count < 2) {
        /* too short for a surrogate pair */
        return NULL;
    }
    if ((uint32_t) c > UCHAR_MAX_VALUE) {
        /* not a Unicode code point, not findable */
        return NULL;
    }

    /* find supplementary code point as surrogate pair */
    lead = U16_LEAD(c);
    trail = U16_TRAIL(c);
    /* p walks over candidate trail positions, from the last unit down to s + 1 */
    for (p = s + count - 1; p != s; --p) {
        if (*p == trail && p[-1] == lead) {
            return (UChar*) (p - 1);
        }
    }
    return NULL;
}
/*
 * "next" callback of the lenient UTF-8 UCharIterator: returns the UTF-16 code
 * unit at the current position and advances past it.
 *
 * Returns U_SENTINEL when iter->start has reached iter->limit (and no
 * half-delivered supplementary code point is pending), 0xfffd when L8_NEXT
 * reports a negative value, and otherwise a BMP code point or one surrogate of
 * a supplementary code point. A supplementary code point is delivered over
 * two calls: first its lead surrogate, then (via iter->reservedField) its
 * trail surrogate.
 */
static UChar32 U_CALLCONV
lenient8IteratorNext(UCharIterator *iter) {
    int32_t index;

    if(iter->reservedField!=0) {
        /* the lead surrogate of this code point was returned by the previous call; deliver the trail now */
        UChar trail=U16_TRAIL(iter->reservedField);
        iter->reservedField=0;
        if((index=iter->index)>=0) {
            iter->index=index+1;
        }
        return trail;
    } else if(iter->start<iter->limit) {
        const uint8_t *s=(const uint8_t *)iter->context;
        UChar32 c;

        /* read the code point at iter->start; this advances iter->start */
        L8_NEXT(s, iter->start, iter->limit, c);
        if((index=iter->index)>=0) {
            iter->index=++index;
            if(iter->length<0 && iter->start==iter->limit) {
                /* reached the end with an unset (negative) length: record it, counting two units for a supplementary code point */
                iter->length= c<=0xffff ? index : index+1;
            }
        } else if(iter->start==iter->limit && iter->length>=0) {
            /* negative index but known length: set the index from the length now that the end is reached */
            iter->index= c<=0xffff ? iter->length : iter->length-1;
        }
        if(c<0) {
            return 0xfffd;
        } else if(c<=0xffff) {
            return c;
        } else {
            iter->reservedField=c; /* remember the code point so the next call can return its trail surrogate */
            return U16_LEAD(c);
        }
    } else {
        return U_SENTINEL;
    }
}