/*
 * Append a full case mapping result to a UTF-8 buffer,
 * see UCASE_MAX_STRING_LENGTH.
 *
 * The mapping is encoded in `result`:
 *   - result<0:                            ~result is the code point to append
 *   - 0<=result<=UCASE_MAX_STRING_LENGTH:  append the first `result` UChars of s
 *   - otherwise:                           result itself is the code point
 *
 * dest/destCapacity describe the output buffer and destIndex is the current
 * write position. The return value is destIndex advanced by the UTF-8 length
 * of the mapping. When destIndex is already at or beyond destCapacity, or a
 * code point does not fit, nothing is written for it but the index is
 * still advanced (preflighting).
 */
static U_INLINE int32_t
appendResult(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
             int32_t result, const UChar *s) {
    UChar32 c;
    int32_t length;

    /* decode the result */
    if(result<0) {
        /* (not) original code point */
        c=~result;
        length=-1;
    } else if(result<=UCASE_MAX_STRING_LENGTH) {
        /* string of `result` UChars in s */
        c=U_SENTINEL;
        length=result;
    } else {
        /* single mapped code point */
        c=result;
        length=-1;
    }

    if(length<0) {
        /* code point */
        if(destIndex<destCapacity) {
            UBool isError=FALSE;
            U8_APPEND(dest, destIndex, destCapacity, c, isError);
            if(isError) {
                /* overflow, nothing written */
                destIndex+=U8_LENGTH(c);
            }
        } else {
            /* preflight */
            destIndex+=U8_LENGTH(c);
        }
    } else {
        /*
         * string
         *
         * destLength starts at 0 so that destIndex is left unchanged,
         * rather than advanced by an indeterminate value, if u_strToUTF8()
         * fails without storing a length.
         */
        int32_t destLength=0;
        UErrorCode errorCode=U_ZERO_ERROR;

        if(destIndex<destCapacity) {
            u_strToUTF8(
                (char *)(dest+destIndex), destCapacity-destIndex, &destLength,
                s, length,
                &errorCode);
        } else {
            /* preflight: only compute the required length */
            u_strToUTF8(
                NULL, 0, &destLength,
                s, length,
                &errorCode);
        }
        /* we might have an overflow, but we know the actual length */
        destIndex+=destLength;
    }
    return destIndex;
}
static void TestCharLength() { static const uint32_t codepoint[]={ 1, 0x0061, 1, 0x007f, 2, 0x016f, 2, 0x07ff, 3, 0x0865, 3, 0x20ac, 4, 0x20402, 4, 0x23456, 4, 0x24506, 4, 0x20402, 4, 0x10402, 3, 0xd7ff, 3, 0xe000, }; int16_t i; UBool multiple; for(i=0; i<sizeof(codepoint)/sizeof(codepoint[0]); i=(int16_t)(i+2)){ UChar32 c=codepoint[i+1]; if(UTF8_CHAR_LENGTH(c) != (uint16_t)codepoint[i] || U8_LENGTH(c) != (uint16_t)codepoint[i]){ log_err("The no: of code units for %lx:- Expected: %d Got: %d\n", c, codepoint[i], UTF8_CHAR_LENGTH(c)); }else{ log_verbose("The no: of code units for %lx is %d\n",c, UTF8_CHAR_LENGTH(c) ); } multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE); if(UTF8_NEED_MULTIPLE_UCHAR(c) != multiple){ log_err("ERROR: UTF8_NEED_MULTIPLE_UCHAR failed for %lx\n", c); } } }
/**
 * Moves one code point backward and returns it.
 *
 * Returns U_SENTINEL when the iterator is in CHECK_BWD state at pos 0,
 * or when previousSegment() reports failure. In every other situation the
 * loop either yields a code point or calls switchToBackward() and retries.
 */
UChar32
FCDUTF8CollationIterator::previousCodePoint(UErrorCode &errorCode) {
    UChar32 cp;
    for(;;) {
        if(state == CHECK_BWD) {
            if(pos == 0) {
                return U_SENTINEL;
            }
            cp = u8[pos - 1];
            if(cp < 0x80) {
                // Byte below 0x80: returned directly without the lccc/tccc checks.
                --pos;
                return cp;
            }
            U8_PREV_OR_FFFD(u8, 0, pos, cp);
            if(!CollationFCD::hasLccc(cp <= 0xffff ? cp : U16_LEAD(cp))) {
                return cp;
            }
            // Evaluation order matters: previousHasTccc() is consulted only when
            // cp is not a possible Tibetan composite vowel and pos is not 0.
            if(!CollationFCD::maybeTibetanCompositeVowel(cp) &&
                    (pos == 0 || !previousHasTccc())) {
                return cp;
            }
            // cp is not FCD-inert, so it cannot be U+FFFD and was read from a valid
            // byte sequence; U8_LENGTH() therefore restores the position we had
            // before reading it, without needing a saved copy of pos.
            pos += U8_LENGTH(cp);
            if(!previousSegment(errorCode)) {
                return U_SENTINEL;
            }
            continue;
        }
        if(state == IN_FCD_SEGMENT && pos != start) {
            U8_PREV_OR_FFFD(u8, 0, pos, cp);
            return cp;
        }
        if(state >= IN_NORMALIZED && pos != 0) {
            // Here pos indexes the UTF-16 `normalized` string.
            cp = normalized.char32At(pos - 1);
            pos -= U16_LENGTH(cp);
            return cp;
        }
        switchToBackward();
    }
}
/**
 * Moves one code point forward and returns it.
 *
 * Returns U_SENTINEL when the iterator is in CHECK_FWD state and either
 * pos equals length, or length is negative and the byte at pos is 0; also
 * when nextSegment() reports failure. In every other situation the loop
 * either yields a code point or calls switchToForward() and retries.
 */
UChar32
FCDUTF8CollationIterator::nextCodePoint(UErrorCode &errorCode) {
    UChar32 cp;
    for(;;) {
        if(state == CHECK_FWD) {
            if(pos == length) {
                return U_SENTINEL;
            }
            cp = u8[pos];
            if(cp == 0 && length < 0) {
                // Negative length: a 0 byte marks the end of the input.
                return U_SENTINEL;
            }
            if(cp < 0x80) {
                // Byte below 0x80: returned directly without the tccc/lccc checks.
                ++pos;
                return cp;
            }
            U8_NEXT_OR_FFFD(u8, pos, length, cp);
            if(!CollationFCD::hasTccc(cp <= 0xffff ? cp : U16_LEAD(cp))) {
                return cp;
            }
            // Evaluation order matters: nextHasLccc() is consulted only when cp is
            // not a possible Tibetan composite vowel and pos is not at length.
            if(!CollationFCD::maybeTibetanCompositeVowel(cp) &&
                    (pos == length || !nextHasLccc())) {
                return cp;
            }
            // cp is not FCD-inert, so it cannot be U+FFFD and was read from a valid
            // byte sequence; U8_LENGTH() therefore restores the position we had
            // before reading it, without needing a saved copy of pos.
            pos -= U8_LENGTH(cp);
            if(!nextSegment(errorCode)) {
                return U_SENTINEL;
            }
            continue;
        }
        if(state == IN_FCD_SEGMENT && pos != limit) {
            U8_NEXT_OR_FFFD(u8, pos, length, cp);
            return cp;
        }
        if(state == IN_NORMALIZED && pos != normalized.length()) {
            // Here pos indexes the UTF-16 `normalized` string.
            cp = normalized.char32At(pos);
            pos += U16_LENGTH(cp);
            return cp;
        }
        switchToForward();
    }
}