/*
 * Compare two strings as presented by UCharIterators.
 * Use code unit or code point order.
 * When the function returns, it is undefined where the iterators
 * have stopped.
 *
 * Both iterators are first moved to UITER_START, then read in lockstep
 * until the first pair of differing values.
 *
 * Returns 0 if either iterator is NULL, if both arguments are the same
 * iterator object, or if both iterators produce identical sequences;
 * otherwise returns c1-c2 for the first differing pair, after the optional
 * code point order fix-up below.
 */
U_CAPI int32_t U_EXPORT2
u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder) {
    UChar32 c1, c2;

    /* argument checking */
    if(iter1==NULL || iter2==NULL) {
        return 0; /* bad arguments */
    }
    if(iter1==iter2) {
        return 0; /* identical iterators */
    }

    /* reset iterators to start? */
    iter1->move(iter1, 0, UITER_START);
    iter2->move(iter2, 0, UITER_START);

    /* compare identical prefixes - they do not need to be fixed up */
    for(;;) {
        c1=iter1->next(iter1);
        c2=iter2->next(iter2);
        if(c1!=c2) {
            break;
        }
        /* c1==c2 here; -1 is presumably the iterators' end-of-text value, so both are exhausted */
        if(c1==-1) {
            return 0;
        }
    }

    /* if both values are in or above the surrogate range, fix them up */
    if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
        /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
        /*
         * Note: the trail-surrogate test calls previous() twice on purpose:
         * the first call steps back over the unit just read (c1), the second
         * yields the unit before it, which is then tested for a lead surrogate.
         */
        if(
            (c1<=0xdbff && U16_IS_TRAIL(iter1->current(iter1))) ||
            (U16_IS_TRAIL(c1) && (iter1->previous(iter1), U16_IS_LEAD(iter1->previous(iter1))))
        ) {
            /* part of a surrogate pair, leave >=d800 */
        } else {
            /* BMP code point - may be surrogate code point - make <d800 */
            c1-=0x2800;
        }

        /* same test for the second iterator */
        if(
            (c2<=0xdbff && U16_IS_TRAIL(iter2->current(iter2))) ||
            (U16_IS_TRAIL(c2) && (iter2->previous(iter2), U16_IS_LEAD(iter2->previous(iter2))))
        ) {
            /* part of a surrogate pair, leave >=d800 */
        } else {
            /* BMP code point - may be surrogate code point - make <d800 */
            c2-=0x2800;
        }
    }

    /* now c1 and c2 are in the requested (code unit or code point) order */
    return (int32_t)c1-(int32_t)c2;
}
/*
 * Test if a substring match inside a string is at code point boundaries.
 * All pointers refer to the same buffer.
 * The limit pointer may be NULL, all others must be real pointers.
 *
 * start      - first code unit of the searched string
 * match      - first code unit of the match
 * matchLimit - one past the last code unit of the match
 * limit      - one past the last code unit of the searched string, or NULL
 *
 * Returns FALSE if either edge of the match falls between the lead and the
 * trail unit of a surrogate pair, TRUE otherwise.
 */
static U_INLINE UBool
isMatchAtCPBoundary(const UChar* start, const UChar* match, const UChar* matchLimit, const UChar* limit) {
    if (U16_IS_TRAIL(*match) && start != match && U16_IS_LEAD(*(match - 1))) {
        /* the leading edge of the match is in the middle of a surrogate pair */
        return FALSE;
    }
    /*
     * Only look at *matchLimit when the match does not end at the end of the
     * text: the end of the match (matchLimit), not its start, must be
     * compared against limit, otherwise *matchLimit is read past the buffer.
     */
    if (U16_IS_LEAD(*(matchLimit - 1)) && matchLimit != limit && U16_IS_TRAIL(*matchLimit)) {
        /* the trailing edge of the match is in the middle of a surrogate pair */
        return FALSE;
    }
    return TRUE;
}
unsigned Font::expansionOpportunityCount(const UChar* characters, size_t length, TextDirection direction, bool& isAfterExpansion) { static bool expandAroundIdeographs = canExpandAroundIdeographsInComplexText(); unsigned count = 0; if (direction == LTR) { for (size_t i = 0; i < length; ++i) { UChar32 character = characters[i]; if (treatAsSpace(character)) { count++; isAfterExpansion = true; continue; } if (U16_IS_LEAD(character) && i + 1 < length && U16_IS_TRAIL(characters[i + 1])) { character = U16_GET_SUPPLEMENTARY(character, characters[i + 1]); i++; } if (expandAroundIdeographs && isCJKIdeographOrSymbol(character)) { if (!isAfterExpansion) count++; count++; isAfterExpansion = true; continue; } isAfterExpansion = false; } } else { for (size_t i = length; i > 0; --i) { UChar32 character = characters[i - 1]; if (treatAsSpace(character)) { count++; isAfterExpansion = true; continue; } if (U16_IS_TRAIL(character) && i > 1 && U16_IS_LEAD(characters[i - 2])) { character = U16_GET_SUPPLEMENTARY(characters[i - 2], character); i--; } if (expandAroundIdeographs && isCJKIdeographOrSymbol(character)) { if (!isAfterExpansion) count++; count++; isAfterExpansion = true; continue; } isAfterExpansion = false; } } return count; }
unsigned Character::expansionOpportunityCount(const UChar* characters, size_t length, TextDirection direction, bool& isAfterExpansion, const TextJustify textJustify) { unsigned count = 0; if (direction == LTR) { for (size_t i = 0; i < length; ++i) { UChar32 character = characters[i]; if (treatAsSpace(character)) { count++; isAfterExpansion = true; continue; } if (U16_IS_LEAD(character) && i + 1 < length && U16_IS_TRAIL(characters[i + 1])) { character = U16_GET_SUPPLEMENTARY(character, characters[i + 1]); i++; } if (textJustify == TextJustify::TextJustifyAuto && isCJKIdeographOrSymbol(character)) { if (!isAfterExpansion) count++; count++; isAfterExpansion = true; continue; } isAfterExpansion = false; } } else { for (size_t i = length; i > 0; --i) { UChar32 character = characters[i - 1]; if (treatAsSpace(character)) { count++; isAfterExpansion = true; continue; } if (U16_IS_TRAIL(character) && i > 1 && U16_IS_LEAD(characters[i - 2])) { character = U16_GET_SUPPLEMENTARY(characters[i - 2], character); i--; } if (textJustify == TextJustify::TextJustifyAuto && isCJKIdeographOrSymbol(character)) { if (!isAfterExpansion) count++; count++; isAfterExpansion = true; continue; } isAfterExpansion = false; } } return count; }
// Fills |pageToFill| with glyph data for |length| characters taken from the
// UTF-16 |buffer| (|bufferLength| code units), storing entries starting at
// page index |offset|.
//
// Returns true if at least one character mapped to a non-zero glyph.
// Returns false if the buffer is empty, if it ends with an unpaired lead
// surrogate, if the platform data has no typeface, or if no glyph was found.
bool SimpleFontData::fillGlyphPage(GlyphPage* pageToFill, unsigned offset, unsigned length, UChar* buffer, unsigned bufferLength) const
{
    // bufferLength is unsigned: without this guard bufferLength - 1 would wrap
    // around and the surrogate check below would read far outside the buffer.
    if (!bufferLength)
        return false;

    if (U16_IS_LEAD(buffer[bufferLength - 1])) {
        DLOG(ERROR) << "Last UTF-16 code unit is high-surrogate.";
        return false;
    }

    SkTypeface* typeface = platformData().typeface();
    if (!typeface) {
        DLOG(ERROR) << "fillGlyphPage called on an empty Skia typeface.";
        return false;
    }

    SkAutoSTMalloc<GlyphPage::size, uint16_t> glyphStorage(length);
    uint16_t* glyphs = glyphStorage.get();
    typeface->charsToGlyphs(buffer, SkTypeface::kUTF16_Encoding, glyphs, length);

    bool haveGlyphs = false;
    for (unsigned i = 0; i < length; i++) {
        // A glyph id of 0 is skipped, leaving that page entry untouched.
        if (glyphs[i]) {
            pageToFill->setGlyphDataForIndex(offset + i, glyphs[i], this);
            haveGlyphs = true;
        }
    }

    return haveGlyphs;
}
static void TestCodeUnitValues() { static uint16_t codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0}; int16_t i; for(i=0; i<sizeof(codeunit)/sizeof(codeunit[0]); i++){ UChar c=codeunit[i]; log_verbose("Testing code unit value of %x\n", c); if(i<4){ if(!UTF16_IS_SINGLE(c) || UTF16_IS_LEAD(c) || UTF16_IS_TRAIL(c) || !U16_IS_SINGLE(c) || U16_IS_LEAD(c) || U16_IS_TRAIL(c)){ log_err("ERROR: %x is a single character\n", c); } } if(i >= 4 && i< 8){ if(!UTF16_IS_LEAD(c) || UTF16_IS_SINGLE(c) || UTF16_IS_TRAIL(c) || !U16_IS_LEAD(c) || U16_IS_SINGLE(c) || U16_IS_TRAIL(c)){ log_err("ERROR: %x is a first surrogate\n", c); } } if(i >= 8 && i< 12){ if(!UTF16_IS_TRAIL(c) || UTF16_IS_SINGLE(c) || UTF16_IS_LEAD(c) || !U16_IS_TRAIL(c) || U16_IS_SINGLE(c) || U16_IS_LEAD(c)){ log_err("ERROR: %x is a second surrogate\n", c); } } } }
// Fills |pageToFill| with glyph data for |length| characters taken from the
// UTF-16 |buffer| (|bufferLength| code units), storing entries starting at
// page index |offset|.
//
// Returns true if at least one character mapped to a non-zero glyph.
// Returns false if the buffer is empty, if it ends with an unpaired lead
// surrogate, if the platform data has no typeface, or if no glyph was found.
bool SimpleFontData::fillGlyphPage(GlyphPage* pageToFill, unsigned offset, unsigned length, UChar* buffer, unsigned bufferLength) const
{
    // bufferLength is unsigned: without this guard bufferLength - 1 would wrap
    // around and the surrogate check below would read far outside the buffer.
    if (!bufferLength)
        return false;

    if (U16_IS_LEAD(buffer[bufferLength - 1])) {
        SkDebugf("%s last char is high-surrogate", __FUNCTION__);
        return false;
    }

    // Bail out before touching the typeface: it is dereferenced below.
    SkTypeface* typeface = platformData().typeface();
    if (!typeface) {
        SkDebugf("%s called on an empty Skia typeface", __FUNCTION__);
        return false;
    }

    SkAutoSTMalloc<GlyphPage::size, uint16_t> glyphStorage(length);
    uint16_t* glyphs = glyphStorage.get();
    typeface->charsToGlyphs(buffer, SkTypeface::kUTF16_Encoding, glyphs, length);

    bool haveGlyphs = false;
    for (unsigned i = 0; i < length; i++) {
        // A glyph id of 0 is skipped, leaving that page entry untouched.
        if (glyphs[i]) {
            pageToFill->setGlyphDataForIndex(offset + i, glyphs[i], this);
            haveGlyphs = true;
        }
    }

    return haveGlyphs;
}
void MessagePattern::setParseError(UParseError *parseError, int32_t index) { if(parseError==NULL) { return; } parseError->offset=index; // Set preContext to some of msg before index. // Avoid splitting a surrogate pair. int32_t length=index; if(length>=U_PARSE_CONTEXT_LEN) { length=U_PARSE_CONTEXT_LEN-1; if(length>0 && U16_IS_TRAIL(msg[index-length])) { --length; } } msg.extract(index-length, length, parseError->preContext); parseError->preContext[length]=0; // Set postContext to some of msg starting at index. length=msg.length()-index; if(length>=U_PARSE_CONTEXT_LEN) { length=U_PARSE_CONTEXT_LEN-1; if(length>0 && U16_IS_LEAD(msg[index+length-1])) { --length; } } msg.extract(index, length, parseError->postContext); parseError->postContext[length]=0; }
// Returns the code point that starts at code unit index i.
// A non-surrogate unit is returned as is; a lead surrogate followed, within
// m_length, by a trail surrogate yields the combined supplementary code
// point; anything else (unpaired or misplaced surrogate) yields 0.
// No bounds check is performed on i itself.
UChar32 StringImpl::characterStartingAt(unsigned i)
{
    if (!U16_IS_SINGLE(m_data[i])) {
        const bool hasFollowingUnit = i + 1 < m_length;
        if (hasFollowingUnit && U16_IS_LEAD(m_data[i]) && U16_IS_TRAIL(m_data[i + 1]))
            return U16_GET_SUPPLEMENTARY(m_data[i], m_data[i + 1]);
        return 0;
    }
    return m_data[i];
}
float ShapeResultSpacing::computeSpacing(const TextRun& run, size_t index, float& offset) { UChar32 character = run[index]; bool treatAsSpace = (Character::treatAsSpace(character) || (m_normalizeSpace && Character::isNormalizedCanvasSpaceCharacter(character))) && (character != '\t' || !m_allowTabs); if (treatAsSpace && character != noBreakSpaceCharacter) character = spaceCharacter; float spacing = 0; if (m_letterSpacing && !Character::treatAsZeroWidthSpace(character)) spacing += m_letterSpacing; if (treatAsSpace && (index || !isFirstRun(run) || character == noBreakSpaceCharacter)) spacing += m_wordSpacing; if (!hasExpansion()) return spacing; if (treatAsSpace) return spacing + nextExpansion(); if (run.is8Bit() || m_textJustify != TextJustify::TextJustifyAuto) return spacing; // isCJKIdeographOrSymbol() has expansion opportunities both before and // after each character. // http://www.w3.org/TR/jlreq/#line_adjustment if (U16_IS_LEAD(character) && index + 1 < run.length() && U16_IS_TRAIL(run[index + 1])) character = U16_GET_SUPPLEMENTARY(character, run[index + 1]); if (!Character::isCJKIdeographOrSymbol(character)) { m_isAfterExpansion = false; return spacing; } if (!m_isAfterExpansion) { // Take the expansion opportunity before this ideograph. float expandBefore = nextExpansion(); if (expandBefore) { offset += expandBefore; spacing += expandBefore; } if (!hasExpansion()) return spacing; } return spacing + nextExpansion(); }
/*
 * Count the code points in a UTF-16 string.
 * A lead surrogate immediately followed by a trail surrogate counts as one
 * code point; every other code unit, including unpaired surrogates, counts
 * as one.
 *
 * s      - the string; NULL yields 0
 * length - number of code units, or -1 if s is NUL-terminated;
 *          values below -1 yield 0
 */
U_CAPI int32_t U_EXPORT2
u_countChar32(const UChar *s, int32_t length) {
    int32_t count=0;
    int32_t i;

    if(s==NULL || length<-1) {
        return 0;
    }

    if(length<0) {
        /* length==-1: NUL-terminated */
        while(*s!=0) {
            UChar c=*s++;
            ++count;

            /*
             * sufficient to look ahead one because of UTF-16;
             * safe to look ahead one because at worst that would be the terminating NUL
             */
            if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
                ++s;
            }
        }
        return count;
    }

    /* explicit length: step by one or two code units per code point */
    i=0;
    while(i<length) {
        if(U16_IS_LEAD(s[i]) && (length-i)>=2 && U16_IS_TRAIL(s[i+1])) {
            i+=2;
        } else {
            ++i;
        }
        ++count;
    }
    return count;
}
/**
 * Returns the code point before the current position and moves backward,
 * or U_SENTINEL when the underlying iterator has nothing before it or when
 * previousSegment() fails.
 *
 * The behaviour depends on the current state:
 * - ITER_CHECK_BWD: reads directly from iter, switching to segment handling
 *   via previousSegment() when the lccc/tccc checks below require it.
 * - ITER_IN_FCD_SEGMENT (pos != start): reads a code point from iter.
 * - state >= IN_NORM_ITER_AT_LIMIT (pos != 0): reads from the normalized string.
 * - otherwise: calls switchToBackward() and retries.
 */
UChar32
FCDUIterCollationIterator::previousCodePoint(UErrorCode &errorCode) {
    UChar32 c;
    for(;;) {
        if(state == ITER_CHECK_BWD) {
            c = iter.previous(&iter);
            if(c < 0) {
                // Nothing before the current position.
                start = pos = 0;
                state = ITER_IN_FCD_SEGMENT;
                return U_SENTINEL;
            }
            if(CollationFCD::hasLccc(c)) {
                // prev stays U_SENTINEL unless the second operand of || below
                // is evaluated, in which case one more unit has been read.
                UChar32 prev = U_SENTINEL;
                if(CollationFCD::maybeTibetanCompositeVowel(c) ||
                        CollationFCD::hasTccc(prev = iter.previous(&iter))) {
                    // Undo the reads (one for c, one more if prev was read)
                    // before handing over to previousSegment().
                    iter.next(&iter);
                    if(prev >= 0) {
                        iter.next(&iter);
                    }
                    if(!previousSegment(errorCode)) {
                        return U_SENTINEL;
                    }
                    continue;
                }
                // hasLccc(trail)=true for all trail surrogates
                if(U16_IS_TRAIL(c)) {
                    if(prev < 0) {
                        prev = iter.previous(&iter);
                    }
                    if(U16_IS_LEAD(prev)) {
                        return U16_GET_SUPPLEMENTARY(prev, c);
                    }
                }
                // prev was read but is not part of the result: step back over it.
                if(prev >= 0) {
                    iter.next(&iter);
                }
            }
            return c;
        } else if(state == ITER_IN_FCD_SEGMENT && pos != start) {
            c = uiter_previous32(&iter);
            pos -= U16_LENGTH(c);
            U_ASSERT(c >= 0);
            return c;
        } else if(state >= IN_NORM_ITER_AT_LIMIT && pos != 0) {
            c = normalized.char32At(pos - 1);
            pos -= U16_LENGTH(c);
            return c;
        } else {
            switchToBackward();
        }
    }
}
// // Check if a UTF16 string is valid according to the UTF16 standard // Specifically, check that we don't have any invalid surrogate pairs // If the string is valid, we return true. // If not, we set invalidIndex to the index of the first invalid char index // and return false // If the invalid char is a lead surrogate pair, we return its index // Otherwise, we treat the char before as the invalid one and return index - 1 // This function has defined behavior only for null-terminated strings. // If the string is not null terminated, the behavior is undefined (likely hang) // static bool IsUtf16StringValid(const UChar* str, size_t length, size_t* invalidIndex) { Assert(invalidIndex != nullptr); *invalidIndex = -1; size_t i = 0; for (;;) { // Iterate through the UTF16-LE string // If we are at the end of the null terminated string, return true // since the string is valid // If not, check if the codepoint we have is a surrogate code unit. // If it is, the string is malformed since U16_NEXT would have returned // is the full codepoint if both code units in the surrogate pair were present UChar32 c; U16_NEXT(str, i, length, c); if (c == 0) { return true; } if (U_IS_SURROGATE(c)) { if (U16_IS_LEAD(c)) { *invalidIndex = i; } else { Assert(i > 0); *invalidIndex = i - 1; } return false; } if (i >= length) { return true; } } }
/**
 * Returns the code point at the current position and moves forward.
 * Returns a negative value when the underlying iterator is exhausted, and
 * U_SENTINEL when nextSegment() fails.
 *
 * The behaviour depends on the current state:
 * - ITER_CHECK_FWD: reads directly from iter, switching to segment handling
 *   via nextSegment() when the tccc/lccc checks below require it.
 * - ITER_IN_FCD_SEGMENT (pos != limit): reads a code point from iter.
 * - state >= IN_NORM_ITER_AT_LIMIT (pos != normalized.length()): reads from
 *   the normalized string.
 * - otherwise: calls switchToForward() and retries.
 */
UChar32
FCDUIterCollationIterator::nextCodePoint(UErrorCode &errorCode) {
    UChar32 c;
    for(;;) {
        if(state == ITER_CHECK_FWD) {
            c = iter.next(&iter);
            if(c < 0) {
                return c;
            }
            if(CollationFCD::hasTccc(c)) {
                if(CollationFCD::maybeTibetanCompositeVowel(c) ||
                        CollationFCD::hasLccc(iter.current(&iter))) {
                    // Step back over c so that nextSegment() starts with it.
                    iter.previous(&iter);
                    if(!nextSegment(errorCode)) {
                        return U_SENTINEL;
                    }
                    continue;
                }
            }
            if(U16_IS_LEAD(c)) {
                UChar32 trail = iter.next(&iter);
                if(U16_IS_TRAIL(trail)) {
                    return U16_GET_SUPPLEMENTARY(c, trail);
                } else if(trail >= 0) {
                    // Not a trail surrogate: un-read it and return the lone lead.
                    iter.previous(&iter);
                }
            }
            return c;
        } else if(state == ITER_IN_FCD_SEGMENT && pos != limit) {
            c = uiter_next32(&iter);
            pos += U16_LENGTH(c);
            U_ASSERT(c >= 0);
            return c;
        } else if(state >= IN_NORM_ITER_AT_LIMIT && pos != normalized.length()) {
            c = normalized.char32At(pos);
            pos += U16_LENGTH(c);
            return c;
        } else {
            switchToForward();
        }
    }
}
// Truncates proposedValue so that its submission length does not exceed
// maxLength. In the submission length a '\r' that is immediately followed
// by '\n' is not counted, so each "\r\n" pair counts once. If the cut would
// leave a lead surrogate as the last code unit, that unit is dropped too.
String HTMLTextAreaElement::sanitizeUserInputValue(const String& proposedValue, unsigned maxLength)
{
    const unsigned proposedLength = proposedValue.length();
    unsigned countedLength = 0;
    unsigned end = 0;

    while (end < proposedLength) {
        const bool isCarriageReturnOfCRLF = proposedValue[end] == '\r'
            && end + 1 < proposedLength
            && proposedValue[end + 1] == '\n';

        if (!isCarriageReturnOfCRLF) {
            ++countedLength;
            if (countedLength == maxLength) {
                // This unit is the last one that fits: keep it and stop.
                ++end;
                break;
            }
            if (countedLength > maxLength)
                break;
        }
        ++end;
    }

    // Do not end the result in the middle of a surrogate pair.
    if (end > 0 && U16_IS_LEAD(proposedValue[end - 1]))
        --end;

    return proposedValue.left(end);
}
/**
 * Repositions the iterator for string_ at newOffset, or at the last safe
 * offset not greater than newOffset when newOffset lies strictly inside the
 * string at a character that rbc_->isUnsafe() reports as unsafe.
 * Also resets otherHalf_ to 0 and dir_ to 1.
 *
 * Does nothing if status already indicates a failure on entry; returns early
 * without repositioning if iter_->nextCE() sets a failure.
 */
void CollationElementIterator::setOffset(int32_t newOffset,
                                         UErrorCode& status)
{
    if (U_FAILURE(status)) { return; }
    // Offsets at the very start or at/after the end are used unchanged.
    if (0 < newOffset && newOffset < string_.length()) {
        int32_t offset = newOffset;
        do {
            UChar c = string_.charAt(offset);
            // Stop at a safe code unit, or at a lead surrogate whose whole
            // code point is safe.
            if (!rbc_->isUnsafe(c) ||
                    (U16_IS_LEAD(c) && !rbc_->isUnsafe(string_.char32At(offset)))) {
                break;
            }
            // Back up to before this unsafe character.
            --offset;
        } while (offset > 0);
        if (offset < newOffset) {
            // We might have backed up more than necessary.
            // For example, contractions "ch" and "cu" make both 'h' and 'u' unsafe,
            // but for text "chu" setOffset(2) should remain at 2
            // although we initially back up to offset 0.
            // Find the last safe offset no greater than newOffset by iterating forward.
            int32_t lastSafeOffset = offset;
            do {
                iter_->resetToOffset(lastSafeOffset);
                // Fetch collation elements until the iterator's offset moves.
                do {
                    iter_->nextCE(status);
                    if (U_FAILURE(status)) { return; }
                } while ((offset = iter_->getOffset()) == lastSafeOffset);
                if (offset <= newOffset) {
                    lastSafeOffset = offset;
                }
            } while (offset < newOffset);
            newOffset = lastSafeOffset;
        }
    }
    iter_->resetToOffset(newOffset);
    otherHalf_ = 0;
    dir_ = 1;
}
/*
 * Get a UChar32 from the stream.
 * If fewer than two code units remain in the buffer, the buffer is refilled
 * first; when nothing remains to be read, or refilling fails, U_EOF is
 * returned. A lead surrogate is combined with the code unit that follows it
 * and the position advances by two; otherwise one code unit is consumed.
 * Returns FALSE if error is NULL or already indicates a failure.
 */
U_CAPI int32_t U_EXPORT2
ucbuf_getc32(UCHARBUF* buf,UErrorCode* error){
    int32_t codePoint = (int32_t)U_EOF;

    if(error==NULL || U_FAILURE(*error)){
        return FALSE;
    }

    if(buf->currentPos+1>=buf->bufLimit){
        if(buf->remaining==0){
            return U_EOF;
        }
        buf=ucbuf_fillucbuf(buf,error);
        if(U_FAILURE(*error)){
            return U_EOF;
        }
    }

    if(!U16_IS_LEAD(*(buf->currentPos))){
        /* single code unit */
        codePoint = *(buf->currentPos++);
    }else{
        /* lead surrogate: combine with the following unit */
        codePoint=U16_GET_SUPPLEMENTARY(buf->currentPos[0],buf->currentPos[1]);
        buf->currentPos+=2;
    }
    return codePoint;
}
// Finds the next position at or after |pos| where a line may break.
// A break is reported at index i when str[i] is a breakable space, when
// shouldBreakAfter() accepts the two preceding characters and str[i], or
// when both the previous character and the one ending at i are letters or
// numbers. In the last case, if str[i] is a trail surrogate past |pos|, the
// position of its lead (i - 1) is returned instead.
// Characters before |pos| come from |str| when available, otherwise from
// |lazyBreakIterator|. Returns |length| when no break position is found.
static inline int nextBreakablePositionBreakAllInternal(LazyLineBreakIterator& lazyBreakIterator, const CharacterType* str, unsigned length, int pos)
{
    const int end = static_cast<int>(length);

    CharacterType beforePrevious;
    if (pos > 1)
        beforePrevious = str[pos - 2];
    else
        beforePrevious = static_cast<CharacterType>(lazyBreakIterator.secondToLastCharacter());

    CharacterType previous;
    if (pos > 0)
        previous = str[pos - 1];
    else
        previous = static_cast<CharacterType>(lazyBreakIterator.lastCharacter());

    bool previousIsLetterOrNumber = isUnicodeCategoryLetterOrNumber(beforePrevious, previous);

    int i = pos;
    while (i < end) {
        const CharacterType current = str[i];
        if (isBreakableSpace(current) || shouldBreakAfter(beforePrevious, previous, current))
            return i;

        // A lead surrogate alone says nothing; classify once its trail arrives.
        if (!U16_IS_LEAD(current)) {
            const bool currentIsLetterOrNumber = isUnicodeCategoryLetterOrNumber(previous, current);
            if (currentIsLetterOrNumber && previousIsLetterOrNumber) {
                if (i > pos && U16_IS_TRAIL(current))
                    return i - 1;
                return i;
            }
            previousIsLetterOrNumber = currentIsLetterOrNumber;
        }

        beforePrevious = previous;
        previous = current;
        ++i;
    }
    return end;
}
/*
 * internal function
 *
 * Compares two UTF-16 strings while case-folding them on the fly.
 * Code units are compared one by one; when two units differ, the code point
 * each belongs to is looked up with ucase_toFullFolding() and, if it folds,
 * its folding is pushed as a temporary "level 1" buffer (fold1/fold2) that
 * is then read instead of the source string until it is exhausted.
 *
 * s1, length1 / s2, length2 - the strings; a length of -1 means NUL-terminated
 * options    - passed to ucase_toFullFolding(); _STRNCMP_STYLE makes a NUL
 *              code unit end a string even when a length is given;
 *              U_COMPARE_CODE_POINT_ORDER selects the code point order fix-up
 *              for the final difference
 * pErrorCode - must not be NULL; if it indicates a failure, 0 is returned
 *
 * Returns 0 if the strings are equal under folding, -1 if string 1 ends
 * first, 1 if string 2 ends first, otherwise the difference c1-c2 of the
 * first differing code units after folding.
 */
U_CFUNC int32_t
u_strcmpFold(const UChar *s1, int32_t length1,
             const UChar *s2, int32_t length2,
             uint32_t options,
             UErrorCode *pErrorCode) {
    const UCaseProps *csp;

    /* current-level start/limit - s1/s2 as current */
    const UChar *start1, *start2, *limit1, *limit2;

    /* case folding variables */
    const UChar *p;
    int32_t length;

    /* stacks of previous-level start/current/limit */
    CmpEquivLevel stack1[2], stack2[2];

    /* case folding buffers, only use current-level start/limit */
    UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];

    /* track which is the current level per string */
    int32_t level1, level2;

    /* current code units, and code points for lookups */
    UChar32 c1, c2, cp1, cp2;

    /* no argument error checking because this itself is not an API */

    /*
     * assume that at least the option U_COMPARE_IGNORE_CASE is set
     * otherwise this function would have to behave exactly as uprv_strCompare()
     */
    csp=ucase_getSingleton();
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* initialize */
    start1=s1;
    if(length1==-1) {
        limit1=NULL;
    } else {
        limit1=s1+length1;
    }

    start2=s2;
    if(length2==-1) {
        limit2=NULL;
    } else {
        limit2=s2+length2;
    }

    level1=level2=0;
    c1=c2=-1;

    /* comparison loop */
    for(;;) {
        /*
         * here a code unit value of -1 means "get another code unit"
         * below it will mean "this source is finished"
         */

        if(c1<0) {
            /* get next code unit from string 1, post-increment */
            for(;;) {
                if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) {
                    if(level1==0) {
                        c1=-1;
                        break;
                    }
                } else {
                    ++s1;
                    break;
                }

                /* reached end of level buffer, pop one level */
                do {
                    --level1;
                    start1=stack1[level1].start;
                } while(start1==NULL);
                s1=stack1[level1].s;
                limit1=stack1[level1].limit;
            }
        }

        if(c2<0) {
            /* get next code unit from string 2, post-increment */
            for(;;) {
                if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) {
                    if(level2==0) {
                        c2=-1;
                        break;
                    }
                } else {
                    ++s2;
                    break;
                }

                /* reached end of level buffer, pop one level */
                do {
                    --level2;
                    start2=stack2[level2].start;
                } while(start2==NULL);
                s2=stack2[level2].s;
                limit2=stack2[level2].limit;
            }
        }

        /*
         * compare c1 and c2
         * either variable c1, c2 is -1 only if the corresponding string is finished
         */
        if(c1==c2) {
            if(c1<0) {
                return 0;   /* c1==c2==-1 indicating end of strings */
            }
            c1=c2=-1;       /* make us fetch new code units */
            continue;
        } else if(c1<0) {
            return -1;      /* string 1 ends before string 2 */
        } else if(c2<0) {
            return 1;       /* string 2 ends before string 1 */
        }
        /* c1!=c2 && c1>=0 && c2>=0 */

        /* get complete code points for c1, c2 for lookups if either is a surrogate */
        cp1=c1;
        if(U_IS_SURROGATE(c1)) {
            UChar c;

            if(U_IS_SURROGATE_LEAD(c1)) {
                if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) {
                    /* advance ++s1; only below if cp1 decomposes/case-folds */
                    cp1=U16_GET_SUPPLEMENTARY(c1, c);
                }
            } else /* isTrail(c1) */ {
                /* s1 was post-incremented, so the unit before c1 is at s1-2 */
                if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) {
                    cp1=U16_GET_SUPPLEMENTARY(c, c1);
                }
            }
        }

        cp2=c2;
        if(U_IS_SURROGATE(c2)) {
            UChar c;

            if(U_IS_SURROGATE_LEAD(c2)) {
                if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) {
                    /* advance ++s2; only below if cp2 decomposes/case-folds */
                    cp2=U16_GET_SUPPLEMENTARY(c2, c);
                }
            } else /* isTrail(c2) */ {
                /* s2 was post-incremented, so the unit before c2 is at s2-2 */
                if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) {
                    cp2=U16_GET_SUPPLEMENTARY(c, c2);
                }
            }
        }

        /*
         * go down one level for each string
         * continue with the main loop as soon as there is a real change
         */

        if( level1==0 &&
            (length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0
        ) {
            /* cp1 case-folds to the code point "length" or to p[length] */
            if(U_IS_SURROGATE(c1)) {
                if(U_IS_SURROGATE_LEAD(c1)) {
                    /* advance beyond source surrogate pair if it case-folds */
                    ++s1;
                } else /* isTrail(c1) */ {
                    /*
                     * we got a supplementary code point when hitting its trail surrogate,
                     * therefore the lead surrogate must have been the same as in the other string;
                     * compare this decomposition with the lead surrogate in the other string
                     * remember that this simulates bulk text replacement:
                     * the decomposition would replace the entire code point
                     */
                    --s2;
                    c2=*(s2-1);
                }
            }

            /* push current level pointers */
            stack1[0].start=start1;
            stack1[0].s=s1;
            stack1[0].limit=limit1;
            ++level1;

            /* copy the folding result to fold1[] */
            if(length<=UCASE_MAX_STRING_LENGTH) {
                u_memcpy(fold1, p, length);
            } else {
                /* here "length" is itself the folded code point: encode it as UTF-16 */
                int32_t i=0;
                U16_APPEND_UNSAFE(fold1, i, length);
                length=i;
            }

            /* set next level pointers to case folding */
            start1=s1=fold1;
            limit1=fold1+length;

            /* get ready to read from decomposition, continue with loop */
            c1=-1;
            continue;
        }

        if( level2==0 &&
            (length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0
        ) {
            /* cp2 case-folds to the code point "length" or to p[length] */
            if(U_IS_SURROGATE(c2)) {
                if(U_IS_SURROGATE_LEAD(c2)) {
                    /* advance beyond source surrogate pair if it case-folds */
                    ++s2;
                } else /* isTrail(c2) */ {
                    /*
                     * we got a supplementary code point when hitting its trail surrogate,
                     * therefore the lead surrogate must have been the same as in the other string;
                     * compare this decomposition with the lead surrogate in the other string
                     * remember that this simulates bulk text replacement:
                     * the decomposition would replace the entire code point
                     */
                    --s1;
                    c1=*(s1-1);
                }
            }

            /* push current level pointers */
            stack2[0].start=start2;
            stack2[0].s=s2;
            stack2[0].limit=limit2;
            ++level2;

            /* copy the folding result to fold2[] */
            if(length<=UCASE_MAX_STRING_LENGTH) {
                u_memcpy(fold2, p, length);
            } else {
                /* here "length" is itself the folded code point: encode it as UTF-16 */
                int32_t i=0;
                U16_APPEND_UNSAFE(fold2, i, length);
                length=i;
            }

            /* set next level pointers to case folding */
            start2=s2=fold2;
            limit2=fold2+length;

            /* get ready to read from decomposition, continue with loop */
            c2=-1;
            continue;
        }

        /*
         * no decomposition/case folding, max level for both sides:
         * return difference result
         *
         * code point order comparison must not just return cp1-cp2
         * because when single surrogates are present then the surrogate pairs
         * that formed cp1 and cp2 may be from different string indexes
         *
         * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units
         * c1=d800 cp1=10001 c2=dc00 cp2=10000
         * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }
         *
         * therefore, use same fix-up as in ustring.c/uprv_strCompare()
         * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++
         * so we have slightly different pointer/start/limit comparisons here
         */

        if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) {
            /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
            if(
                (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) ||
                (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2)))
            ) {
                /* part of a surrogate pair, leave >=d800 */
            } else {
                /* BMP code point - may be surrogate code point - make <d800 */
                c1-=0x2800;
            }

            if(
                (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) ||
                (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2)))
            ) {
                /* part of a surrogate pair, leave >=d800 */
            } else {
                /* BMP code point - may be surrogate code point - make <d800 */
                c2-=0x2800;
            }
        }

        return c1-c2;
    }
}
// Walks the children of |start| and builds one SVGTextLayoutAttributes object
// per RenderSVGInlineText child. Each object is stored on its text renderer
// and appended to |allAttributes|. Children that are SVG inlines are visited
// recursively; all other children are skipped.
//
// |atCharacter| is the position in m_positioningLists at which the next text
// starts; it is advanced as characters are consumed.
// |lastCharacter| carries the most recently processed character across text
// renderers; it is used to detect runs of collapsible white space.
void SVGTextLayoutAttributesBuilder::propagateLayoutAttributes(RenderObject* start, Vector<SVGTextLayoutAttributes>& allAttributes, unsigned& atCharacter, UChar& lastCharacter) const
{
    for (RenderObject* child = start->firstChild(); child; child = child->nextSibling()) {
        if (child->isSVGInlineText()) {
            RenderSVGInlineText* text = toRenderSVGInlineText(child);
            const UChar* characters = text->characters();
            unsigned textLength = text->textLength();
            bool preserveWhiteSpace = shouldPreserveAllWhiteSpace(text->style());

            SVGTextLayoutAttributes attributes(text);
            attributes.reserveCapacity(textLength);

            unsigned valueListPosition = atCharacter;
            unsigned metricsLength = 1;
            SVGTextMetrics lastMetrics(SVGTextMetrics::SkippedSpaceMetrics);

            // The loop advances by the number of code units covered by the
            // metrics measured in each iteration.
            for (unsigned textPosition = 0; textPosition < textLength; textPosition += metricsLength) {
                const UChar& currentCharacter = characters[textPosition];

                SVGTextMetrics startToCurrentMetrics;
                SVGTextMetrics currentMetrics;
                unsigned valueListAdvance = 0;

                if (U16_IS_LEAD(currentCharacter) && (textPosition + 1) < textLength && U16_IS_TRAIL(characters[textPosition + 1])) {
                    // Handle surrogate pairs: two code units are measured, but
                    // the value list position only advances by one.
                    startToCurrentMetrics = SVGTextMetrics::measureCharacterRange(text, 0, textPosition + 2);
                    currentMetrics = SVGTextMetrics::measureCharacterRange(text, textPosition, 2);
                    metricsLength = currentMetrics.length();
                    valueListAdvance = 1;
                } else {
                    // Handle BMP characters.
                    startToCurrentMetrics = SVGTextMetrics::measureCharacterRange(text, 0, textPosition + 1);
                    currentMetrics = SVGTextMetrics::measureCharacterRange(text, textPosition, 1);
                    metricsLength = currentMetrics.length();
                    valueListAdvance = metricsLength;
                }

                // A zero-length measurement would never advance textPosition.
                if (!metricsLength)
                    break;

                // Frequent case for Arabic text: when measuring a single character the Arabic isolated form is taken;
                // when rendering the glyph "in context" (with its surrounding characters) it changes due to shaping.
                // So whenever runWidthAdvance != currentMetrics.width(), we are processing a text run whose length is
                // not equal to the sum of the individual lengths of the glyphs, when measuring them isolated.
                float runWidthAdvance = startToCurrentMetrics.width() - lastMetrics.width();
                if (runWidthAdvance != currentMetrics.width())
                    currentMetrics.setWidth(runWidthAdvance);

                lastMetrics = startToCurrentMetrics;
                // A space following a space (or nothing) is collapsed unless
                // white space is preserved: record it as skipped.
                if (!preserveWhiteSpace && characterIsSpace(currentCharacter) && characterIsSpaceOrNull(lastCharacter)) {
                    attributes.positioningLists().appendEmptyValues();
                    attributes.textMetricsValues().append(SVGTextMetrics(SVGTextMetrics::SkippedSpaceMetrics));
                    continue;
                }

                SVGTextLayoutAttributes::PositioningLists& positioningLists = attributes.positioningLists();
                positioningLists.appendValuesFromPosition(m_positioningLists, valueListPosition);
                attributes.textMetricsValues().append(currentMetrics);

                // Pad x/y/dx/dy/rotate value lists with empty values, if the metrics span more than one character.
                if (metricsLength > 1) {
                    for (unsigned i = 0; i < metricsLength - 1; ++i)
                        positioningLists.appendEmptyValues();
                }

                lastCharacter = currentCharacter;
                valueListPosition += valueListAdvance;
            }

#if DUMP_TEXT_LAYOUT_ATTRIBUTES > 0
            fprintf(stderr, "\nDumping layout attributes for RenderSVGInlineText, renderer=%p, node=%p (atCharacter: %i)\n", text, text->node(), atCharacter);
            fprintf(stderr, "BiDi properties: unicode-bidi=%i, block direction=%i\n", text->style()->unicodeBidi(), text->style()->direction());
            attributes.dump();
#endif

            text->storeLayoutAttributes(attributes);
            allAttributes.append(attributes);
            atCharacter = valueListPosition;
            continue;
        }

        if (!child->isSVGInline())
            continue;

        propagateLayoutAttributes(child, allAttributes, atCharacter, lastCharacter);
    }
}
// Returns true if the code unit at textPosition in m_run is a lead surrogate
// that is followed, before textLength(), by a trail surrogate.
bool characterStartsSurrogatePair(unsigned textPosition) const
{
    if (!U16_IS_LEAD(m_run[textPosition]))
        return false;
    if (textPosition + 1 >= textLength())
        return false;
    return U16_IS_TRAIL(m_run[textPosition + 1]);
}
/*
 * Check whether a UTF-16 string contains more than "number" code points,
 * without necessarily counting all of them.
 * A lead surrogate immediately followed by a trail surrogate counts as one
 * code point; every other code unit counts as one.
 *
 * s      - the string; NULL yields FALSE (unless number<0)
 * length - number of code units, or -1 if s is NUL-terminated;
 *          values below -1 yield FALSE (unless number<0)
 * number - the threshold; any negative value yields TRUE
 */
U_CAPI UBool U_EXPORT2
u_strHasMoreChar32Than(const UChar* s, int32_t length, int32_t number) {
    const UChar* limit;
    int32_t pairsAllowed;
    UChar unit;

    if (number < 0) {
        return TRUE;
    }
    if (s == NULL || length < -1) {
        return FALSE;
    }

    if (length == -1) {
        /* s is NUL-terminated: count code points until they exceed */
        while ((unit = *s++) != 0) {
            if (number == 0) {
                return TRUE;
            }
            if (U16_IS_LEAD(unit) && U16_IS_TRAIL(*s)) {
                ++s;
            }
            --number;
        }
        return FALSE;
    }

    /* length>=0 known */

    /* s contains at least (length+1)/2 code points: <=2 UChars per cp */
    if (((length + 1) / 2) > number) {
        return TRUE;
    }

    /* check if s does not even contain enough UChars */
    pairsAllowed = length - number;
    if (pairsAllowed <= 0) {
        return FALSE;
    }
    /* there are pairsAllowed=length-number more UChars than asked-for code points */

    /*
     * count code points until they exceed and also check that there are
     * no more than pairsAllowed supplementary code points (UChar pairs)
     */
    limit = s + length;
    while (s != limit) {
        if (number == 0) {
            return TRUE;
        }
        unit = *s++;
        if (U16_IS_LEAD(unit) && s != limit && U16_IS_TRAIL(*s)) {
            ++s;
            --pairsAllowed;
            if (pairsAllowed <= 0) {
                /* too many pairs - too few code points */
                return FALSE;
            }
        }
        --number;
    }
    return FALSE;
}
// Returns true if the character ending at |ch| has a general category in the
// letter or number groups. When |lastCh| and |ch| form a lead/trail surrogate
// pair the combined supplementary code point is classified, otherwise |ch|
// alone is.
static inline bool isUnicodeCategoryLetterOrNumber(UChar lastCh, UChar ch)
{
    UChar32 codePoint = ch;
    if (U16_IS_LEAD(lastCh) && U16_IS_TRAIL(ch))
        codePoint = U16_GET_SUPPLEMENTARY(lastCh, ch);

    return (U_MASK(u_charType(codePoint)) & (U_GC_L_MASK | U_GC_N_MASK)) != 0;
}
// Returns the first maxLength code units of proposedValue, or one fewer when
// the unit at index maxLength - 1 is a lead surrogate, so that the result
// does not end in the middle of a surrogate pair.
String HTMLTextAreaElement::sanitizeUserInputValue(const String& proposedValue, unsigned maxLength)
{
    const bool cutsAfterLeadSurrogate = maxLength > 0 && U16_IS_LEAD(proposedValue[maxLength - 1]);
    const unsigned keptLength = cutsAfterLeadSurrogate ? maxLength - 1 : maxLength;
    return proposedValue.left(keptLength);
}
/*
 * Encode a UTF-16 string as Punycode.
 *
 * src, srcLength     - input; srcLength==-1 means NUL-terminated
 * dest, destCapacity - output buffer; output beyond destCapacity is counted
 *                      but not written (dest may be NULL if destCapacity is 0)
 * caseFlags          - optional per-input-code-unit flags, may be NULL
 * pErrorCode         - in/out error code; nothing is done if it is NULL or
 *                      already indicates a failure
 *
 * Returns the result of u_terminateUChars() for the number of output units
 * produced, or 0 on error. Errors set here:
 *   U_ILLEGAL_ARGUMENT_ERROR  - bad src/srcLength/dest arguments
 *   U_INDEX_OUTOFBOUNDS_ERROR - more than MAX_CP_COUNT input code points
 *   U_INVALID_CHAR_FOUND      - unmatched surrogate in the input
 *   U_INTERNAL_PROGRAM_ERROR  - delta would overflow
 */
U_CFUNC int32_t
u_strToPunycode(const UChar *src, int32_t srcLength,
                UChar *dest, int32_t destCapacity,
                const UBool *caseFlags,
                UErrorCode *pErrorCode) {

    int32_t cpBuffer[MAX_CP_COUNT];
    int32_t n, delta, handledCPCount, basicLength, destLength, bias, j, m, q, k, t, srcCPCount;
    UChar c, c2;

    /* argument checking */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /*
     * Handle the basic code points and
     * convert extended ones to UTF-32 in cpBuffer (caseFlag in sign bit):
     */
    srcCPCount=destLength=0;
    if(srcLength==-1) {
        /* NUL-terminated input */
        for(j=0; /* no condition */; ++j) {
            if((c=src[j])==0) {
                break;
            }
            if(srcCPCount==MAX_CP_COUNT) {
                /* too many input code points */
                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                return 0;
            }
            if(IS_BASIC(c)) {
                /* basic code points are output directly; 0 marks them in cpBuffer */
                cpBuffer[srcCPCount++]=0;
                if(destLength<destCapacity) {
                    dest[destLength]=
                        caseFlags!=NULL ?
                            asciiCaseMap((char)c, caseFlags[j]) :
                            (char)c;
                }
                ++destLength;
            } else {
                n=(caseFlags!=NULL && caseFlags[j])<<31L;
                if(U16_IS_SINGLE(c)) {
                    n|=c;
                } else if(U16_IS_LEAD(c) && U16_IS_TRAIL(c2=src[j+1])) {
                    /* reading src[j+1] is safe here: at worst it is the terminating NUL */
                    ++j;
                    n|=(int32_t)U16_GET_SUPPLEMENTARY(c, c2);
                } else {
                    /* error: unmatched surrogate */
                    *pErrorCode=U_INVALID_CHAR_FOUND;
                    return 0;
                }
                cpBuffer[srcCPCount++]=n;
            }
        }
    } else {
        /* length-specified input */
        for(j=0; j<srcLength; ++j) {
            if(srcCPCount==MAX_CP_COUNT) {
                /* too many input code points */
                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                return 0;
            }
            c=src[j];
            if(IS_BASIC(c)) {
                /* basic code points are output directly; 0 marks them in cpBuffer */
                cpBuffer[srcCPCount++]=0;
                if(destLength<destCapacity) {
                    dest[destLength]=
                        caseFlags!=NULL ?
                            asciiCaseMap((char)c, caseFlags[j]) :
                            (char)c;
                }
                ++destLength;
            } else {
                n=(caseFlags!=NULL && caseFlags[j])<<31L;
                if(U16_IS_SINGLE(c)) {
                    n|=c;
                } else if(U16_IS_LEAD(c) && (j+1)<srcLength && U16_IS_TRAIL(c2=src[j+1])) {
                    ++j;
                    n|=(int32_t)U16_GET_SUPPLEMENTARY(c, c2);
                } else {
                    /* error: unmatched surrogate */
                    *pErrorCode=U_INVALID_CHAR_FOUND;
                    return 0;
                }
                cpBuffer[srcCPCount++]=n;
            }
        }
    }

    /* Finish the basic string - if it is not empty - with a delimiter. */
    basicLength=destLength;
    if(basicLength>0) {
        if(destLength<destCapacity) {
            dest[destLength]=DELIMITER;
        }
        ++destLength;
    }

    /*
     * handledCPCount is the number of code points that have been handled
     * basicLength is the number of basic code points
     * destLength is the number of chars that have been output
     */

    /* Initialize the state: */
    n=INITIAL_N;
    delta=0;
    bias=INITIAL_BIAS;

    /* Main encoding loop: */
    for(handledCPCount=basicLength; handledCPCount<srcCPCount; /* no op */) {
        /*
         * All non-basic code points < n have been handled already.
         * Find the next larger one:
         */
        for(m=0x7fffffff, j=0; j<srcCPCount; ++j) {
            q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */
            if(n<=q && q<m) {
                m=q;
            }
        }

        /*
         * Increase delta enough to advance the decoder's
         * <n,i> state to <m,0>, but guard against overflow:
         */
        if(m-n>(0x7fffffff-MAX_CP_COUNT-delta)/(handledCPCount+1)) {
            *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
            return 0;
        }
        delta+=(m-n)*(handledCPCount+1);
        n=m;

        /* Encode a sequence of same code points n */
        for(j=0; j<srcCPCount; ++j) {
            q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */
            if(q<n) {
                ++delta;
            } else if(q==n) {
                /* Represent delta as a generalized variable-length integer: */
                for(q=delta, k=BASE; /* no condition */; k+=BASE) {

                    /** RAM: comment out the old code for conformance with draft-ietf-idn-punycode-03.txt

                    t=k-bias;
                    if(t<TMIN) {
                        t=TMIN;
                    } else if(t>TMAX) {
                        t=TMAX;
                    }
                    */

                    t=k-bias;
                    if(t<TMIN) {
                        t=TMIN;
                    } else if(k>=(bias+TMAX)) {
                        t=TMAX;
                    }

                    if(q<t) {
                        break;
                    }

                    if(destLength<destCapacity) {
                        dest[destLength]=digitToBasic(t+(q-t)%(BASE-t), 0);
                    }
                    ++destLength;
                    q=(q-t)/(BASE-t);
                }

                /* final digit; the case flag is taken from the sign bit of the cpBuffer entry */
                if(destLength<destCapacity) {
                    dest[destLength]=digitToBasic(q, (UBool)(cpBuffer[j]<0));
                }
                ++destLength;
                bias=adaptBias(delta, handledCPCount+1, (UBool)(handledCPCount==basicLength));
                delta=0;
                ++handledCPCount;
            }
        }

        ++delta;
        ++n;
    }

    return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
}
// Returns true if the code unit at m_textPosition in m_run is a lead
// surrogate that is followed, within m_run.charactersLength(), by a trail
// surrogate.
inline bool SVGTextMetricsBuilder::currentCharacterStartsSurrogatePair() const
{
    if (!U16_IS_LEAD(m_run[m_textPosition]))
        return false;

    const bool hasFollowingUnit = int(m_textPosition + 1) < m_run.charactersLength();
    return hasFollowingUnit && U16_IS_TRAIL(m_run[m_textPosition + 1]);
}
static int generateComponents(TextRunComponents* components, const Font &font, const TextRun &run) { int letterSpacing = font.letterSpacing(); int wordSpacing = font.wordSpacing(); int padding = run.expansion(); int numSpaces = 0; if (padding) { for (int i = 0; i < run.length(); i++) if (Font::treatAsSpace(run[i])) ++numSpaces; } int offset = 0; if (letterSpacing) { // need to draw every letter on it's own int start = 0; if (Font::treatAsSpace(run[0])) { int add = 0; if (numSpaces) { add = padding/numSpaces; padding -= add; --numSpaces; } components->append(TextRunComponent(1, font, offset)); offset += add + letterSpacing + components->last().m_width; start = 1; } for (int i = 1; i < run.length(); ++i) { UChar ch = run[i]; if (U16_IS_LEAD(ch) && U16_IS_TRAIL(run[i-1])) ch = U16_GET_SUPPLEMENTARY(ch, run[i-1]); if (U16_IS_TRAIL(ch) || U_GET_GC_MASK(ch) & U_GC_MN_MASK) continue; if (Font::treatAsSpace(run[i])) { int add = 0; if (i - start > 0) { components->append(TextRunComponent(run.characters16() + start, i - start, run, font, offset)); offset += components->last().m_width + letterSpacing; } if (numSpaces) { add = padding/numSpaces; padding -= add; --numSpaces; } components->append(TextRunComponent(1, font, offset)); offset += wordSpacing + add + components->last().m_width + letterSpacing; start = i + 1; continue; } if (i - start > 0) { components->append(TextRunComponent(run.characters16() + start, i - start, run, font, offset)); offset += components->last().m_width + letterSpacing; } start = i; } if (run.length() - start > 0) { components->append(TextRunComponent(run.characters16() + start, run.length() - start, run, font, offset)); offset += components->last().m_width; } offset += letterSpacing; } else { int start = 0; for (int i = 0; i < run.length(); ++i) { if (Font::treatAsSpace(run[i])) { if (i - start > 0) { components->append(TextRunComponent(run.characters16() + start, i - start, run, font, offset)); offset += components->last().m_width; } int add = 0; 
if (numSpaces) { add = padding/numSpaces; padding -= add; --numSpaces; } components->append(TextRunComponent(1, font, offset)); offset += add + components->last().m_width; if (i) offset += wordSpacing; start = i + 1; } } if (run.length() - start > 0) { components->append(TextRunComponent(run.characters16() + start, run.length() - start, run, font, offset)); offset += components->last().m_width; } } return offset; }
/*
 * Compare two UTF-16 strings in code unit order or, if codePointOrder is set,
 * in code point order.
 *
 * Three modes are selected by the arguments:
 *  - length1<0 and length2<0: both strings are NUL-terminated (strcmp style);
 *  - strncmpStyle: at most length1 units are compared and a NUL also ends the
 *    comparison; length2 is not consulted;
 *  - otherwise: both strings are length-specified (a negative length is
 *    replaced by u_strlen()); if one string is a prefix of the other the
 *    result is -1, 0 or 1 according to the lengths.
 *
 * Returns 0 for equal strings, otherwise the difference of the first pair of
 * differing code units after the optional code point order fix-up.
 */
U_CFUNC int32_t U_EXPORT2
uprv_strCompare(const UChar *s1, int32_t length1,
                const UChar *s2, int32_t length2,
                UBool strncmpStyle, UBool codePointOrder) {
    /* the fix-up below looks one unit backwards, so remember both starts */
    const UChar *const start1=s1;
    const UChar *const start2=s2;
    const UChar *limit1, *limit2;
    UChar c1, c2;

    /* skip the identical prefix - it never needs to be fixed up */
    if(length1<0 && length2<0) {
        /* strcmp style: both NUL-terminated */
        if(s1==s2) {
            return 0;
        }

        for(;; ++s1, ++s2) {
            c1=*s1;
            c2=*s2;
            if(c1!=c2) {
                break;
            }
            if(c1==0) {
                return 0;
            }
        }

        /* no limits: the strings end at their NULs */
        limit1=limit2=NULL;
    } else if(strncmpStyle) {
        /* strncmp style: length1==length2>=0 is assumed, NUL is honored too */
        if(s1==s2) {
            return 0;
        }

        limit1=start1+length1;

        for(;; ++s1, ++s2) {
            /* the lengths are taken to be equal, so one limit check suffices */
            if(s1==limit1) {
                return 0;
            }

            c1=*s1;
            c2=*s2;
            if(c1!=c2) {
                break;
            }
            if(c1==0) {
                return 0;
            }
        }

        /* length1 is used for the second limit as well, enforcing the assumption */
        limit2=start2+length1;
    } else {
        /* memcmp/UnicodeString style: both strings are length-specified */
        int32_t lengthResult;
        int32_t commonLength;

        if(length1<0) {
            length1=u_strlen(s1);
        }
        if(length2<0) {
            length2=u_strlen(s2);
        }

        /* result if one string turns out to be a prefix of the other: -1, 0 or 1 */
        lengthResult=(int32_t)(length1>length2)-(int32_t)(length1<length2);

        /* temporary limit1=start1+min(length1, length2) */
        commonLength= length1<length2 ? length1 : length2;
        limit1=start1+commonLength;

        if(s1==s2) {
            return lengthResult;
        }

        for(;; ++s1, ++s2) {
            /* reached the end of the shorter string */
            if(s1==limit1) {
                return lengthResult;
            }

            c1=*s1;
            c2=*s2;
            if(c1!=c2) {
                break;
            }
        }

        /* real limits for the fix-up */
        limit1=start1+length1;
        limit2=start2+length2;
    }

    /*
     * Code point order: when both differing units are in or above the
     * surrogate range, BMP code points (including unpaired surrogates) are
     * shifted down by 0x2800 so that they sort below supplementary ones.
     * Units that belong to a surrogate pair are left >=0xd800.
     */
    if(codePointOrder && c1>=0xd800 && c2>=0xd800) {
        if(c1<=0xdbff) {
            /* lead surrogate: unpaired unless a trail surrogate follows */
            if((s1+1)==limit1 || !U16_IS_TRAIL(*(s1+1))) {
                c1-=0x2800;
            }
        } else if(!U16_IS_TRAIL(c1) || start1==s1 || !U16_IS_LEAD(*(s1-1))) {
            /* not a trail surrogate, or a trail without a preceding lead */
            c1-=0x2800;
        }

        if(c2<=0xdbff) {
            /* lead surrogate: unpaired unless a trail surrogate follows */
            if((s2+1)==limit2 || !U16_IS_TRAIL(*(s2+1))) {
                c2-=0x2800;
            }
        } else if(!U16_IS_TRAIL(c2) || start2==s2 || !U16_IS_LEAD(*(s2-1))) {
            /* not a trail surrogate, or a trail without a preceding lead */
            c2-=0x2800;
        }
    }

    /* now c1 and c2 are in the requested (code unit or code point) order */
    return (int32_t)c1-(int32_t)c2;
}
static void U_CALLCONV _Bocu1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, UErrorCode *pErrorCode) { UConverter *cnv; const UChar *source, *sourceLimit; uint8_t *target; int32_t targetCapacity; int32_t *offsets; int32_t prev, c, diff; int32_t sourceIndex, nextSourceIndex; /* set up the local pointers */ cnv=pArgs->converter; source=pArgs->source; sourceLimit=pArgs->sourceLimit; target=(uint8_t *)pArgs->target; targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); offsets=pArgs->offsets; /* get the converter state from UConverter */ c=cnv->fromUChar32; prev=(int32_t)cnv->fromUnicodeStatus; if(prev==0) { prev=BOCU1_ASCII_PREV; } /* sourceIndex=-1 if the current character began in the previous buffer */ sourceIndex= c==0 ? 0 : -1; nextSourceIndex=0; /* conversion loop */ if(c!=0 && targetCapacity>0) { goto getTrail; } fastSingle: /* fast loop for single-byte differences */ /* use only one loop counter variable, targetCapacity, not also source */ diff=(int32_t)(sourceLimit-source); if(targetCapacity>diff) { targetCapacity=diff; } while(targetCapacity>0 && (c=*source)<0x3000) { if(c<=0x20) { if(c!=0x20) { prev=BOCU1_ASCII_PREV; } *target++=(uint8_t)c; *offsets++=nextSourceIndex++; ++source; --targetCapacity; } else { diff=c-prev; if(DIFF_IS_SINGLE(diff)) { prev=BOCU1_SIMPLE_PREV(c); *target++=(uint8_t)PACK_SINGLE_DIFF(diff); *offsets++=nextSourceIndex++; ++source; --targetCapacity; } else { break; } } } /* restore real values */ targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target); sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */ /* regular loop for all cases */ while(source<sourceLimit) { if(targetCapacity>0) { c=*source++; ++nextSourceIndex; if(c<=0x20) { /* * ISO C0 control & space: * Encode directly for MIME compatibility, * and reset state except for space, to not disrupt compression. 
*/ if(c!=0x20) { prev=BOCU1_ASCII_PREV; } *target++=(uint8_t)c; *offsets++=sourceIndex; --targetCapacity; sourceIndex=nextSourceIndex; continue; } if(U16_IS_LEAD(c)) { getTrail: if(source<sourceLimit) { /* test the following code unit */ UChar trail=*source; if(U16_IS_TRAIL(trail)) { ++source; ++nextSourceIndex; c=U16_GET_SUPPLEMENTARY(c, trail); } } else { /* no more input */ c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */ break; } } /* * all other Unicode code points c==U+0021..U+10ffff * are encoded with the difference c-prev * * a new prev is computed from c, * placed in the middle of a 0x80-block (for most small scripts) or * in the middle of the Unihan and Hangul blocks * to statistically minimize the following difference */ diff=c-prev; prev=BOCU1_PREV(c); if(DIFF_IS_SINGLE(diff)) { *target++=(uint8_t)PACK_SINGLE_DIFF(diff); *offsets++=sourceIndex; --targetCapacity; sourceIndex=nextSourceIndex; if(c<0x3000) { goto fastSingle; } } else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity) { /* optimize 2-byte case */ int32_t m; if(diff>=0) { diff-=BOCU1_REACH_POS_1+1; m=diff%BOCU1_TRAIL_COUNT; diff/=BOCU1_TRAIL_COUNT; diff+=BOCU1_START_POS_2; } else { diff-=BOCU1_REACH_NEG_1; NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); diff+=BOCU1_START_NEG_2; } *target++=(uint8_t)diff; *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m); *offsets++=sourceIndex; *offsets++=sourceIndex; targetCapacity-=2; sourceIndex=nextSourceIndex; } else { int32_t length; /* will be 2..4 */ diff=packDiff(diff); length=BOCU1_LENGTH_FROM_PACKED(diff); /* write the output character bytes from diff and length */ /* from the first if in the loop we know that targetCapacity>0 */ if(length<=targetCapacity) { switch(length) { /* each branch falls through to the next one */ case 4: *target++=(uint8_t)(diff>>24); *offsets++=sourceIndex; U_FALLTHROUGH; case 3: *target++=(uint8_t)(diff>>16); *offsets++=sourceIndex; U_FALLTHROUGH; case 2: *target++=(uint8_t)(diff>>8); 
*offsets++=sourceIndex; /* case 1: handled above */ *target++=(uint8_t)diff; *offsets++=sourceIndex; U_FALLTHROUGH; default: /* will never occur */ break; } targetCapacity-=length; sourceIndex=nextSourceIndex; } else { uint8_t *charErrorBuffer; /* * We actually do this backwards here: * In order to save an intermediate variable, we output * first to the overflow buffer what does not fit into the * regular target. */ /* we know that 1<=targetCapacity<length<=4 */ length-=targetCapacity; charErrorBuffer=(uint8_t *)cnv->charErrorBuffer; switch(length) { /* each branch falls through to the next one */ case 3: *charErrorBuffer++=(uint8_t)(diff>>16); U_FALLTHROUGH; case 2: *charErrorBuffer++=(uint8_t)(diff>>8); U_FALLTHROUGH; case 1: *charErrorBuffer=(uint8_t)diff; U_FALLTHROUGH; default: /* will never occur */ break; } cnv->charErrorBufferLength=(int8_t)length; /* now output what fits into the regular target */ diff>>=8*length; /* length was reduced by targetCapacity */ switch(targetCapacity) { /* each branch falls through to the next one */ case 3: *target++=(uint8_t)(diff>>16); *offsets++=sourceIndex; U_FALLTHROUGH; case 2: *target++=(uint8_t)(diff>>8); *offsets++=sourceIndex; U_FALLTHROUGH; case 1: *target++=(uint8_t)diff; *offsets++=sourceIndex; U_FALLTHROUGH; default: /* will never occur */ break; } /* target overflow */ targetCapacity=0; *pErrorCode=U_BUFFER_OVERFLOW_ERROR; break; } } } else {
/* Parse a single escape sequence.  Although this method deals in
 * UChars, it does not use C++ or UnicodeString.  This allows it to
 * be used from C contexts.
 *
 * Recognized forms (the backslash itself has already been consumed):
 *   uhhhh        exactly 4 hex digits
 *   Uhhhhhhhh    exactly 8 hex digits
 *   xh, xhh      1-2 hex digits
 *   x{h...}      1-8 hex digits, closed by '}' directly after the digits
 *   ooo          1-3 octal digits
 *   cX           control-X, computed as X & 0x1F
 *   a character listed in UNESCAPE_MAP maps to its table value
 *   any other character stands for itself (a surrogate pair is combined)
 *
 * A numeric escape that yields a lead surrogate is joined with a directly
 * following trail surrogate, given either literally or as another escape.
 *
 * @param charAt  callback returning the UChar at a given index
 * @param offset  in: index of the first UChar after the backslash;
 *                out: index just past the escape, or unchanged on failure
 * @param length  number of UChars available through charAt
 * @param context opaque pointer passed through to charAt
 * @return the code point denoted by the escape, or (UChar32)0xFFFFFFFF if the
 *         offset is out of range or the escape is malformed
 */
U_CAPI UChar32 U_EXPORT2
u_unescapeAt(UNESCAPE_CHAR_AT charAt,
             int32_t* offset,
             int32_t length,
             void* context) {

    int32_t start = *offset;
    UChar c;
    UChar32 result = 0;
    int8_t n = 0;
    int8_t minDig = 0;
    int8_t maxDig = 0;
    int8_t bitsPerDigit = 4;
    int8_t dig;
    int32_t i;
    UBool braces = FALSE;

    /* Check that offset is in range */
    if (*offset < 0 || *offset >= length) {
        goto err;
    }

    /* Fetch first UChar after '\\' */
    c = charAt((*offset)++, context);

    /* Convert hexadecimal and octal escapes */
    switch (c) {
    case 0x0075 /*'u'*/:
        minDig = maxDig = 4;
        break;
    case 0x0055 /*'U'*/:
        minDig = maxDig = 8;
        break;
    case 0x0078 /*'x'*/:
        minDig = 1;
        if (*offset < length && charAt(*offset, context) == 0x7B /*{*/) {
            ++(*offset);
            braces = TRUE;
            maxDig = 8;
        } else {
            maxDig = 2;
        }
        break;
    default:
        dig = _digit8(c);
        if (dig >= 0) {
            minDig = 1;
            maxDig = 3;
            n = 1; /* Already have first octal digit */
            bitsPerDigit = 3;
            result = dig;
        }
        break;
    }

    /* minDig != 0 means that a numeric escape was recognized above */
    if (minDig != 0) {
        while (*offset < length && n < maxDig) {
            c = charAt(*offset, context);
            dig = (int8_t) ((bitsPerDigit == 3) ? _digit8(c) : _digit16(c));
            if (dig < 0) {
                break;
            }
            result = (result << bitsPerDigit) | dig;
            ++(*offset);
            ++n;
        }
        if (n < minDig) {
            goto err;
        }
        if (braces) {
            /* c is the last UChar fetched by the digit loop */
            if (c != 0x7D /*}*/) {
                goto err;
            }
            ++(*offset);
        }
        if (result < 0 || result >= 0x110000) {
            goto err;
        }
        /* If an escape sequence specifies a lead surrogate, see if
         * there is a trail surrogate after it, either as an escape or
         * as a literal.  If so, join them up into a supplementary.
         */
        if (*offset < length && U16_IS_LEAD(result)) {
            int32_t ahead = *offset + 1;
            /* Held as a full code point: narrowing the value of a nested
             * escape to 16 bits would make a supplementary code point such
             * as U+1DC00 look like the trail surrogate U+DC00.
             */
            UChar32 trail = charAt(*offset, context);
            if (trail == 0x5C /*'\\'*/ && ahead < length) {
                /* Calling ourselves without a bound would recurse once per
                 * repeated escaped lead surrogate and could exhaust the
                 * stack.  No escape that yields a trail surrogate is longer
                 * than the 11 UChars of "x{0000DFFF}", so a window of that
                 * size after the backslash is sufficient.
                 */
                int32_t tailLimit = (length - ahead > 11) ? ahead + 11 : length;
                trail = u_unescapeAt(charAt, &ahead, tailLimit, context);
            }
            if (U16_IS_TRAIL(trail)) {
                *offset = ahead;
                result = U16_GET_SUPPLEMENTARY(result, trail);
            }
        }
        return result;
    }

    /* Convert C-style escapes in table */
    for (i = 0; i < UNESCAPE_MAP_LENGTH; i += 2) {
        if (c == UNESCAPE_MAP[i]) {
            return UNESCAPE_MAP[i + 1];
        } else if (c < UNESCAPE_MAP[i]) {
            break;
        }
    }

    /* Map \cX to control-X: X & 0x1F */
    if (c == 0x0063 /*'c'*/ && *offset < length) {
        c = charAt((*offset)++, context);
        if (U16_IS_LEAD(c) && *offset < length) {
            UChar c2 = charAt(*offset, context);
            if (U16_IS_TRAIL(c2)) {
                ++(*offset);
                /* Narrowing to UChar is harmless here: only the low five
                 * bits are used below.
                 */
                c = (UChar) U16_GET_SUPPLEMENTARY(c, c2);
            }
        }
        return 0x1F & c;
    }

    /* If no special forms are recognized, then consider
     * the backslash to generically escape the next character.
     * Deal with surrogate pairs.
     */
    if (U16_IS_LEAD(c) && *offset < length) {
        UChar c2 = charAt(*offset, context);
        if (U16_IS_TRAIL(c2)) {
            ++(*offset);
            return U16_GET_SUPPLEMENTARY(c, c2);
        }
    }
    return c;

 err:
    /* Invalid escape sequence */
    *offset = start; /* Reset to initial value */
    return (UChar32) 0xFFFFFFFF;
}