/*
 * Compare the text behind two UCharIterators, in code unit order or,
 * if codePointOrder is set, in code point order.
 *
 * Both iterators are rewound to their start before the comparison.
 * Where they are positioned when the function returns is undefined.
 *
 * Returns 0 if either iterator is NULL, if both arguments are the same
 * iterator, or if both texts are identical; otherwise the difference of the
 * first pair of differing values (adjusted for code point order when
 * requested), so that only the sign is meaningful.
 */
U_CAPI int32_t U_EXPORT2
u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder) {
    UChar32 unit1, unit2;

    /* nothing to compare: bad arguments or one and the same iterator */
    if(iter1 == NULL || iter2 == NULL || iter1 == iter2) {
        return 0;
    }

    /* always compare from the beginning of both texts */
    iter1->move(iter1, 0, UITER_START);
    iter2->move(iter2, 0, UITER_START);

    /* skip the common prefix; identical units never need a fix-up */
    do {
        unit1 = iter1->next(iter1);
        unit2 = iter2->next(iter2);
        if(unit1 == unit2 && unit1 == -1) {
            /* both texts ended at the same time without any difference */
            return 0;
        }
    } while(unit1 == unit2);

    /*
     * Code point order only differs from code unit order when both
     * differing values are in or above the surrogate range.
     */
    if(codePointOrder && unit1 >= 0xd800 && unit2 >= 0xd800) {
        /*
         * A unit that belongs to a surrogate pair keeps its value (>=d800).
         * Any other unit (a BMP code point, possibly an unpaired surrogate)
         * is lowered by 0x2800 so that it sorts below every supplementary
         * code point.
         */
        if(unit1 <= 0xdbff && UTF_IS_TRAIL(iter1->current(iter1))) {
            /* lead surrogate followed by a trail surrogate */
        } else if(UTF_IS_TRAIL(unit1) &&
                  (iter1->previous(iter1), UTF_IS_LEAD(iter1->previous(iter1)))) {
            /* trail surrogate preceded by a lead surrogate */
        } else {
            unit1 -= 0x2800;
        }

        if(unit2 <= 0xdbff && UTF_IS_TRAIL(iter2->current(iter2))) {
            /* lead surrogate followed by a trail surrogate */
        } else if(UTF_IS_TRAIL(unit2) &&
                  (iter2->previous(iter2), UTF_IS_LEAD(iter2->previous(iter2)))) {
            /* trail surrogate preceded by a lead surrogate */
        } else {
            unit2 -= 0x2800;
        }
    }

    /* the two values are now in the requested order */
    return (int32_t)unit1 - (int32_t)unit2;
}
/*
 * Count the code points in a UTF-16 string.
 *
 * A lead surrogate immediately followed by a trail surrogate counts as one
 * code point; every other code unit, including an unpaired surrogate,
 * counts as one code point on its own.
 *
 * s      - the UTF-16 text; NULL yields 0
 * length - number of code units in s, or -1 if s is NUL-terminated;
 *          any value below -1 yields 0
 *
 * Returns the number of code points.
 */
U_CAPI int32_t U_EXPORT2
u_countChar32(const UChar *s, int32_t length) {
    int32_t total = 0;

    if(length < -1 || s == NULL) {
        return 0;
    }

    if(length < 0) {
        /* length==-1: walk up to the terminating NUL */
        UChar unit;

        while((unit = *s++) != 0) {
            ++total;
            /*
             * One unit of look-ahead is enough for UTF-16, and it is safe:
             * at worst the unit looked at is the terminating NUL.
             */
            if(UTF_IS_LEAD(unit) && UTF_IS_TRAIL(*s)) {
                ++s;
            }
        }
        return total;
    }

    /* explicit length: consume one or two code units per code point */
    while(length > 0) {
        int32_t width = 1;

        if(length >= 2 && UTF_IS_LEAD(s[0]) && UTF_IS_TRAIL(s[1])) {
            width = 2;
        }
        s += width;
        length -= width;
        ++total;
    }
    return total;
}
/*
 * Compare two UTF-16 strings in code unit order or, if codePointOrder is
 * set, in code point order.
 *
 * Three modes are supported:
 * - both lengths negative: strcmp style, both strings are NUL-terminated;
 * - strncmpStyle set: strncmp style, length1==length2>=0 is assumed (only
 *   length1 is used) and a NUL also ends the comparison;
 * - otherwise: memcmp/UnicodeString style with explicit lengths; a negative
 *   length is replaced by u_strlen() of that string, and if one string is a
 *   prefix of the other the shorter one sorts first.
 *
 * Returns 0 for equal strings; otherwise a negative or positive value
 * according to the order of the strings (-1/+1 when only the lengths
 * differ in memcmp style).
 */
U_CAPI int32_t U_EXPORT2
uprv_strCompare(const UChar *s1, int32_t length1,
                const UChar *s2, int32_t length2,
                UBool strncmpStyle, UBool codePointOrder) {
    /* string boundaries, needed later to look at surrogate neighbors */
    const UChar *base1 = s1, *base2 = s2;
    const UChar *end1, *end2;
    UChar unit1, unit2;

    /* Step 1: skip the identical prefix - it never needs a fix-up. */
    if(length1 < 0 && length2 < 0) {
        /* strcmp style, both NUL-terminated */
        if(s1 == s2) {
            return 0;
        }

        for(unit1 = *s1, unit2 = *s2; unit1 == unit2; unit1 = *++s1, unit2 = *++s2) {
            if(unit1 == 0) {
                return 0;
            }
        }

        /* no limits: the strings end at their NULs */
        end1 = end2 = NULL;
    } else if(strncmpStyle) {
        /* strncmp style: assume length1==length2>=0, but stop at a NUL as well */
        if(s1 == s2) {
            return 0;
        }

        end1 = base1 + length1;

        for(;; ++s1, ++s2) {
            /* both lengths are the same, so one limit check covers both */
            if(s1 == end1) {
                return 0;
            }

            unit1 = *s1;
            unit2 = *s2;
            if(unit1 != unit2) {
                break;
            }
            if(unit1 == 0) {
                return 0;
            }
        }

        /* deliberately length1 again, to enforce the equal-length assumption */
        end2 = base2 + length1;
    } else {
        /* memcmp/UnicodeString style, both length-specified */
        int32_t lengthOrder;  /* result if one string is a prefix of the other */
        int32_t shared;       /* min(length1, length2) */

        if(length1 < 0) {
            length1 = u_strlen(s1);
        }
        if(length2 < 0) {
            length2 = u_strlen(s2);
        }

        if(length1 < length2) {
            lengthOrder = -1;
            shared = length1;
        } else if(length1 == length2) {
            lengthOrder = 0;
            shared = length1;
        } else /* length1>length2 */ {
            lengthOrder = 1;
            shared = length2;
        }

        /* pseudo-limit: only the shared length can hold a difference */
        end1 = base1 + shared;

        if(s1 == s2) {
            return lengthOrder;
        }

        for(;; ++s1, ++s2) {
            if(s1 == end1) {
                return lengthOrder;
            }

            unit1 = *s1;
            unit2 = *s2;
            if(unit1 != unit2) {
                break;
            }
        }

        /* real limits for the fix-up below */
        end1 = base1 + length1;
        end2 = base2 + length2;
    }

    /*
     * Step 2: code point order only differs from code unit order when both
     * differing units are in or above the surrogate range.
     */
    if(codePointOrder && unit1 >= 0xd800 && unit2 >= 0xd800) {
        /*
         * A unit that belongs to a surrogate pair keeps its value (>=d800).
         * Any other unit (a BMP code point, possibly an unpaired surrogate)
         * is lowered by 0x2800 so that it sorts below every supplementary
         * code point.
         */
        if(unit1 <= 0xdbff && s1 + 1 != end1 && UTF_IS_TRAIL(s1[1])) {
            /* lead surrogate followed by a trail surrogate */
        } else if(UTF_IS_TRAIL(unit1) && s1 != base1 && UTF_IS_LEAD(s1[-1])) {
            /* trail surrogate preceded by a lead surrogate */
        } else {
            unit1 -= 0x2800;
        }

        if(unit2 <= 0xdbff && s2 + 1 != end2 && UTF_IS_TRAIL(s2[1])) {
            /* lead surrogate followed by a trail surrogate */
        } else if(UTF_IS_TRAIL(unit2) && s2 != base2 && UTF_IS_LEAD(s2[-1])) {
            /* trail surrogate preceded by a lead surrogate */
        } else {
            unit2 -= 0x2800;
        }
    }

    /* the two units are now in the requested order */
    return (int32_t)unit1 - (int32_t)unit2;
}
/*
 * Convert a UTF-16 string to UTF-32.
 *
 * A lead surrogate immediately followed by a trail surrogate is combined
 * into one supplementary code point; every other code unit, including an
 * unpaired surrogate, is copied through with its own value.
 *
 * dest         - output buffer; may be NULL only if destCapacity is 0
 * destCapacity - capacity of dest in UChar32 units; must not be negative
 * pDestLength  - if not NULL, receives the number of UChar32 units that the
 *                complete output needs, even if dest is too small for it
 * src          - the UTF-16 input; must not be NULL
 * srcLength    - number of code units in src, or -1 if src is NUL-terminated
 * pErrorCode   - in/out error code; nothing is done if it is NULL or already
 *                indicates a failure. Set to U_ILLEGAL_ARGUMENT_ERROR for
 *                invalid arguments.
 *
 * Returns dest, or NULL if the function bailed out during argument checking.
 */
U_CAPI UChar32* U_EXPORT2
u_strToUTF32(UChar32 *dest,
             int32_t destCapacity,
             int32_t *pDestLength,
             const UChar *src,
             int32_t srcLength,
             UErrorCode *pErrorCode) {
    const UChar *pSrc = src;
    const UChar *pSrcLimit;
    int32_t reqLength = 0;
    uint32_t ch = 0;
    uint32_t *pDest = (uint32_t *)dest;
    uint32_t *pDestLimit;
    UChar ch2 = 0;

    /* args check */
    if(pErrorCode == NULL || U_FAILURE(*pErrorCode)) {
        return NULL;
    }

    if((src == NULL) || (srcLength < -1) || (destCapacity < 0) || (!dest && destCapacity > 0)) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    /*
     * Compute the output limit only now that the arguments are known to be
     * valid: pointer arithmetic with a negative capacity or on a NULL
     * buffer is undefined behavior. A NULL dest implies destCapacity==0
     * here, so the limit is simply the (NULL) start.
     */
    pDestLimit = (dest != NULL) ? pDest + destCapacity : pDest;

    if(srcLength == -1) {
        /* NUL-terminated input: convert while there is room in dest */
        while((ch = *pSrc) != 0 && pDest != pDestLimit) {
            ++pSrc;
            /* need not check for NUL because NUL fails UTF_IS_TRAIL() anyway */
            if(UTF_IS_LEAD(ch) && UTF_IS_TRAIL(ch2 = *pSrc)) {
                ++pSrc;
                ch = UTF16_GET_PAIR_VALUE(ch, ch2);
            }
            *(pDest++) = ch;
        }

        /* preflight: count the code points that did not fit */
        while((ch = *pSrc++) != 0) {
            if(UTF_IS_LEAD(ch) && UTF_IS_TRAIL(ch2 = *pSrc)) {
                ++pSrc;
            }
            ++reqLength;
        }
    } else {
        /* length-specified input: convert while there is room in dest */
        pSrcLimit = pSrc + srcLength;
        while(pSrc < pSrcLimit && pDest < pDestLimit) {
            ch = *pSrc++;
            /* only look ahead for a trail surrogate inside the input */
            if(UTF_IS_LEAD(ch) && pSrc < pSrcLimit && UTF_IS_TRAIL(ch2 = *pSrc)) {
                ++pSrc;
                ch = UTF16_GET_PAIR_VALUE(ch, ch2);
            }
            *(pDest++) = ch;
        }

        /* preflight: count the code points that did not fit */
        while(pSrc != pSrcLimit) {
            ch = *pSrc++;
            if(UTF_IS_LEAD(ch) && pSrc < pSrcLimit && UTF_IS_TRAIL(ch2 = *pSrc)) {
                ++pSrc;
            }
            ++reqLength;
        }
    }

    /* total required length = units written + units that did not fit */
    reqLength += (int32_t)(pDest - (uint32_t *)dest);
    if(pDestLength) {
        *pDestLength = reqLength;
    }

    /*
     * Terminate the buffer.
     * NOTE(review): u_terminateUChar32s is defined elsewhere; presumably it
     * NUL-terminates dest when there is room and reports overflow or a
     * missing terminator through pErrorCode - confirm against its definition.
     */
    u_terminateUChar32s(dest, destCapacity, reqLength, pErrorCode);

    return dest;
}