示例#1
0
/*
 * Compare two strings as presented by UCharIterators.
 * Use code unit or code point order.
 * When the function returns, it is undefined where the iterators
 * have stopped.
 */
U_CAPI int32_t U_EXPORT2
u_strCompareIter(UCharIterator* iter1, UCharIterator* iter2, UBool codePointOrder) {
    UChar32 c1, c2;

    /* argument checking */
    if (iter1 == NULL || iter2 == NULL) {
        return 0; /* bad arguments */
    }
    if (iter1 == iter2) {
        return 0; /* identical iterators */
    }

    /* reset iterators to start? */
    iter1->move(iter1, 0, UITER_START);
    iter2->move(iter2, 0, UITER_START);

    /* compare identical prefixes - they do not need to be fixed up */
    for (; ;) {
        c1 = iter1->next(iter1);
        c2 = iter2->next(iter2);
        if (c1 != c2) {
            break;
        }
        if (c1 == -1) {
            return 0;
        }
    }

    /* if both values are in or above the surrogate range, fix them up */
    if (c1 >= 0xd800 && c2 >= 0xd800 && codePointOrder) {
        /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
        if (
        (c1 <= 0xdbff && UTF_IS_TRAIL(iter1->current(iter1))) ||
        (UTF_IS_TRAIL(c1) && (iter1->previous(iter1), UTF_IS_LEAD(iter1->previous(iter1))))
        ) {
            /* part of a surrogate pair, leave >=d800 */
        } else {
            /* BMP code point - may be surrogate code point - make <d800 */
            c1 -= 0x2800;
        }

        if (
        (c2 <= 0xdbff && UTF_IS_TRAIL(iter2->current(iter2))) ||
        (UTF_IS_TRAIL(c2) && (iter2->previous(iter2), UTF_IS_LEAD(iter2->previous(iter2))))
        ) {
            /* part of a surrogate pair, leave >=d800 */
        } else {
            /* BMP code point - may be surrogate code point - make <d800 */
            c2 -= 0x2800;
        }
    }

    /* now c1 and c2 are in the requested (code unit or code point) order */
    return (int32_t) c1 - (int32_t) c2;
}
示例#2
0
U_CAPI int32_t U_EXPORT2
u_countChar32(const UChar* s, int32_t length) {
    int32_t count;

    if (s == NULL || length < -1) {
        return 0;
    }

    count = 0;
    if (length >= 0) {
        while (length > 0) {
            ++count;
            if (UTF_IS_LEAD(*s) && length >= 2 && UTF_IS_TRAIL(*(s + 1))) {
                s += 2;
                length -= 2;
            } else {
                ++s;
                --length;
            }
        }
    } else /* length==-1 */ {
        UChar c;

        for (; ;) {
            if ((c = *s++) == 0) {
                break;
            }
            ++count;

            /*
             * sufficient to look ahead one because of UTF-16;
             * safe to look ahead one because at worst that would be the terminating NUL
             */
            if (UTF_IS_LEAD(c) && UTF_IS_TRAIL(*s)) {
                ++s;
            }
        }
    }
    return count;
}
示例#3
0
U_CAPI int32_t U_EXPORT2
uprv_strCompare(const UChar* s1, int32_t length1,
                const UChar* s2, int32_t length2,
                UBool strncmpStyle, UBool codePointOrder) {
    const UChar* start1, * start2, * limit1, * limit2;
    UChar c1, c2;

    /* setup for fix-up */
    start1 = s1;
    start2 = s2;

    /* compare identical prefixes - they do not need to be fixed up */
    if (length1 < 0 && length2 < 0) {
        /* strcmp style, both NUL-terminated */
        if (s1 == s2) {
            return 0;
        }

        for (; ;) {
            c1 = *s1;
            c2 = *s2;
            if (c1 != c2) {
                break;
            }
            if (c1 == 0) {
                return 0;
            }
            ++s1;
            ++s2;
        }

        /* setup for fix-up */
        limit1 = limit2 = NULL;
    } else if (strncmpStyle) {
        /* special handling for strncmp, assume length1==length2>=0 but also check for NUL */
        if (s1 == s2) {
            return 0;
        }

        limit1 = start1 + length1;

        for (; ;) {
            /* both lengths are same, check only one limit */
            if (s1 == limit1) {
                return 0;
            }

            c1 = *s1;
            c2 = *s2;
            if (c1 != c2) {
                break;
            }
            if (c1 == 0) {
                return 0;
            }
            ++s1;
            ++s2;
        }

        /* setup for fix-up */
        limit2 = start2 + length1; /* use length1 here, too, to enforce assumption */
    } else {
        /* memcmp/UnicodeString style, both length-specified */
        int32_t lengthResult;

        if (length1 < 0) {
            length1 = u_strlen(s1);
        }
        if (length2 < 0) {
            length2 = u_strlen(s2);
        }

        /* limit1=start1+min(lenght1, length2) */
        if (length1 < length2) {
            lengthResult = -1;
            limit1 = start1 + length1;
        } else if (length1 == length2) {
            lengthResult = 0;
            limit1 = start1 + length1;
        } else /* length1>length2 */ {
            lengthResult = 1;
            limit1 = start1 + length2;
        }

        if (s1 == s2) {
            return lengthResult;
        }

        for (; ;) {
            /* check pseudo-limit */
            if (s1 == limit1) {
                return lengthResult;
            }

            c1 = *s1;
            c2 = *s2;
            if (c1 != c2) {
                break;
            }
            ++s1;
            ++s2;
        }

        /* setup for fix-up */
        limit1 = start1 + length1;
        limit2 = start2 + length2;
    }

    /* if both values are in or above the surrogate range, fix them up */
    if (c1 >= 0xd800 && c2 >= 0xd800 && codePointOrder) {
        /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
        if (
        (c1 <= 0xdbff && (s1 + 1) != limit1 && UTF_IS_TRAIL(*(s1 + 1))) ||
        (UTF_IS_TRAIL(c1) && start1 != s1 && UTF_IS_LEAD(*(s1 - 1)))
        ) {
            /* part of a surrogate pair, leave >=d800 */
        } else {
            /* BMP code point - may be surrogate code point - make <d800 */
            c1 -= 0x2800;
        }

        if (
        (c2 <= 0xdbff && (s2 + 1) != limit2 && UTF_IS_TRAIL(*(s2 + 1))) ||
        (UTF_IS_TRAIL(c2) && start2 != s2 && UTF_IS_LEAD(*(s2 - 1)))
        ) {
            /* part of a surrogate pair, leave >=d800 */
        } else {
            /* BMP code point - may be surrogate code point - make <d800 */
            c2 -= 0x2800;
        }
    }

    /* now c1 and c2 are in the requested (code unit or code point) order */
    return (int32_t) c1 - (int32_t) c2;
}
示例#4
0
U_CAPI UChar32* U_EXPORT2
u_strToUTF32(UChar32 *dest,
             int32_t  destCapacity,
             int32_t  *pDestLength,
             const UChar *src,
             int32_t  srcLength,
             UErrorCode *pErrorCode)
{
    const UChar* pSrc = src;
    const UChar* pSrcLimit;
    int32_t reqLength=0;
    uint32_t ch=0;
    uint32_t *pDest = (uint32_t *)dest;
    uint32_t *pDestLimit = pDest + destCapacity;
    UChar ch2=0;

    /* args check */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
        return NULL;
    }


    if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    if(srcLength==-1) {
        while((ch=*pSrc)!=0 && pDest!=pDestLimit) {
            ++pSrc;
            /*need not check for NUL because NUL fails UTF_IS_TRAIL() anyway*/
            if(UTF_IS_LEAD(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
                ++pSrc;
                ch=UTF16_GET_PAIR_VALUE(ch, ch2);
            }
            *(pDest++)= ch;
        }
        while((ch=*pSrc++)!=0) {
            if(UTF_IS_LEAD(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
                ++pSrc;
            }
            ++reqLength;
        }
    } else {
        pSrcLimit = pSrc+srcLength;
        while(pSrc<pSrcLimit && pDest<pDestLimit) {
            ch=*pSrc++;
            if(UTF_IS_LEAD(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) {
                ++pSrc;
                ch=UTF16_GET_PAIR_VALUE(ch, ch2);
            }
            *(pDest++)= ch;
        }
        while(pSrc!=pSrcLimit) {
            ch=*pSrc++;
            if(UTF_IS_LEAD(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) {
                ++pSrc;
            }
            ++reqLength;
        }
    }

    reqLength+=(int32_t)(pDest - (uint32_t *)dest);
    if(pDestLength){
        *pDestLength = reqLength;
    }

    /* Terminate the buffer */
    u_terminateUChar32s(dest,destCapacity,reqLength,pErrorCode);

    return dest;
}