Пример #1
0
int ICUTransService::compareNIString(const  XMLCh* const    comp1
                                    , const XMLCh* const    comp2
                                    , const unsigned int    maxChars)
{
    if (maxChars > 0)
    {
        // Note that this function has somewhat broken semantics, as it's
        // possible for two strings of different lengths to compare as equal
        // in a case-insensitive manner, since one character could be
        // represented as a surrogate pair.
        size_t  i = 0;
        size_t  j = 0;

        for(;;)
        {
            UChar32 ch1;
            UChar32 ch2;

            U16_NEXT_UNSAFE(comp1, i, ch1);
            U16_NEXT_UNSAFE(comp2, j, ch2);

            const UChar32   folded1 =
                u_foldCase(ch1, U_FOLD_CASE_DEFAULT);

            const UChar32   folded2 =
                u_foldCase(ch2, U_FOLD_CASE_DEFAULT);

            if (folded1 != folded2)
            {
                return folded1 - folded2;
            }
            else if (i == maxChars)
            {
                // If we're at the end of both strings, return 0.
                // Otherwise, we've run out of characters in the
                // left string, so return -1.
                return j == maxChars ? 0 : -1;
            }
            else if (j == maxChars)
            {
                // We've run out of characters in the right string,
                // but not the left, so return 1.
                return 1;
            }
        }
    }

    return 0;
}
Пример #2
0
U_CFUNC UBool U_EXPORT2
ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length, const USetAdder *sa) {
    const UChar *unfold, *p;
    int32_t i, start, limit, result, unfoldRows, unfoldRowWidth, unfoldStringWidth;

    if(csp->unfold==NULL || s==NULL) {
        return FALSE; /* no reverse case folding data, or no string */
    }
    if(length<=1) {
        /* the string is too short to find any match */
        /*
         * more precise would be:
         * if(!u_strHasMoreChar32Than(s, length, 1))
         * but this does not make much practical difference because
         * a single supplementary code point would just not be found
         */
        return FALSE;
    }

    unfold=csp->unfold;
    unfoldRows=unfold[UCASE_UNFOLD_ROWS];
    unfoldRowWidth=unfold[UCASE_UNFOLD_ROW_WIDTH];
    unfoldStringWidth=unfold[UCASE_UNFOLD_STRING_WIDTH];
    unfold+=unfoldRowWidth;

    if(length>unfoldStringWidth) {
        /* the string is too long to find any match */
        return FALSE;
    }

    /* do a binary search for the string */
    start=0;
    limit=unfoldRows;
    while(start<limit) {
        i=(start+limit)/2;
        p=unfold+(i*unfoldRowWidth);
        result=strcmpMax(s, length, p, unfoldStringWidth);

        if(result==0) {
            /* found the string: add each code point, and its case closure */
            UChar32 c;

            for(i=unfoldStringWidth; i<unfoldRowWidth && p[i]!=0;) {
                U16_NEXT_UNSAFE(p, i, c);
                sa->add(sa->set, c);
                ucase_addCaseClosure(csp, c, sa);
            }
            return TRUE;
        } else if(result<0) {
            limit=i;
        } else /* result>0 */ {
            start=i+1;
        }
    }

    return FALSE; /* string not found */
}
Пример #3
0
// ---------------------------------------------------------------------------
//  ICUTransService: The virtual transcoding service API
// ---------------------------------------------------------------------------
int ICUTransService::compareIString(const   XMLCh* const    comp1
                                    , const XMLCh* const    comp2)
{
    size_t  i = 0;
    size_t  j = 0;

    for(;;)
    {
        UChar32 ch1;
        UChar32 ch2;

        U16_NEXT_UNSAFE(comp1, i, ch1);
        U16_NEXT_UNSAFE(comp2, j, ch2);

        const UChar32   folded1 =
            u_foldCase(ch1, U_FOLD_CASE_DEFAULT);

        const UChar32   folded2 =
            u_foldCase(ch2, U_FOLD_CASE_DEFAULT);

        if (folded1 !=
            folded2)
        {
            return folded1 - folded2;
        }
        else if (ch1 == 0)
        {
            // If ch1 is 0, the ch2 must also be
            // 0.  Otherwise, the previous if
            // would have failed.
            break;
        }
    }

    return 0;
}
Пример #4
0
static void
doCaseConvert(
            XMLCh*          convertString,
            FunctionType    caseFunction)
{
    // Note the semantics of this function are broken, since it's
    // possible that changing the case of a string could increase
    // its length, but there's no way to handle such a situation.
    const unsigned int  len =
            XMLString::stringLen(convertString);

    size_t  readPos = 0;
    size_t  writePos = 0;

    while(readPos < len)
    {
        UChar32     original;

        // Get the next Unicode code point.
        U16_NEXT_UNSAFE(convertString, readPos, original);

        // Convert the code point
        const UChar32   converted = caseFunction(original);

        // OK, now here's where it gets ugly.
        if (!U_IS_BMP(converted) && U_IS_BMP(original) &&
            readPos - writePos == 1)
        {
            // We do not have room to convert the
            // character without overwriting the next
            // character, so we will just stop.
            break;
        }
        else
        {
            U16_APPEND_UNSAFE(convertString, writePos, converted);
        }
    }

    convertString[writePos] = 0;
}
Пример #5
0
static void TestNextPrevChar(){

    static UChar input[]={0x0061, 0xd800, 0xdc00, 0xdbff, 0xdfff, 0x0062, 0xd841, 0xd7ff, 0xd841, 0xdc41, 0xdc00, 0x0000};
    static UChar32 result[]={
    /*next_unsafe    next_safe_ns  next_safe_s       prev_unsafe   prev_safe_ns     prev_safe_s*/
        0x0061,        0x0061,       0x0061,           0x0000,       0x0000,          0x0000,
        0x10000,       0x10000,      0x10000,          0x120400,     0xdc00,          UTF_ERROR_VALUE, 
        0xdc00,        0xdc00,       UTF_ERROR_VALUE,  0x20441,      0x20441,         0x20441,
        0x10ffff,      0x10ffff,     0x10ffff,         0xd841,       0xd841,          UTF_ERROR_VALUE, 
        0xdfff,        0xdfff,       UTF_ERROR_VALUE,  0xd7ff,       0xd7ff,          0xd7ff,   
        0x0062,        0x0062,       0x0062,           0xd841,       0xd841,          UTF_ERROR_VALUE,     
        0x1ffff,       0xd841,       UTF_ERROR_VALUE,  0x0062,       0x0062,          0x0062,
        0xd7ff,        0xd7ff,       0xd7ff,           0x10ffff,     0x10ffff,        0x10ffff,
        0x20441,       0x20441,      0x20441,          0xdbff,       0xdbff,          UTF_ERROR_VALUE,      
        0xdc41,        0xdc41,       UTF_ERROR_VALUE,  0x10000,      0x10000,         0x10000,
        0xdc00,        0xdc00,       UTF_ERROR_VALUE,  0xd800,       0xd800,          UTF_ERROR_VALUE,
        0x0000,        0x0000,       0x0000,           0x0061,       0x0061,          0x0061
    };
    static uint16_t movedOffset[]={
   /*next_unsafe    next_safe_ns  next_safe_s       prev_unsafe   prev_safe_ns     prev_safe_s*/
        1,            1,           1,                11,           11,               11,
        3,            3,           3,                9,            10 ,              10, 
        3,            3,           3,                8,            8,                8,  
        5,            5,           4,                8,            8,                8, 
        5,            5,           5,                7,            7,                7,
        6,            6,           6,                6,            6,                6,
        8,            7,           7,                5,            5,                5,
        8,            8,           8,                3,            3,                3, 
        10,           10,          10,               3,            3,                3,         
        10,           10,          10,               1,            1,                1, 
        11,           11,          11,               1,            1,                1, 
        12,           12,          12,               0,            0,                0, 
    };
      

    UChar32 c=0x0000;
    uint16_t i=0;
    uint16_t offset=0, setOffset=0;
    for(offset=0; offset<sizeof(input)/U_SIZEOF_UCHAR; offset++){
         setOffset=offset;
         UTF16_NEXT_CHAR_UNSAFE(input, setOffset, c);
         if(setOffset != movedOffset[i]){
             log_err("ERROR: UTF16_NEXT_CHAR_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
                 offset, movedOffset[i], setOffset);
         }
         if(c != result[i]){
             log_err("ERROR: UTF16_NEXT_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
         }

         setOffset=offset;
         U16_NEXT_UNSAFE(input, setOffset, c);
         if(setOffset != movedOffset[i]){
             log_err("ERROR: U16_NEXT_CHAR_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
                 offset, movedOffset[i], setOffset);
         }
         if(c != result[i]){
             log_err("ERROR: U16_NEXT_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c);
         }

         setOffset=offset;
         UTF16_NEXT_CHAR_SAFE(input, setOffset, sizeof(input)/U_SIZEOF_UCHAR, c, FALSE);
         if(setOffset != movedOffset[i+1]){
             log_err("ERROR: UTF16_NEXT_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
                 offset, movedOffset[i+1], setOffset);
         }
         if(c != result[i+1]){
             log_err("ERROR: UTF16_NEXT_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c);
         }

         setOffset=offset;
         U16_NEXT(input, setOffset, sizeof(input)/U_SIZEOF_UCHAR, c);
         if(setOffset != movedOffset[i+1]){
             log_err("ERROR: U16_NEXT failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
                 offset, movedOffset[i+1], setOffset);
         }
         if(c != result[i+1]){
             log_err("ERROR: U16_NEXT failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c);
         }

         setOffset=offset;
         UTF16_NEXT_CHAR_SAFE(input, setOffset, sizeof(input)/U_SIZEOF_UCHAR, c, TRUE);
         if(setOffset != movedOffset[i+1]){
             log_err("ERROR: UTF16_NEXT_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
                 offset, movedOffset[i+2], setOffset);
         }
         if(c != result[i+2]){
             log_err("ERROR: UTF16_NEXT_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c);
         }

         i=(uint16_t)(i+6);
    }
    i=0;
    for(offset=(uint16_t)sizeof(input)/U_SIZEOF_UCHAR; offset > 0; --offset){
         setOffset=offset;
         UTF16_PREV_CHAR_UNSAFE(input, setOffset, c);
         if(setOffset != movedOffset[i+3]){
             log_err("ERROR: UTF16_PREV_CHAR_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
                 offset, movedOffset[i+3], setOffset);
         }
         if(c != result[i+3]){
             log_err("ERROR: UTF16_PREV_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+3], c);
         }

         setOffset=offset;
         U16_PREV_UNSAFE(input, setOffset, c);
         if(setOffset != movedOffset[i+3]){
             log_err("ERROR: U16_PREV_CHAR_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
                 offset, movedOffset[i+3], setOffset);
         }
         if(c != result[i+3]){
             log_err("ERROR: U16_PREV_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+3], c);
         }

         setOffset=offset;
         UTF16_PREV_CHAR_SAFE(input, 0, setOffset, c, FALSE);
         if(setOffset != movedOffset[i+4]){
             log_err("ERROR: UTF16_PREV_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
                 offset, movedOffset[i+4], setOffset);
         }
         if(c != result[i+4]){
             log_err("ERROR: UTF16_PREV_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+4], c);
         }

         setOffset=offset;
         U16_PREV(input, 0, setOffset, c);
         if(setOffset != movedOffset[i+4]){
             log_err("ERROR: U16_PREV failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
                 offset, movedOffset[i+4], setOffset);
         }
         if(c != result[i+4]){
             log_err("ERROR: U16_PREV failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+4], c);
         }

         setOffset=offset;
         UTF16_PREV_CHAR_SAFE(input, 0,  setOffset, c, TRUE);
         if(setOffset != movedOffset[i+5]){
             log_err("ERROR: UTF16_PREV_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n",
                 offset, movedOffset[i+5], setOffset);
         } 
         if(c != result[i+5]){
             log_err("ERROR: UTF16_PREV_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+5], c);
         }

         i=(uint16_t)(i+6);
    }

}
Пример #6
0
U_CFUNC void U_EXPORT2
ucase_addCaseClosure(const UCaseProps *csp, UChar32 c, const USetAdder *sa) {
    uint16_t props;

    /*
     * Hardcode the case closure of i and its relatives and ignore the
     * data file data for these characters.
     * The Turkic dotless i and dotted I with their case mapping conditions
     * and case folding option make the related characters behave specially.
     * This code matches their closure behavior to their case folding behavior.
     */

    switch(c) {
    case 0x49:
        /* regular i and I are in one equivalence class */
        sa->add(sa->set, 0x69);
        return;
    case 0x69:
        sa->add(sa->set, 0x49);
        return;
    case 0x130:
        /* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */
        sa->addString(sa->set, iDot, 2);
        return;
    case 0x131:
        /* dotless i is in a class by itself */
        return;
    default:
        /* otherwise use the data file data */
        break;
    }

    props=UTRIE2_GET16(&csp->trie, c);
    if(!PROPS_HAS_EXCEPTION(props)) {
        if(UCASE_GET_TYPE(props)!=UCASE_NONE) {
            /* add the one simple case mapping, no matter what type it is */
            int32_t delta=UCASE_GET_DELTA(props);
            if(delta!=0) {
                sa->add(sa->set, c+delta);
            }
        }
    } else {
        /*
         * c has exceptions, so there may be multiple simple and/or
         * full case mappings. Add them all.
         */
        const uint16_t *pe0, *pe=GET_EXCEPTIONS(csp, props);
        const UChar *closure;
        uint16_t excWord=*pe++;
        int32_t idx, closureLength, fullLength, length;

        pe0=pe;

        /* add all simple case mappings */
        for(idx=UCASE_EXC_LOWER; idx<=UCASE_EXC_TITLE; ++idx) {
            if(HAS_SLOT(excWord, idx)) {
                pe=pe0;
                GET_SLOT_VALUE(excWord, idx, pe, c);
                sa->add(sa->set, c);
            }
        }

        /* get the closure string pointer & length */
        if(HAS_SLOT(excWord, UCASE_EXC_CLOSURE)) {
            pe=pe0;
            GET_SLOT_VALUE(excWord, UCASE_EXC_CLOSURE, pe, closureLength);
            closureLength&=UCASE_CLOSURE_MAX_LENGTH; /* higher bits are reserved */
            closure=(const UChar *)pe+1; /* behind this slot, unless there are full case mappings */
        } else {
            closureLength=0;
            closure=NULL;
        }

        /* add the full case folding */
        if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
            pe=pe0;
            GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, fullLength);

            /* start of full case mapping strings */
            ++pe;

            fullLength&=0xffff; /* bits 16 and higher are reserved */

            /* skip the lowercase result string */
            pe+=fullLength&UCASE_FULL_LOWER;
            fullLength>>=4;

            /* add the full case folding string */
            length=fullLength&0xf;
            if(length!=0) {
                sa->addString(sa->set, (const UChar *)pe, length);
                pe+=length;
            }

            /* skip the uppercase and titlecase strings */
            fullLength>>=4;
            pe+=fullLength&0xf;
            fullLength>>=4;
            pe+=fullLength;

            closure=(const UChar *)pe; /* behind full case mappings */
        }

        /* add each code point in the closure string */
        for(idx=0; idx<closureLength;) {
            U16_NEXT_UNSAFE(closure, idx, c);
            sa->add(sa->set, c);
        }
    }
}