Exemplo n.º 1
0
/* Helper for dexIsValidMemberNameUtf8(); do not call directly. */
bool dexIsValidMemberNameUtf8_0(const char** pUtf8Ptr) {
    /*
     * It's a multibyte encoded character. Decode it and analyze. We
     * accept anything that isn't (a) an improperly encoded low value,
     * (b) an improper surrogate pair, (c) an encoded '\0', (d) a high
     * control character, or (e) a high space, layout, or special
     * character (U+00a0, U+2000..U+200f, U+2028..U+202f,
     * U+fff0..U+ffff). This is all specified in the dex format
     * document.
     */

    u2 utf16 = dexGetUtf16FromUtf8(pUtf8Ptr);

    // Perform follow-up tests based on the high 8 bits.
    switch (utf16 >> 8) {
        case 0x00: {
            // It's only valid if it's above the ISO-8859-1 high space (0xa0).
            return (utf16 > 0x00a0);
        }
        case 0xd8:
        case 0xd9:
        case 0xda:
        case 0xdb: {
            /*
             * It's a leading surrogate. Check to see that a trailing
             * surrogate follows.
             */
            utf16 = dexGetUtf16FromUtf8(pUtf8Ptr);
            return (utf16 >= 0xdc00) && (utf16 <= 0xdfff);
        }
        case 0xdc:
        case 0xdd:
        case 0xde:
        case 0xdf: {
            // It's a trailing surrogate, which is not valid at this point.
            return false;
        }
        case 0x20:
        case 0xff: {
            // It's in the range that has spaces, controls, and specials.
            switch (utf16 & 0xfff8) {
                case 0x2000:
                case 0x2008:
                case 0x2028:
                case 0xfff0:
                case 0xfff8: {
                    return false;
                }
            }
            break;
        }
    }

    return true;
}
Exemplo n.º 2
0
/* Compare two '\0'-terminated modified UTF-8 strings, using Unicode
 * code point values for comparison. This treats different encodings
 * for the same code point as equivalent, except that only a real '\0'
 * byte is considered the string terminator. The return value is as
 * for strcmp(). */
int dexUtf8Cmp(const char* s1, const char* s2) {
    for (;;) {
        if (*s1 == '\0') {
            if (*s2 == '\0') {
                return 0;
            }
            return -1;
        } else if (*s2 == '\0') {
            return 1;
        }

        int utf1 = dexGetUtf16FromUtf8(&s1);
        int utf2 = dexGetUtf16FromUtf8(&s2);
        int diff = utf1 - utf2;

        if (diff != 0) {
            return diff;
        }
    }
}
Exemplo n.º 3
0
/*
 * Convert a "modified" UTF-8 string to UTF-16.
 */
void dvmConvertUtf8ToUtf16(u2* utf16Str, const char* utf8Str)
{
    while (*utf8Str != '\0')
        *utf16Str++ = dexGetUtf16FromUtf8(&utf8Str);
}