/*
 * Helper for dexIsValidMemberNameUtf8(); do not call directly.
 *
 * Handles the multibyte case: pulls one UTF-16 value out of the string
 * via dexGetUtf16FromUtf8(pUtf8Ptr) and decides whether it may appear
 * in a member name. Rejected values are:
 *   (a) an improperly encoded low value,
 *   (b) an improper surrogate pair,
 *   (c) an encoded '\0',
 *   (d) a high control character,
 *   (e) a high space, layout, or special character (U+00a0,
 *       U+2000..U+200f, U+2028..U+202f, U+fff0..U+ffff).
 * Everything else is accepted. This is all specified in the dex format
 * document.
 *
 * When the first value is a leading surrogate, a second value is
 * decoded through the same pointer to check for the trailing half.
 *
 * Returns true if the character is acceptable, false otherwise.
 */
bool dexIsValidMemberNameUtf8_0(const char** pUtf8Ptr) {
    u2 unit = dexGetUtf16FromUtf8(pUtf8Ptr);

    // All follow-up tests are keyed on the high 8 bits.
    const unsigned int highByte = unit >> 8;

    if (highByte == 0x00) {
        // Only values above the ISO-8859-1 high space (0xa0) are valid.
        return unit > 0x00a0;
    }

    if (highByte >= 0xd8 && highByte <= 0xdb) {
        // Leading surrogate: the next value must be a trailing surrogate.
        unit = dexGetUtf16FromUtf8(pUtf8Ptr);
        return (unit >= 0xdc00) && (unit <= 0xdfff);
    }

    if (highByte >= 0xdc && highByte <= 0xdf) {
        // A trailing surrogate with no leading one before it is invalid.
        return false;
    }

    if (highByte == 0x20) {
        // Reject U+2000..U+200f and U+2028..U+202f; unit is already
        // known to be at least 0x2000 here.
        return !((unit <= 0x200f) || (unit >= 0x2028 && unit <= 0x202f));
    }

    if (highByte == 0xff) {
        // Reject the specials at U+fff0..U+ffff.
        return unit < 0xfff0;
    }

    return true;
}
/*
 * Compare two '\0'-terminated modified UTF-8 strings by the values that
 * dexGetUtf16FromUtf8() decodes from them, one value at a time. Because
 * the comparison is done on decoded values, different encodings of the
 * same value compare equal; only a real '\0' byte counts as the end of
 * a string.
 *
 * The return value is as for strcmp(): zero if the strings match,
 * negative if s1 sorts first, positive if s2 sorts first. A string that
 * ends first sorts before the longer one.
 */
int dexUtf8Cmp(const char* s1, const char* s2) {
    // Walk both strings in lockstep while each still has data.
    while (*s1 != '\0' && *s2 != '\0') {
        const int left = dexGetUtf16FromUtf8(&s1);
        const int right = dexGetUtf16FromUtf8(&s2);

        if (left != right) {
            return left - right;
        }
    }

    // At least one string is exhausted; see which.
    if (*s1 != '\0') {
        return 1;
    }
    return (*s2 == '\0') ? 0 : -1;
}
/*
 * Convert a "modified" UTF-8 string to UTF-16.
 *
 * Decodes utf8Str with dexGetUtf16FromUtf8() until its '\0' terminator
 * is reached, storing each decoded value into successive elements of
 * utf16Str. No terminator is written to the output, and no bounds are
 * checked here, so the caller must supply a large enough buffer.
 */
void dvmConvertUtf8ToUtf16(u2* utf16Str, const char* utf8Str)
{
    const char* src = utf8Str;
    u2* dst;

    for (dst = utf16Str; *src != '\0'; ++dst) {
        *dst = dexGetUtf16FromUtf8(&src);
    }
}