//////////////////////////////////////////////////////////////////////////////// // Note: Assumes 4-digit hex codepoints: // xxxx // \uxxxx // U+xxxx unsigned int utf8_codepoint (const std::string& input) { unsigned int codepoint = 0; int length = input.length (); // U+xxxx, \uxxxx if (length >= 6 && ((input[0] == 'U' && input[1] == '+') || (input[0] == '\\' && input[1] == 'u'))) { codepoint = XDIGIT (input[2]) << 12 | XDIGIT (input[3]) << 8 | XDIGIT (input[4]) << 4 | XDIGIT (input[5]); } else if (length >= 4) { codepoint = XDIGIT (input[0]) << 12 | XDIGIT (input[1]) << 8 | XDIGIT (input[2]) << 4 | XDIGIT (input[3]); } else throw std::string (STRING_UTF8_INVALID_CP_REP); return codepoint; }
////////////////////////////////////////////////////////////////////////////////
// Converts the textual form of a 4-digit hexadecimal codepoint into its
// numeric value.
//
// Accepted forms (only the leading characters are examined, anything following
// the fourth hex digit is ignored):
//   xxxx
//   \uxxxx
//   U+xxxx
//
// Throws std::string ("ERROR: Invalid codepoint representation.") when the
// input is too short for the form it uses, or when any of the four digit
// positions does not hold a hexadecimal digit.
unsigned int utf8_codepoint (const std::string& input)
{
  const std::string::size_type length = input.length ();

  // A "U+" or "\u" prefix moves the four digits two characters to the right.
  std::string::size_type first = 0;
  if (length >= 2 &&
      ((input[0] == 'U'  && input[1] == '+') ||
       (input[0] == '\\' && input[1] == 'u')))
    first = 2;

  // Four digits are required after the (optional) prefix.  A prefixed input
  // that is too short must not be reinterpreted as a bare "xxxx" form.
  if (length < first + 4)
    throw std::string ("ERROR: Invalid codepoint representation.");

  unsigned int codepoint = 0;
  for (std::string::size_type i = first; i < first + 4; ++i)
  {
    const char c = input[i];
    unsigned int digit = 0;

    if (c >= '0' && c <= '9')
      digit = c - '0';
    else if (c >= 'a' && c <= 'f')
      digit = c - 'a' + 10;
    else if (c >= 'A' && c <= 'F')
      digit = c - 'A' + 10;
    else
      throw std::string ("ERROR: Invalid codepoint representation.");

    // Most significant nibble first.
    codepoint = (codepoint << 4) | digit;
  }

  return codepoint;
}
/**
 * camel_url_decode:
 * @part: a URL part
 *
 * Replaces every "%XX" escape (where both X are hexadecimal digits) in
 * @part with the byte it encodes. The work is done *in place*: because a
 * three-character escape collapses into a single byte, the result can
 * never be longer than the input, so the caller does not need to provide
 * any extra room at the end of the string. A '%' that is not followed by
 * two hexadecimal digits is copied through unchanged.
 */
void
camel_url_decode (gchar *part)
{
	guchar *src, *dst;

	g_return_if_fail (part != NULL);

/* Value of a hex digit character; masking with 0x4F folds 'a'-'f' onto 'A'-'F'. */
#define XDIGIT(c) ((c) <= '9' ? (c) - '0' : ((c) & 0x4F) - 'A' + 10)

	/* Read and write cursors start together; the writer never overtakes the reader. */
	src = dst = (guchar *) part;

	for (;;) {
		guchar ch = *src;

		/* The short-circuit checks stop at the terminator, so nothing past it is read. */
		if (ch == '%' && isxdigit (src[1]) && isxdigit (src[2])) {
			*dst++ = (XDIGIT (src[1]) << 4) + XDIGIT (src[2]);
			src += 3;
			continue;
		}

		/* Ordinary byte: copy it, including the terminating NUL. */
		*dst++ = ch;
		if (ch == '\0')
			break;
		src++;
	}
}