/* Return the number of characters that NAME (LEN bytes of UTF-8) occupies
   once mangled by append_unicode_mangled_name, or 0 when the name needs
   no escaping at all.

   The count has to agree character-for-character with what
   append_unicode_mangled_name emits:
     - alphanumerics other than 'U', and '$', are copied as-is (1 char);
     - '_' and 'U' are copied as-is, except that a 'U' arriving right
       after exactly two consecutive underscores is emitted as "U_"
       (2 chars) so that a literal "__U" cannot be mistaken for an escape;
     - anything else is emitted as "__U<hex>_", i.e. 4 chars plus however
       many hex digits "%x" prints for the code point.  */
static int
unicode_mangling_length (const char *name, int len)
{
  const unsigned char *ptr;
  const unsigned char *limit = (const unsigned char *)name + len;
  int need_escapes = 0;		/* Whether we need an escape or not */
  int num_chars = 0;		/* Number of characters in the mangled name */
  int uuU = 0;			/* Consecutive '_' seen so far; 2 means "__" */

  for (ptr = (const unsigned char *) name; ptr < limit; )
    {
      int ch = UTF8_GET(ptr, limit);

      if (ch < 0)
	error ("internal error - invalid Utf8 name");

      if ((ISALNUM (ch) && ch != 'U') || ch == '$')
	{
	  num_chars++;
	  /* An ordinary character interrupts any run of underscores.
	     Without this reset "_a_U" would be counted as if it
	     contained "__U", disagreeing with the emitter.  */
	  uuU = 0;
	}
      else if (ch == '_' || ch == 'U')
	{
	  /* It's always at least one character.  */
	  num_chars++;

	  /* Prepare to recognize __U.  */
	  if (ch == '_' && uuU < 3)
	    uuU++;
	  /* We recognize __U that we wish to encode __U_, we count one
	     more character.  */
	  else if (ch == 'U' && uuU == 2)
	    {
	      num_chars++;
	      need_escapes = 1;
	      uuU = 0;
	    }
	  /* Otherwise, just reset uuU.  */
	  else
	    uuU = 0;
	}
      else
	{
	  /* Everything else is escaped as "__U<hex>_".  Count exactly the
	     digits "%x" produces (one for values below 0x10) instead of
	     assuming a minimum of two.  */
	  unsigned int rest = (unsigned int) ch;
	  int hex_digits = 1;

	  while (rest > 0xf)
	    {
	      rest >>= 4;
	      hex_digits++;
	    }

	  num_chars += 4 + hex_digits;
	  need_escapes = 1;
	  uuU = 0;
	}
    }

  return need_escapes ? num_chars : 0;
}
void append_gpp_mangled_name (const char *name, int len) { const unsigned char *ptr; const unsigned char *limit; int encoded_len; char buf [6]; MANGLE_CXX_KEYWORDS (name, len); limit = (const unsigned char *)name + len; /* Compute the length of the string we wish to mangle. */ for (encoded_len = 0, ptr = (const unsigned char *) name; ptr < limit; encoded_len++) { int ch = UTF8_GET(ptr, limit); if (ch < 0) error ("internal error - invalid Utf8 name"); } sprintf (buf, "%d", encoded_len); obstack_grow (mangle_obstack, buf, strlen (buf)); obstack_grow (mangle_obstack, name, len); }
/* Another variant that steps over the index.
 *
 * Decodes the UTF-8 sequence starting at p + *index and advances *index
 * past it.
 *
 * - When the lead byte is not a valid UTF-8 lead (UTF8_COMPUTE yields -1),
 *   *index is moved to the position returned by
 *   BLI_str_find_next_char_utf8() and BLI_UTF8_ERR is returned.
 * - When the lead byte is valid but decoding fails, the function falls back
 *   to latin1: it consumes a single byte and returns that byte's value.
 *
 * WARNING: the latin1 fallback is NOT part of glib, or supported by similar
 * functions. It exists for text drawing because some filepaths can have
 * latin1 characters. The alternatives would be to return BLI_UTF8_ERR
 * or to substitute '?'; without some fallback, text drawing would stop
 * at the bad value. */
unsigned int BLI_str_utf8_as_unicode_step(const char *p, size_t *index)
{
	int i, mask = 0, len;
	unsigned int result;
	unsigned char c;

	p += *index;
	c = (unsigned char) *p;

	UTF8_COMPUTE (c, mask, len);
	if (len == -1) {
		/* when called with NULL end, result will never be NULL,
		 * checks for a NULL character */
		char *p_next = BLI_str_find_next_char_utf8(p, NULL);
		/* will never return the same pointer unless '\0',
		 * eternal loop is prevented */
		*index += (size_t)(p_next - p);
		return BLI_UTF8_ERR;
	}

	UTF8_GET (result, p, i, mask, len, BLI_UTF8_ERR);
	if (result == BLI_UTF8_ERR) {
		len = 1;
		/* Read the byte as unsigned: where plain `char` is signed, `*p`
		 * would sign-extend bytes >= 0x80 (0xE9 -> 0xFFFFFFE9) instead of
		 * yielding the latin1 code point. */
		result = (unsigned char) *p;
	}

	*index += (size_t)len;
	return result;
}
/* Variant that also accumulates the byte length.
 *
 * Decodes the UTF-8 sequence at p and adds the number of bytes the lead
 * byte announces to *index. Returns BLI_UTF8_ERR, leaving *index untouched,
 * when the lead byte is rejected by UTF8_COMPUTE. If the lead byte is
 * accepted, *index is advanced even when UTF8_GET reports BLI_UTF8_ERR. */
unsigned int BLI_str_utf8_as_unicode_and_size(const char *p, size_t *index)
{
	unsigned char lead = (unsigned char) p[0];
	unsigned int code;
	int lead_mask = 0;
	int nbytes;
	int k;

	UTF8_COMPUTE (lead, lead_mask, nbytes);
	if (nbytes != -1) {
		UTF8_GET (code, p, k, lead_mask, nbytes, BLI_UTF8_ERR);
		*index += nbytes;
		return code;
	}

	return BLI_UTF8_ERR;
}
/**
 * g_utf8_get_char:
 * @p: pointer to the first byte of a UTF-8 encoded Unicode character
 *
 * Decodes the UTF-8 byte sequence starting at @p into a single Unicode
 * character. The input is not validated: if @p does not point at a
 * well-formed UTF-8 character the result is undefined. When the bytes
 * may be incomplete or invalid, call g_utf8_get_char_validated()
 * instead.
 *
 * Return value: the decoded character
 **/
gunichar
g_utf8_get_char (const gchar *p)
{
  unsigned char first = (unsigned char) *p;
  int lead_mask = 0;
  int seq_len;
  int idx;
  gunichar wc;

  UTF8_COMPUTE (first, lead_mask, seq_len);

  if (seq_len != -1)
    {
      UTF8_GET (wc, p, idx, lead_mask, seq_len);
    }
  else
    {
      /* Lead byte was rejected by UTF8_COMPUTE.  */
      wc = (gunichar)-1;
    }

  return wc;
}
/* Decode the UTF-8 byte sequence at @p into one Unicode character.
 * No validation is performed: results are undefined when @p does not
 * point at a well-formed UTF-8 character. Returns (uint32_t)-1 when
 * UTF8_COMPUTE rejects the lead byte.
 **/
static uint32_t
_utf8_get_char (const unsigned char *p)
{
    unsigned char lead = (unsigned char) p[0];
    uint32_t code;
    int nbytes;
    int lead_mask = 0;
    int k;

    UTF8_COMPUTE (lead, lead_mask, nbytes);
    if (nbytes != -1) {
	UTF8_GET (code, p, k, lead_mask, nbytes);
	return code;
    }

    return (uint32_t)-1;
}
/**
 * g_utf8_get_char:
 * @p: pointer to the first byte of a UTF-8 encoded Unicode character
 *
 * Turns the UTF-8 bytes at @p into one wide character. Nothing is
 * validated here; the result is undefined if @p is not positioned on a
 * well-formed UTF-8 character. Use g_utf8_get_char_validated() when the
 * bytes are not known to be complete, valid Unicode characters.
 *
 * Return value: the decoded character
 **/
wchar_t
g_utf8_get_char (const char *p)
{
  /* Start from the error sentinel; it is overwritten on success.  */
  wchar_t decoded = (wchar_t)-1;
  unsigned char head = (unsigned char) *p;
  int head_mask = 0;
  int count;
  int pos;

  UTF8_COMPUTE (head, head_mask, count);
  if (count != -1)
    {
      UTF8_GET (decoded, p, pos, head_mask, count);
    }

  return decoded;
}
/* Append NAME (LEN bytes of UTF-8) to mangle_obstack, escaping as needed:
     - alphanumerics other than 'U', and '$', are copied unchanged;
     - '_' and 'U' are copied unchanged, except that a 'U' arriving right
       after exactly two consecutive underscores is written as "U_", so a
       literal "__U" in the source name becomes "__U_";
     - every other character is written as "__U<hex>_", with <hex> being
       the code point formatted by "%x".  */
static void
append_unicode_mangled_name (const char *name, int len)
{
  const unsigned char *cursor = (const unsigned char *) name;
  const unsigned char *const end = (const unsigned char *) name + len;
  int underscores = 0;	/* Length of the current run of '_' (reset at 4).  */

  while (cursor < end)
    {
      int ch = UTF8_GET(cursor, end);

      if ((ISALNUM (ch) && ch != 'U') || ch == '$')
	{
	  /* Plain character: emit it and end any underscore run.  */
	  obstack_1grow (mangle_obstack, ch);
	  underscores = 0;
	}
      else if (ch == '_')
	{
	  /* Extend the run up to three; a fourth underscore restarts it.  */
	  if (underscores < 3)
	    underscores++;
	  else
	    underscores = 0;
	  obstack_1grow (mangle_obstack, ch);
	}
      else if (ch == 'U')
	{
	  /* "__U" in the input: finish it as "__U_".  Any other 'U' is
	     emitted verbatim.  */
	  if (underscores == 2)
	    obstack_grow (mangle_obstack, "U_", 2);
	  else
	    obstack_1grow (mangle_obstack, ch);
	  underscores = 0;
	}
      else
	{
	  /* Buffer large enough for UINT_MAX plus the prefix.  */
	  char escape [13];

	  sprintf (escape, "__U%x_", ch);
	  obstack_grow (mangle_obstack, escape, strlen (escape));
	  underscores = 0;
	}
    }
}
/* Compare the UTF-8 string STR (LENGTH bytes) against the NUL-terminated
   string NAME, one decoded character of STR per char of NAME.

   Returns the difference between the first mismatching pair (decoded
   character minus NAME's char).  When all of NAME matches, returns 0 if
   STR was consumed exactly and 1 if STR has bytes left over.  */
static int
utf8_cmp (const unsigned char *str, int length, const char *name)
{
  const unsigned char *const end = str + length;
  const char *np;

  for (np = name; *np; np++)
    {
      int decoded = UTF8_GET (str, end);

      if (decoded != *np)
	return decoded - *np;
    }

  if (str == end)
    return 0;
  return 1;
}
/* Build the mangled identifier for the resource called NAME.

   The name is first rewritten into a stack buffer: a leading '_' is
   added, '$' becomes "$$", '.' becomes "$_", '/' becomes "$S", and every
   other character is copied byte-for-byte.  The rewritten text is then
   handed to append_gpp_mangled_name after the raw prefix "Gr", and the
   result of finish_mangling is returned.  */
tree
java_mangle_resource_name (const char *name)
{
  int len = strlen (name);
  /* Worst case every input byte doubles, plus the leading '_'.  */
  char *buf = (char *) alloca (2 * len + 1);
  char *out = buf;
  const unsigned char *cursor = (const unsigned char *) name;
  const unsigned char *const end = cursor + len;

  init_mangling ();
  MANGLE_RAW_STRING ("Gr");

  *out++ = '_';
  while (cursor < end)
    {
      const unsigned char *start = cursor;
      char escape;
      int ch = UTF8_GET (cursor, end);

      gcc_assert (ch > 0);

      /* Pick the second character of the two-character escape, or NUL
	 when the character needs no escaping.  */
      if (ch == '$')
	escape = '$';
      else if (ch == '.')
	escape = '_';
      else if (ch == '/')
	escape = 'S';
      else
	escape = '\0';

      if (escape != '\0')
	{
	  *out++ = '$';
	  *out++ = escape;
	}
      else
	{
	  /* Copy the bytes UTF8_GET just stepped over.  */
	  memcpy (out, start, cursor - start);
	  out += cursor - start;
	}
    }

  append_gpp_mangled_name (buf, out - buf);

  return finish_mangling ();
}
/**
 * _cairo_utf8_get_char_validated:
 * @p: a UTF-8 string
 * @unicode: location to store one Unicode character, or %NULL to
 *           discard it
 *
 * Decodes the first character of @p and reports how many bytes it
 * occupies.
 *
 * The string is expected to be valid UTF-8 already; validate it before
 * calling this. If the lead byte is nevertheless rejected, (uint32_t)-1
 * is stored in @unicode and a length of 1 is reported.
 *
 * Returns: the number of bytes forming the character returned.
 **/
int
_cairo_utf8_get_char_validated (const char *p,
				uint32_t   *unicode)
{
    unsigned char lead = (unsigned char) *p;
    uint32_t decoded;
    int lead_mask = 0;
    int nbytes;
    int consumed;
    int k;

    UTF8_COMPUTE (lead, lead_mask, nbytes);
    if (nbytes == -1) {
	decoded = (uint32_t)-1;
	consumed = 1;
    } else {
	UTF8_GET (decoded, p, k, lead_mask, nbytes);
	consumed = nbytes;
    }

    if (unicode)
	*unicode = decoded;

    return consumed;
}