/**
 * g_utf8_get_char:
 * @p: a pointer to a Unicode character encoded as UTF-8
 *
 * Decodes the UTF-8 byte sequence that starts at @p into one Unicode
 * character.
 *
 * The first byte is handed to UTF8_COMPUTE, which yields the sequence
 * length and the mask for the lead byte; a length of -1 is treated as
 * "not a valid lead byte" and (gunichar)-1 is returned.  Otherwise the
 * value is assembled by UTF8_GET.
 *
 * If @p does not point to a valid UTF-8 encoded character, the result
 * is undefined.  When the input is not known to consist of complete,
 * valid characters, use g_utf8_get_char_validated() instead.
 *
 * Return value: the resulting character
 */
gunichar
g_utf8_get_char (const gchar *p)
{
  unsigned char lead = (unsigned char) *p;
  int seq_len;
  int lead_mask = 0;
  int pos;
  gunichar ch;

  UTF8_COMPUTE (lead, lead_mask, seq_len);

  if (seq_len != -1)
    {
      UTF8_GET (ch, p, pos, lead_mask, seq_len);
      return ch;
    }

  /* The lead byte was rejected by UTF8_COMPUTE. */
  return (gunichar) -1;
}
/*
 * cc_utf8_get_char:
 * @p: a pointer to a Unicode character encoded as UTF-8
 *
 * Turns the UTF-8 encoded bytes starting at @p into a Unicode character.
 *
 * UTF8_COMPUTE derives the sequence length and lead-byte mask from the
 * first byte; when it reports a length of -1 this function returns
 * (unsigned int) -1 without reading further.  Otherwise UTF8_GET builds
 * the character value.
 *
 * If @p does not point to a valid UTF-8 encoded character, results are
 * undefined.  If you are not sure that the bytes are complete, valid
 * Unicode characters, use g_utf8_get_char_validated() instead.
 *
 * Return value: the resulting character
 */
static unsigned int
cc_utf8_get_char (const char *p)
{
    unsigned char first_byte = (unsigned char) *p;
    unsigned int code_point;
    int n_bytes;
    int first_mask = 0;
    int idx;

    UTF8_COMPUTE (first_byte, first_mask, n_bytes);

    if (n_bytes == -1) {
        /* Not a usable lead byte. */
        return (unsigned int) -1;
    }

    UTF8_GET (code_point, p, idx, first_mask, n_bytes);

    return code_point;
}
/* Another variant that steps over the index.
 *
 * Decodes the character that starts at p[*index] and advances *index past
 * the bytes that were consumed.
 *
 * p:     start of the UTF-8 string.
 * index: in/out byte offset into p; incremented by the number of bytes
 *        consumed by this call.
 *
 * Returns the decoded character.  When the lead byte is rejected by
 * UTF8_COMPUTE (length -1), *index is moved to the position reported by
 * BLI_str_find_next_char_utf8() and BLI_UTF8_ERR is returned.
 *
 * Note: currently this also falls back to latin1 for text drawing: when
 * UTF8_GET yields BLI_UTF8_ERR, the lead byte itself is returned as the
 * character and exactly one byte is consumed. */
unsigned int BLI_str_utf8_as_unicode_step(const char *p, size_t *index)
{
	int i, mask = 0, len;
	unsigned int result;
	unsigned char c;

	p += *index;
	c = (unsigned char) *p;

	UTF8_COMPUTE (c, mask, len);
	if (len == -1) {
		/* when called with NULL end, result will never be NULL,
		 * checks for a NUL character */
		char *p_next = BLI_str_find_next_char_utf8(p, NULL);
		/* will never return the same pointer unless '\0',
		 * eternal loop is prevented */
		*index += (size_t)(p_next - p);
		return BLI_UTF8_ERR;
	}

	/* this is tricky since there are a few ways we can bail out of bad unicode
	 * values, 3 possible solutions. */
#if 0
	UTF8_GET (result, p, i, mask, len, BLI_UTF8_ERR);
#elif 1
	/* WARNING: this is NOT part of glib, or supported by similar functions.
	 * this is added for text drawing because some filepaths can have latin1
	 * characters */
	UTF8_GET (result, p, i, mask, len, BLI_UTF8_ERR);
	if (result == BLI_UTF8_ERR) {
		len = 1;
		/* Use the unsigned copy of the lead byte, not '*p': plain 'char'
		 * may be signed, in which case a byte >= 0x80 would be
		 * sign-extended to 0xFFFFFFxx instead of the latin1 value. */
		result = c;
	}
	/* end warning! */
#else
	/* without a fallback like '?', text drawing will stop on this value */
	UTF8_GET (result, p, i, mask, len, '?');
#endif

	/* len is positive here (the -1 case returned above). */
	*index += (size_t)len;

	return result;
}
/**
 * _cairo_utf8_get_char_validated:
 * @p: a UTF-8 string
 * @unicode: location to store one Unicode character, or %NULL to
 *           discard the decoded value
 *
 * Decodes the first character of a valid UTF-8 string and reports how
 * many bytes it occupies.
 *
 * When UTF8_COMPUTE reports a length of -1 for the first byte, the
 * stored character is (uint32_t)-1 and a length of 1 is returned, so
 * that a caller stepping through the string still makes progress.
 *
 * Note that the string should be valid.  Do not use this without
 * validating the string first.
 *
 * Returns: the number of bytes forming the character returned.
 **/
int
_cairo_utf8_get_char_validated (const char *p,
				uint32_t   *unicode)
{
    unsigned char first = (unsigned char) *p;
    uint32_t decoded;
    int n_bytes;
    int first_mask = 0;
    int k;

    UTF8_COMPUTE (first, first_mask, n_bytes);

    if (n_bytes == -1) {
	/* Bad lead byte: report the error value and consume one byte. */
	decoded = (uint32_t) -1;
	n_bytes = 1;
    } else {
	UTF8_GET (decoded, p, k, first_mask, n_bytes);
    }

    if (unicode)
	*unicode = decoded;

    return n_bytes;
}