Exemplo n.º 1
0
Arquivo: gutf8.c Projeto: patito/glib
/**
 * g_utf8_get_char:
 * @p: a pointer to Unicode character encoded as UTF-8
 *
 * Decodes the UTF-8 byte sequence that starts at @p into a single
 * Unicode character.
 *
 * The input is not validated here: if @p does not point to a valid
 * UTF-8 encoded character, results are undefined. When the bytes may
 * be incomplete or malformed, call g_utf8_get_char_validated()
 * instead.
 *
 * Return value: the decoded character, or (gunichar)-1 when the
 * sequence length derived from the first byte is -1
 */
gunichar
g_utf8_get_char (const gchar *p)
{
    unsigned char lead = (unsigned char) *p;
    gunichar ch;
    int mask = 0;
    int nbytes;
    int idx;

    /* UTF8_COMPUTE (defined elsewhere) fills in mask and nbytes from the
     * lead byte; only a length other than -1 is decoded. */
    UTF8_COMPUTE (lead, mask, nbytes);
    if (nbytes != -1) {
        UTF8_GET (ch, p, idx, mask, nbytes);
        return ch;
    }

    return (gunichar)-1;
}
Exemplo n.º 2
0
/*
 * cc_utf8_get_char:
 * @p: a pointer to Unicode character encoded as UTF-8
 *
 * Turns the UTF-8 encoded bytes found at @p into one Unicode character.
 * Nothing is validated: if @p does not point to a valid UTF-8 encoded
 * character, results are undefined. Callers that cannot guarantee
 * complete, valid input should use a validating decoder instead.
 *
 * Return value: the decoded character, or (unsigned int)-1 when the
 * sequence length derived from the first byte is -1
 **/
static unsigned int
cc_utf8_get_char (const char * p)
{
    unsigned char first = (unsigned char) *p;
    unsigned int decoded;
    int mask = 0;
    int length;
    int pos;

    /* UTF8_COMPUTE (defined elsewhere) sets mask and length from the
     * first byte. */
    UTF8_COMPUTE (first, mask, length);
    if (length != -1) {
        UTF8_GET (decoded, p, pos, mask, length);
        return decoded;
    }

    /* A length of -1 means there is nothing to decode. */
    return (unsigned int) -1;
}
Exemplo n.º 3
0
/**
 * Another variant that steps over the index.
 *
 * Decodes the character starting at byte offset *index of p and
 * advances *index past the bytes that were consumed.
 *
 * Note: currently this also falls back to latin1 for text drawing, so a
 * sequence that fails to decode is returned as its single leading byte.
 *
 * \param p: start of the string; decoding begins at p + *index.
 * \param index: in/out byte offset into p, advanced on every call.
 * \return the decoded character, the value of the leading byte when the
 * latin1 fallback is taken, or BLI_UTF8_ERR when the sequence length
 * computed from the leading byte is -1.
 */
unsigned int BLI_str_utf8_as_unicode_step(const char *p, size_t *index)
{
	int i, mask = 0, len;
	unsigned int result;
	unsigned char c;

	p += *index;
	c = (unsigned char) *p;

	UTF8_COMPUTE (c, mask, len);
	if (len == -1) {
		/* when called with NULL end, result will never be NULL,
		 * checks for a NULL character */
		char *p_next = BLI_str_find_next_char_utf8(p, NULL);
		/* will never return the same pointer unless '\0',
		 * eternal loop is prevented */
		*index += (size_t)(p_next - p);
		return BLI_UTF8_ERR;
	}

	/* this is tricky since there are a few ways we can bail out of bad unicode
	 * values, 3 possible solutions. */
#if 0
	UTF8_GET (result, p, i, mask, len, BLI_UTF8_ERR);
#elif 1
	/* WARNING: this is NOT part of glib, or supported by similar functions.
	 * this is added for text drawing because some filepaths can have latin1
	 * characters */
	UTF8_GET (result, p, i, mask, len, BLI_UTF8_ERR);
	if (result == BLI_UTF8_ERR) {
		len = 1;
		/* Read the byte as unsigned: plain char may be signed, in which
		 * case a byte >= 0x80 would sign-extend into a huge value instead
		 * of the latin1 code point 0x80..0xFF. */
		result = (unsigned char) *p;
	}
	/* end warning! */
#else
	/* without a fallback like '?', text drawing will stop on this value */
	UTF8_GET (result, p, i, mask, len, '?');
#endif

	/* len is a positive byte count on this path */
	*index += (size_t)len;
	return result;
}
Exemplo n.º 4
0
/**
 * _cairo_utf8_get_char_validated:
 * @p: a UTF-8 string
 * @unicode: location to store one Unicode character, or %NULL to only
 * obtain the byte count
 *
 * Decodes the first character of a UTF-8 string that has already been
 * validated, and reports how many bytes that character occupies.
 *
 * The string is expected to be valid; validate it before calling this
 * function. If the length derived from the first byte is -1,
 * (uint32_t)-1 is stored in @unicode and a length of 1 is reported.
 *
 * Returns: the number of bytes forming the character returned.
 **/
int
_cairo_utf8_get_char_validated (const char *p,
				uint32_t   *unicode)
{
    unsigned char lead = (unsigned char) *p;
    uint32_t ch;
    int mask = 0;
    int nbytes;
    int idx;
    int consumed;

    UTF8_COMPUTE (lead, mask, nbytes);
    if (nbytes == -1) {
	/* Nothing decodable: report the error value and a length of 1. */
	ch = (uint32_t)-1;
	consumed = 1;
    } else {
	UTF8_GET (ch, p, idx, mask, nbytes);
	consumed = nbytes;
    }

    /* The output location is optional. */
    if (unicode)
	*unicode = ch;
    return consumed;
}