/*
 * Walk the NUL-terminated string @str and check that every multi-byte
 * sequence is acceptable UTF-8.
 *
 * Returns a pointer to the terminating NUL when the whole string was
 * accepted, otherwise a pointer to the lead byte of the first rejected
 * sequence.
 *
 * NOTE(review): CONTINUATION_CHAR is defined outside this block; it
 * presumably tests the byte at `p`, folds it into `val` and jumps to
 * `error` on failure, so those three names must stay as they are --
 * confirm against the macro definition.
 */
static const char * fast_validate (const char *str)
{
	unsigned int val = 0;
	unsigned int lower_bound = 0;
	const char *seq_start = str;
	const char *p;

	for (p = str; *p; p++)
	{
		const unsigned char lead = *(const unsigned char *)p;

		/* Single-byte (7-bit) characters need no further checks. */
		if (lead < 128)
			continue;

		seq_start = p;

		if ((lead & 0xe0) == 0xc0) /* 110xxxxx */
		{
			/* Upper four payload bits all zero: the value would
			 * fit in 7 bits, so the two-byte form is rejected. */
			if ((lead & 0x1e) == 0)
				goto error;
			p++;
			if ((*(const unsigned char *)p & 0xc0) != 0x80) /* 10xxxxxx */
				goto error;
			continue;
		}

		if ((lead & 0xf0) == 0xe0) /* 1110xxxx */
		{
			lower_bound = (1 << 11);
			val = lead & 0x0f;
		}
		else if ((lead & 0xf8) == 0xf0) /* 11110xxx */
		{
			lower_bound = (1 << 16);
			val = lead & 0x07;

			/* A four-byte sequence has one trailing byte more
			 * than a three-byte one. */
			p++;
			CONTINUATION_CHAR;
		}
		else
			goto error;

		/* The two trailing bytes shared by both sequence lengths. */
		p++;
		CONTINUATION_CHAR;
		p++;
		CONTINUATION_CHAR;

		/* Reject values below the minimum for this sequence length
		 * and anything UNICODE_VALID does not accept. */
		if (val < lower_bound || !UNICODE_VALID(val))
			goto error;
	}

	return p;

error:
	return seq_start;
}
Ejemplo n.º 2
0
static int
utf8_validate (const char *str)
{
	gunichar val = 0;
	gunichar min = 0;
	const char *p;

	for (p = str; *p; p++) {
		if (*(guchar *)p < 128)
			/* done */;
		else {
			const char *last;

			last = p;
			if ((*(guchar *)p & 0xe0) == 0xc0) { /* 110xxxxx */
				if (G_UNLIKELY ((*(guchar *)p & 0x1e) == 0))
					goto error;
				p++;
				if (G_UNLIKELY ((*(guchar *)p & 0xc0) != 0x80)) /* 10xxxxxx */
					goto error;
			} else {
				if ((*(guchar *)p & 0xf0) == 0xe0) { /* 1110xxxx */
					min = (1 << 11);
					val = *(guchar *)p & 0x0f;
					goto TWO_REMAINING;
				} else if ((*(guchar *)p & 0xf8) == 0xf0) { /* 11110xxx */
					min = (1 << 16);
					val = *(guchar *)p & 0x07;
				} else
					goto error;

				p++;
				CONTINUATION_CHAR;
TWO_REMAINING:
				p++;
				CONTINUATION_CHAR;
				p++;
				CONTINUATION_CHAR;

				if (G_UNLIKELY (val < min))
					goto error;

				if (G_UNLIKELY (!UNICODE_VALID(val)))
					goto error;
			}

			continue;

error:
			return 0;
		}
	}

	return *p == 0;
}
/**
 * _cairo_utf8_to_utf16:
 * @str: a UTF-8 string
 * @len: length of @str in bytes, or -1 if it is nul-terminated.
 *   If @len is supplied and the string has an embedded nul
 *   byte, only the portion before the nul byte is converted.
 * @result: location to store a pointer to a newly allocated UTF-16
 *   string (always native endian). Free with free(). A 0
 *   word will be written after the last character.
 * @items_written: location to store number of 16-bit words
 *   written (not including the trailing 0), or %NULL.
 *
 * Converts a UTF-8 string to UTF-16. UTF-16 is an encoding of Unicode
 * where characters are represented either as a single 16-bit word, or
 * as a pair of 16-bit "surrogates". The string is validated to
 * consist entirely of valid Unicode characters.
 *
 * The conversion makes two passes: the first validates the input and
 * counts the 16-bit words needed, the second fills the allocated buffer.
 * @result and @items_written are only written on success.
 *
 * Return value: %CAIRO_STATUS_SUCCESS if the entire string was
 *   successfully converted. %CAIRO_STATUS_INVALID_STRING if an
 *   invalid sequence was found (or the word count would not fit in an
 *   int). %CAIRO_STATUS_NO_MEMORY if the output buffer could not be
 *   allocated.
 **/
cairo_status_t
_cairo_utf8_to_utf16 (const unsigned char *str,
		      int		   len,
		      uint16_t		 **result,
		      int		  *items_written)
{
    uint16_t *str16 = NULL;
    int n16, i;
    const unsigned char *in;

    in = str;
    n16 = 0;
    while ((len < 0 || str + len - in > 0) && *in) {
	/* For a nul-terminated string (len < 0) pass -1 explicitly, the
	 * value the previous expression produced for the first character
	 * of a len == -1 string. Computing str + len - in in that case
	 * would form a pointer before the start of the string, which is
	 * undefined behaviour. */
	int remaining = len < 0 ? -1 : (int) (str + len - in);
	uint32_t wc = _utf8_get_char_extended (in, remaining);

	if (wc & 0x80000000 || !UNICODE_VALID (wc))
	    return CAIRO_STATUS_INVALID_STRING;

	/* Characters from 0x10000 up need a surrogate pair. */
	if (wc < 0x10000)
	    n16 += 1;
	else
	    n16 += 2;

	/* Stop before n16 (or n16 + 1 below) could overflow an int. */
	if (n16 == INT_MAX - 1 || n16 == INT_MAX)
	    return CAIRO_STATUS_INVALID_STRING;

	in = UTF8_NEXT_CHAR (in);
    }

    str16 = malloc (sizeof (uint16_t) * (n16 + 1));
    if (!str16)
	return CAIRO_STATUS_NO_MEMORY;

    /* Second pass: the input is known to be valid, so decode without
     * re-checking. */
    in = str;
    for (i = 0; i < n16;) {
	uint32_t wc = _utf8_get_char (in);

	if (wc < 0x10000) {
	    str16[i++] = wc;
	} else {
	    str16[i++] = (wc - 0x10000) / 0x400 + 0xd800;
	    str16[i++] = (wc - 0x10000) % 0x400 + 0xdc00;
	}

	in = UTF8_NEXT_CHAR (in);
    }

    str16[i] = 0;

    *result = str16;
    if (items_written)
	*items_written = n16;

    return CAIRO_STATUS_SUCCESS;
}
Ejemplo n.º 4
0
/**
 * g_utf8_get_char_validated:
 * @p: a pointer to Unicode character encoded as UTF-8
 * @max_len: the maximum number of bytes to read, or -1, for no maximum.
 * 
 * Convert a sequence of bytes encoded as UTF-8 to a Unicode character.
 * This function checks for incomplete characters, for invalid characters
 * such as characters that are out of the range of Unicode, and for
 * overlong encodings of valid characters.
 *
 * If @max_len is 0 no byte of @p is examined and (gunichar)-2 is
 * returned.
 * 
 * Return value: the resulting character. If @p points to a partial
 *    sequence at the end of a string that could begin a valid 
 *    character, returns (gunichar)-2; otherwise, if @p does not point 
 *    to a valid UTF-8 encoded Unicode character, returns (gunichar)-1.
 **/
gunichar
g_utf8_get_char_validated (const  gchar *p,
			   gssize max_len)
{
  gunichar result;

  /* An empty buffer is a partial sequence; answer directly instead of
   * handing a zero-length buffer to g_utf8_get_char_extended(). */
  if (max_len == 0)
    return (gunichar)-2;

  result = g_utf8_get_char_extended (p, max_len);

  /* Values with the top bit set are passed through unchanged. */
  if (result & 0x80000000)
    return result;
  else if (!UNICODE_VALID (result))
    return (gunichar)-1;
  else
    return result;
}
Ejemplo n.º 5
0
/**
 * _cairo_utf8_to_ucs4:
 * @str: a UTF-8 string
 * @len: length of @str in bytes, or -1 if it is nul-terminated.
 *   If @len is supplied and the string has an embedded nul
 *   byte, only the portion before the nul byte is converted.
 * @result: location to store a pointer to a newly allocated UTF-32
 *   string (always native endian), or %NULL. Free with free(). A 0
 *   word will be written after the last character.
 * @items_written: location to store number of 32-bit words
 *   written (not including the trailing 0), or %NULL.
 *
 * Converts a UTF-8 string to UCS-4. UCS-4 is an encoding of Unicode
 * with 1 32-bit word per character. The string is validated to
 * consist entirely of valid Unicode characters.
 *
 * When @result is %NULL the string is only validated and counted; no
 * buffer is allocated.
 *
 * Return value: %CAIRO_STATUS_SUCCESS if the entire string was
 *   successfully converted. %CAIRO_STATUS_INVALID_STRING if an
 *   invalid sequence was found (or the character count would not fit
 *   in an int). %CAIRO_STATUS_NO_MEMORY if the output buffer could not
 *   be allocated.
 **/
cairo_status_t
_cairo_utf8_to_ucs4 (const char *str,
		     int	 len,
		     uint32_t  **result,
		     int	*items_written)
{
    uint32_t *str32 = NULL;
    int n_chars, i;
    const unsigned char *in;
    const unsigned char * const ustr = (const unsigned char *) str;

    /* First pass: validate and count characters. */
    in = ustr;
    n_chars = 0;
    while ((len < 0 || ustr + len - in > 0) && *in)
    {
	/* For a nul-terminated string (len < 0) pass -1 explicitly, the
	 * value the previous expression produced for the first character
	 * of a len == -1 string. Computing ustr + len - in in that case
	 * would form a pointer before the start of the string, which is
	 * undefined behaviour. */
	int remaining = len < 0 ? -1 : (int) (ustr + len - in);
	uint32_t wc = _utf8_get_char_extended (in, remaining);

	if (wc & 0x80000000 || !UNICODE_VALID (wc))
	    return _cairo_error (CAIRO_STATUS_INVALID_STRING);

	/* Bail out before n_chars + 1 below could overflow an int. */
	n_chars++;
	if (n_chars == INT_MAX)
	    return _cairo_error (CAIRO_STATUS_INVALID_STRING);

	in = UTF8_NEXT_CHAR (in);
    }

    if (result) {
	str32 = _cairo_malloc_ab (n_chars + 1, sizeof (uint32_t));
	if (!str32)
	    return _cairo_error (CAIRO_STATUS_NO_MEMORY);

	/* Second pass: the input is known to be valid, so decode
	 * without re-checking. */
	in = ustr;
	for (i=0; i < n_chars; i++) {
	    str32[i] = _utf8_get_char (in);
	    in = UTF8_NEXT_CHAR (in);
	}
	str32[i] = 0;

	*result = str32;
    }

    if (items_written)
	*items_written = n_chars;

    return CAIRO_STATUS_SUCCESS;
}
Ejemplo n.º 6
0
/*
 * Decode one character from @p, looking at no more than @max_len bytes,
 * and vet the result with UNICODE_VALID.
 *
 * Returns -2 straight away when @max_len is 0. A decoder result with the
 * top bit (0x80000000) set is handed back unchanged; otherwise the
 * decoded value is returned if UNICODE_VALID accepts it and -1 if not.
 *
 * NOTE(review): the meaning of a negative @max_len and of the decoder's
 * top-bit results is decided by _utf8_get_char_extended(), which is not
 * part of this block -- confirm there.
 */
int _utf8_get_char_validated(const char *p,
                                  int max_len)
{
    int ch;

    /* Nothing to read: do not call the decoder at all. */
    if (max_len == 0)
        return -2;

    ch = _utf8_get_char_extended(p, max_len);

    /* Pass decoder error codes and valid characters through as-is. */
    if ((ch & 0x80000000) || UNICODE_VALID(ch))
        return ch;

    return -1;
}
Ejemplo n.º 7
0
Archivo: gutf8.c Proyecto: cosimoc/glib
/**
 * g_unichar_validate:
 * @ch: a Unicode character
 *
 * Reports whether @ch passes the UNICODE_VALID check. Not every
 * integer value of @ch is accepted. 0 counts as a valid character
 * even though it usually terminates a string.
 *
 * Returns: %TRUE if @ch is a valid Unicode character
 **/
gboolean
g_unichar_validate (gunichar ch)
{
  const gboolean is_valid = UNICODE_VALID (ch);

  return is_valid;
}
Ejemplo n.º 8
0
Archivo: utf8.c Proyecto: EBone/Faust
/*
 * Check that the NUL-terminated string @str consists only of acceptable
 * UTF-8 sequences.
 *
 * Returns @str itself when the whole string was accepted and NULL as
 * soon as a sequence is rejected.
 *
 * NOTE(review): CONTINUATION_CHAR is defined outside this block; it
 * presumably inspects the byte at `p`, accumulates into `val` and jumps
 * to `error`, so those names are kept verbatim -- confirm.
 */
const char *
avahi_utf8_valid (const char *str)

{
  unsigned val = 0;
  unsigned min_code = 0;
  const char *p;

  for (p = str; *p; p++)
    {
      const unsigned char lead = *(const unsigned char *)p;

      /* 7-bit characters are always fine. */
      if (lead < 128)
        continue;

      if ((lead & 0xe0) == 0xc0) /* 110xxxxx */
        {
          /* Upper four payload bits zero: value fits in 7 bits. */
          if ((lead & 0x1e) == 0)
            goto error;
          p++;
          if ((*(const unsigned char *)p & 0xc0) != 0x80) /* 10xxxxxx */
            goto error;
          continue;
        }

      if ((lead & 0xf0) == 0xe0) /* 1110xxxx */
        {
          min_code = (1 << 11);
          val = lead & 0x0f;
        }
      else if ((lead & 0xf8) == 0xf0) /* 11110xxx */
        {
          min_code = (1 << 16);
          val = lead & 0x07;

          /* Extra trailing byte of the four-byte form. */
          p++;
          CONTINUATION_CHAR;
        }
      else
        goto error;

      /* Two trailing bytes common to both lengths. */
      p++;
      CONTINUATION_CHAR;
      p++;
      CONTINUATION_CHAR;

      if (val < min_code)
        goto error;

      if (!UNICODE_VALID(val))
        goto error;
    }

  return str;

 error:
  return NULL;
}
Ejemplo n.º 9
0
Archivo: gutf8.c Proyecto: patito/glib
/*
 * Length-limited UTF-8 scan: examine at most @max_len bytes of @str,
 * stopping early at a NUL byte. @max_len must not be negative (asserted).
 *
 * Returns a pointer to where the scan stopped when everything seen was
 * accepted, otherwise a pointer to the lead byte of the first rejected
 * sequence. A sequence whose lead byte promises more bytes than remain
 * within @max_len is rejected before those bytes are read.
 *
 * NOTE(review): CONTINUATION_CHAR is defined outside this block; it
 * presumably inspects the byte at `p`, accumulates into `val` and jumps
 * to `error`, so those names are kept verbatim -- confirm.
 */
static const gchar *
fast_validate_len (const char *str,
                   gssize      max_len)

{
    gunichar val = 0;
    gunichar min_code = 0;
    const gchar *seq_start = str;
    const gchar *p;

    g_assert (max_len >= 0);

    for (p = str; ((p - str) < max_len) && *p; p++)
    {
        const guchar lead = *(const guchar *)p;
        gssize avail;

        /* 7-bit characters are always fine. */
        if (lead < 128)
            continue;

        seq_start = p;
        /* Bytes left in the window, counting the lead byte itself. */
        avail = max_len - (p - str);

        if ((lead & 0xe0) == 0xc0) /* 110xxxxx */
        {
            if (G_UNLIKELY (avail < 2))
                goto error;

            /* Upper four payload bits zero: value fits in 7 bits. */
            if (G_UNLIKELY ((lead & 0x1e) == 0))
                goto error;
            p++;
            if (G_UNLIKELY ((*(const guchar *)p & 0xc0) != 0x80)) /* 10xxxxxx */
                goto error;
            continue;
        }

        if ((lead & 0xf0) == 0xe0) /* 1110xxxx */
        {
            if (G_UNLIKELY (avail < 3))
                goto error;

            min_code = (1 << 11);
            val = lead & 0x0f;
        }
        else if ((lead & 0xf8) == 0xf0) /* 11110xxx */
        {
            if (G_UNLIKELY (avail < 4))
                goto error;

            min_code = (1 << 16);
            val = lead & 0x07;

            /* Extra trailing byte of the four-byte form. */
            p++;
            CONTINUATION_CHAR;
        }
        else
            goto error;

        /* Two trailing bytes common to both lengths. */
        p++;
        CONTINUATION_CHAR;
        p++;
        CONTINUATION_CHAR;

        if (G_UNLIKELY (val < min_code))
            goto error;
        if (G_UNLIKELY (!UNICODE_VALID(val)))
            goto error;
    }

    return p;

error:
    return seq_start;
}