コード例 #1
0
/**
 * _cairo_utf8_to_utf16:
 * @str: an UTF-8 string
 * @len: length of @str in bytes, or -1 if it is nul-terminated.
 *   If @len is supplied and the string has an embedded nul
 *   byte, only the portion before the nul byte is converted.
 * @result: location to store a pointer to a newly allocated UTF-16
 *   string (always native endian). Free with free(). A 0
 *   word will be written after the last character.
 * @items_written: location to store number of 16-bit words 
 *   written. (Not including the trailing 0)
 *
 * Converts a UTF-8 string to UTF-16. UTF-16 is an encoding of Unicode
 * where characters are represented either as a single 16-bit word, or
 * as a pair of 16-bit "surrogates". The string is validated to
 * consist entirely of valid Unicode characters.
 * 
 * Return value: %CAIRO_STATUS_SUCCESS if the entire string was
 *   succesfully converted. %CAIRO_STATUS_INVALID_STRING if an
 *   an invalid sequence was found.
 **/
cairo_status_t
_cairo_utf8_to_utf16 (const unsigned char *str,
		      int		   len,
		      uint16_t		 **result,
		      int		  *items_written)
{
    uint16_t *str16 = NULL;
    int n16, i;
    const unsigned char *in;

    in = str;
    n16 = 0;
    while ((len < 0 || str + len - in > 0) && *in) {
	uint32_t wc = _utf8_get_char_extended (in, str + len - in);
	if (wc & 0x80000000 || !UNICODE_VALID (wc))
	    return CAIRO_STATUS_INVALID_STRING;
	
	if (wc < 0x10000)
	    n16 += 1;
	else
	    n16 += 2;
      
	if (n16 == INT_MAX - 1 || n16 == INT_MAX)
	    return CAIRO_STATUS_INVALID_STRING;
	
	in = UTF8_NEXT_CHAR (in);
    }

  
    str16 = malloc (sizeof (uint16_t) * (n16 + 1));
    if (!str16)
	return CAIRO_STATUS_NO_MEMORY;
  
    in = str;
    for (i = 0; i < n16;) {
	uint32_t wc = _utf8_get_char (in);

	if (wc < 0x10000) {
	    str16[i++] = wc;
	} else {
	    str16[i++] = (wc - 0x10000) / 0x400 + 0xd800;
	    str16[i++] = (wc - 0x10000) % 0x400 + 0xdc00;
	}
      
	in = UTF8_NEXT_CHAR (in);
    }

    str16[i] = 0;

    *result = str16;
    if (items_written)
	*items_written = n16;

    return CAIRO_STATUS_SUCCESS;
}
コード例 #2
0
ファイル: cairo-unicode.c プロジェクト: 3oyka/cairo2
/**
 * _cairo_utf8_to_ucs4:
 * @str: an UTF-8 string
 * @len: length of @str in bytes, or -1 if it is nul-terminated.
 *   If @len is supplied and the string has an embedded nul
 *   byte, only the portion before the nul byte is converted.
 * @result: location to store a pointer to a newly allocated UTF-32
 *   string (always native endian), or %NULL. Free with free(). A 0
 *   word will be written after the last character.
 * @items_written: location to store number of 32-bit words
 *   written. (Not including the trailing 0)
 *
 * Converts a UTF-8 string to UCS-4. UCS-4 is an encoding of Unicode
 * with 1 32-bit word per character. The string is validated to
 * consist entirely of valid Unicode characters.
 *
 * Return value: %CAIRO_STATUS_SUCCESS if the entire string was
 *   successfully converted. %CAIRO_STATUS_INVALID_STRING if an
 *   invalid sequence was found.
 **/
cairo_status_t
_cairo_utf8_to_ucs4 (const char *str,
		     int	 len,
		     uint32_t  **result,
		     int	*items_written)
{
    uint32_t *str32 = NULL;
    int n_chars, i;
    const unsigned char *in;
    const unsigned char * const ustr = (const unsigned char *) str;

    in = ustr;
    n_chars = 0;
    while ((len < 0 || ustr + len - in > 0) && *in)
    {
	uint32_t wc = _utf8_get_char_extended (in, ustr + len - in);
	if (wc & 0x80000000 || !UNICODE_VALID (wc))
	    return _cairo_error (CAIRO_STATUS_INVALID_STRING);

	n_chars++;
	if (n_chars == INT_MAX)
	    return _cairo_error (CAIRO_STATUS_INVALID_STRING);

	in = UTF8_NEXT_CHAR (in);
    }

    if (result) {
	str32 = _cairo_malloc_ab (n_chars + 1, sizeof (uint32_t));
	if (!str32)
	    return _cairo_error (CAIRO_STATUS_NO_MEMORY);

	in = ustr;
	for (i=0; i < n_chars; i++) {
	    str32[i] = _utf8_get_char (in);
	    in = UTF8_NEXT_CHAR (in);
	}
	str32[i] = 0;

	*result = str32;
    }

    if (items_written)
	*items_written = n_chars;

    return CAIRO_STATUS_SUCCESS;
}