예제 #1
0
파일: gutf8.c 프로젝트: cosimoc/glib
/**
 * g_utf8_to_ucs4:
 * @str: a UTF-8 encoded string
 * @len: the maximum length of @str to use, in bytes. If @len < 0,
 *     then the string is nul-terminated.
 * @items_read: (out caller-allocates) (optional): location to store number of
  *    bytes read, or %NULL.
 *     If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will be
 *     returned in case @str contains a trailing partial
 *     character. If an error occurs then the index of the
 *     invalid input is stored here.
 * @items_written: (out caller-allocates) (optional): location to store number
 *     of characters written or %NULL. The value here stored does not include
 *     the trailing 0 character.
 * @error: location to store the error occurring, or %NULL to ignore
 *     errors. Any of the errors in #GConvertError other than
 *     %G_CONVERT_ERROR_NO_CONVERSION may occur.
 *
 * Convert a string from UTF-8 to a 32-bit fixed width
 * representation as UCS-4. A trailing 0 character will be added to the
 * string after the converted text.
 * 
 * Returns: a pointer to a newly allocated UCS-4 string.
 *     This value must be freed with g_free(). If an error occurs,
 *     %NULL will be returned and @error set.
 */
gunichar *
g_utf8_to_ucs4 (const gchar *str,
		glong        len,             
		glong       *items_read,      
		glong       *items_written,   
		GError     **error)
{
  gunichar *result = NULL;
  gint n_chars, i;
  const gchar *in;
  
  in = str;
  n_chars = 0;
  while ((len < 0 || str + len - in > 0) && *in)
    {
      gunichar wc = g_utf8_get_char_extended (in, len < 0 ? 6 : str + len - in);
      if (wc & 0x80000000)
	{
	  if (wc == (gunichar)-2)
	    {
	      if (items_read)
		break;
	      else
		g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,
                                     _("Partial character sequence at end of input"));
	    }
	  else
	    g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
                                 _("Invalid byte sequence in conversion input"));

	  goto err_out;
	}

      n_chars++;

      in = g_utf8_next_char (in);
    }

  result = try_malloc_n (n_chars + 1, sizeof (gunichar), error);
  if (result == NULL)
      goto err_out;

  in = str;
  for (i=0; i < n_chars; i++)
    {
      result[i] = g_utf8_get_char (in);
      in = g_utf8_next_char (in);
    }
  result[i] = 0;

  if (items_written)
    *items_written = n_chars;

 err_out:
  if (items_read)
    *items_read = in - str;

  return result;
}
예제 #2
0
파일: gutf8.c 프로젝트: cosimoc/glib
/**
 * g_ucs4_to_utf8:
 * @str: a UCS-4 encoded string
 * @len: the maximum length (number of characters) of @str to use. 
 *     If @len < 0, then the string is nul-terminated.
 * @items_read: (out caller-allocates) (optional): location to store number of
 *     characters read, or %NULL.
 * @items_written: (out caller-allocates) (optional): location to store number
 *     of bytes written or %NULL. The value here stored does not include the
 *     trailing 0 byte.
 * @error: location to store the error occurring, or %NULL to ignore
 *         errors. Any of the errors in #GConvertError other than
 *         %G_CONVERT_ERROR_NO_CONVERSION may occur.
 *
 * Convert a string from a 32-bit fixed width representation as UCS-4.
 * to UTF-8. The result will be terminated with a 0 byte.
 * 
 * Returns: a pointer to a newly allocated UTF-8 string.
 *     This value must be freed with g_free(). If an error occurs,
 *     %NULL will be returned and @error set. In that case, @items_read
 *     will be set to the position of the first invalid input character.
 */
gchar *
g_ucs4_to_utf8 (const gunichar *str,
		glong           len,              
		glong          *items_read,       
		glong          *items_written,    
		GError        **error)
{
  gint result_length;
  gchar *result = NULL;
  gchar *p;
  gint i;

  result_length = 0;
  for (i = 0; len < 0 || i < len ; i++)
    {
      if (!str[i])
	break;

      if (str[i] >= 0x80000000)
	{
	  g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
                               _("Character out of range for UTF-8"));
	  goto err_out;
	}
      
      result_length += UTF8_LENGTH (str[i]);
    }

  result = try_malloc_n (result_length + 1, 1, error);
  if (result == NULL)
      goto err_out;

  p = result;

  i = 0;
  while (p < result + result_length)
    p += g_unichar_to_utf8 (str[i++], p);
  
  *p = '\0';

  if (items_written)
    *items_written = p - result;

 err_out:
  if (items_read)
    *items_read = i;

  return result;
}
예제 #3
0
void * simple_segregated_storage<SizeType>::malloc_n(const size_type n,
    const size_type partition_size)
{
  void * start = &first;
  void * iter;
  do
  {
    if (nextof(start) == 0)
      return 0;
    iter = try_malloc_n(start, n, partition_size);
  } while (iter == 0);
  void * const ret = nextof(start);
  nextof(start) = nextof(iter);
  return ret;
}
예제 #4
0
파일: gutf8.c 프로젝트: cosimoc/glib
/**
 * g_ucs4_to_utf16:
 * @str: a UCS-4 encoded string
 * @len: the maximum length (number of characters) of @str to use. 
 *     If @len < 0, then the string is nul-terminated.
 * @items_read: (out caller-allocates) (optional): location to store number of
 *     bytes read, or %NULL. If an error occurs then the index of the invalid
 *     input is stored here.
 * @items_written: (out caller-allocates) (optional): location to store number
 *     of #gunichar2  written, or %NULL. The value stored here does not include
 *     the trailing 0.
 * @error: location to store the error occurring, or %NULL to ignore
 *     errors. Any of the errors in #GConvertError other than
 *     %G_CONVERT_ERROR_NO_CONVERSION may occur.
 *
 * Convert a string from UCS-4 to UTF-16. A 0 character will be
 * added to the result after the converted text.
 * 
 * Returns: a pointer to a newly allocated UTF-16 string.
 *     This value must be freed with g_free(). If an error occurs,
 *     %NULL will be returned and @error set.
 */
gunichar2 *
g_ucs4_to_utf16 (const gunichar  *str,
		 glong            len,              
		 glong           *items_read,       
		 glong           *items_written,    
		 GError         **error)
{
  gunichar2 *result = NULL;
  gint n16;
  gint i, j;

  n16 = 0;
  i = 0;
  while ((len < 0 || i < len) && str[i])
    {
      gunichar wc = str[i];

      if (wc < 0xd800)
	n16 += 1;
      else if (wc < 0xe000)
	{
	  g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
                               _("Invalid sequence in conversion input"));

	  goto err_out;
	}
      else if (wc < 0x10000)
	n16 += 1;
      else if (wc < 0x110000)
	n16 += 2;
      else
	{
	  g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
                               _("Character out of range for UTF-16"));

	  goto err_out;
	}

      i++;
    }

  result = try_malloc_n (n16 + 1, sizeof (gunichar2), error);
  if (result == NULL)
      goto err_out;

  for (i = 0, j = 0; j < n16; i++)
    {
      gunichar wc = str[i];

      if (wc < 0x10000)
	{
	  result[j++] = wc;
	}
      else
	{
	  result[j++] = (wc - 0x10000) / 0x400 + 0xd800;
	  result[j++] = (wc - 0x10000) % 0x400 + 0xdc00;
	}
    }
  result[j] = 0;

  if (items_written)
    *items_written = n16;
  
 err_out:
  if (items_read)
    *items_read = i;
  
  return result;
}
예제 #5
0
파일: gutf8.c 프로젝트: cosimoc/glib
/**
 * g_utf8_to_utf16:
 * @str: a UTF-8 encoded string
 * @len: the maximum length (number of bytes) of @str to use.
 *     If @len < 0, then the string is nul-terminated.
 * @items_read: (out caller-allocates) (optional): location to store number of
 *     bytes read, or %NULL. If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will
 *     be returned in case @str contains a trailing partial character. If
 *     an error occurs then the index of the invalid input is stored here.
 * @items_written: (out caller-allocates) (optional): location to store number
 *     of #gunichar2 written, or %NULL. The value stored here does not include
 *     the trailing 0.
 * @error: location to store the error occurring, or %NULL to ignore
 *     errors. Any of the errors in #GConvertError other than
 *     %G_CONVERT_ERROR_NO_CONVERSION may occur.
 *
 * Convert a string from UTF-8 to UTF-16. A 0 character will be
 * added to the result after the converted text.
 *
 * Returns: a pointer to a newly allocated UTF-16 string.
 *     This value must be freed with g_free(). If an error occurs,
 *     %NULL will be returned and @error set.
 */
gunichar2 *
g_utf8_to_utf16 (const gchar *str,
		 glong        len,
		 glong       *items_read,
		 glong       *items_written,
		 GError     **error)
{
  gunichar2 *result = NULL;
  gint n16;
  const gchar *in;
  gint i;

  g_return_val_if_fail (str != NULL, NULL);

  in = str;
  n16 = 0;
  while ((len < 0 || str + len - in > 0) && *in)
    {
      gunichar wc = g_utf8_get_char_extended (in, len < 0 ? 6 : str + len - in);
      if (wc & 0x80000000)
	{
	  if (wc == (gunichar)-2)
	    {
	      if (items_read)
		break;
	      else
		g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,
                                     _("Partial character sequence at end of input"));
	    }
	  else
	    g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
                                 _("Invalid byte sequence in conversion input"));

	  goto err_out;
	}

      if (wc < 0xd800)
	n16 += 1;
      else if (wc < 0xe000)
	{
	  g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
                               _("Invalid sequence in conversion input"));

	  goto err_out;
	}
      else if (wc < 0x10000)
	n16 += 1;
      else if (wc < 0x110000)
	n16 += 2;
      else
	{
	  g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
                               _("Character out of range for UTF-16"));

	  goto err_out;
	}
      
      in = g_utf8_next_char (in);
    }

  result = try_malloc_n (n16 + 1, sizeof (gunichar2), error);
  if (result == NULL)
      goto err_out;

  in = str;
  for (i = 0; i < n16;)
    {
      gunichar wc = g_utf8_get_char (in);

      if (wc < 0x10000)
	{
	  result[i++] = wc;
	}
      else
	{
	  result[i++] = (wc - 0x10000) / 0x400 + 0xd800;
	  result[i++] = (wc - 0x10000) % 0x400 + 0xdc00;
	}
      
      in = g_utf8_next_char (in);
    }

  result[i] = 0;

  if (items_written)
    *items_written = n16;

 err_out:
  if (items_read)
    *items_read = in - str;
  
  return result;
}
예제 #6
0
파일: gutf8.c 프로젝트: cosimoc/glib
/**
 * g_utf16_to_ucs4:
 * @str: a UTF-16 encoded string
 * @len: the maximum length (number of #gunichar2) of @str to use. 
 *     If @len < 0, then the string is nul-terminated.
 * @items_read: (out caller-allocates) (optional): location to store number of
 *     words read, or %NULL. If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will
 *     be returned in case @str contains a trailing partial character. If
 *     an error occurs then the index of the invalid input is stored here.
 * @items_written: (out caller-allocates) (optional): location to store number
 *     of characters written, or %NULL. The value stored here does not include
 *     the trailing 0 character.
 * @error: location to store the error occurring, or %NULL to ignore
 *     errors. Any of the errors in #GConvertError other than
 *     %G_CONVERT_ERROR_NO_CONVERSION may occur.
 *
 * Convert a string from UTF-16 to UCS-4. The result will be
 * nul-terminated.
 * 
 * Returns: a pointer to a newly allocated UCS-4 string.
 *     This value must be freed with g_free(). If an error occurs,
 *     %NULL will be returned and @error set.
 */
gunichar *
g_utf16_to_ucs4 (const gunichar2  *str,
		 glong             len,              
		 glong            *items_read,       
		 glong            *items_written,    
		 GError          **error)
{
  const gunichar2 *in;
  gchar *out;
  gchar *result = NULL;
  gint n_bytes;
  gunichar high_surrogate;

  g_return_val_if_fail (str != NULL, NULL);

  n_bytes = 0;
  in = str;
  high_surrogate = 0;
  while ((len < 0 || in - str < len) && *in)
    {
      gunichar2 c = *in;

      if (c >= 0xdc00 && c < 0xe000) /* low surrogate */
	{
	  if (high_surrogate)
	    {
	      high_surrogate = 0;
	    }
	  else
	    {
	      g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
                                   _("Invalid sequence in conversion input"));
	      goto err_out;
	    }
	}
      else
	{
	  if (high_surrogate)
	    {
	      g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
                                   _("Invalid sequence in conversion input"));
	      goto err_out;
	    }

	  if (c >= 0xd800 && c < 0xdc00) /* high surrogate */
	    {
	      high_surrogate = c;
	      goto next1;
	    }
	}

      /********** DIFFERENT for UTF8/UCS4 **********/
      n_bytes += sizeof (gunichar);

    next1:
      in++;
    }

  if (high_surrogate && !items_read)
    {
      g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,
                           _("Partial character sequence at end of input"));
      goto err_out;
    }
  
  /* At this point, everything is valid, and we just need to convert
   */
  /********** DIFFERENT for UTF8/UCS4 **********/
  result = try_malloc_n (n_bytes + 4, 1, error);
  if (result == NULL)
      goto err_out;

  high_surrogate = 0;
  out = result;
  in = str;
  while (out < result + n_bytes)
    {
      gunichar2 c = *in;
      gunichar wc;

      if (c >= 0xdc00 && c < 0xe000) /* low surrogate */
	{
	  wc = SURROGATE_VALUE (high_surrogate, c);
	  high_surrogate = 0;
	}
      else if (c >= 0xd800 && c < 0xdc00) /* high surrogate */
	{
	  high_surrogate = c;
	  goto next2;
	}
      else
	wc = c;

      /********** DIFFERENT for UTF8/UCS4 **********/
      *(gunichar *)out = wc;
      out += sizeof (gunichar);

    next2:
      in++;
    }

  /********** DIFFERENT for UTF8/UCS4 **********/
  *(gunichar *)out = 0;

  if (items_written)
    /********** DIFFERENT for UTF8/UCS4 **********/
    *items_written = (out - result) / sizeof (gunichar);

 err_out:
  if (items_read)
    *items_read = in - str;

  return (gunichar *)result;
}
예제 #7
0
파일: gutf8.c 프로젝트: cosimoc/glib
/**
 * g_utf16_to_utf8:
 * @str: a UTF-16 encoded string
 * @len: the maximum length (number of #gunichar2) of @str to use. 
 *     If @len < 0, then the string is nul-terminated.
 * @items_read: (out caller-allocates) (optional): location to store number of
 *     words read, or %NULL. If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will
 *     be returned in case @str contains a trailing partial character. If
 *     an error occurs then the index of the invalid input is stored here.
 * @items_written: (out caller-allocates) (optional): location to store number
 *     of bytes written, or %NULL. The value stored here does not include the
 *     trailing 0 byte.
 * @error: location to store the error occurring, or %NULL to ignore
 *     errors. Any of the errors in #GConvertError other than
 *     %G_CONVERT_ERROR_NO_CONVERSION may occur.
 *
 * Convert a string from UTF-16 to UTF-8. The result will be
 * terminated with a 0 byte.
 *
 * Note that the input is expected to be already in native endianness,
 * an initial byte-order-mark character is not handled specially.
 * g_convert() can be used to convert a byte buffer of UTF-16 data of
 * ambiguous endianess.
 *
 * Further note that this function does not validate the result
 * string; it may e.g. include embedded NUL characters. The only
 * validation done by this function is to ensure that the input can
 * be correctly interpreted as UTF-16, i.e. it doesn't contain
 * things unpaired surrogates.
 *
 * Returns: a pointer to a newly allocated UTF-8 string.
 *     This value must be freed with g_free(). If an error occurs,
 *     %NULL will be returned and @error set.
 **/
gchar *
g_utf16_to_utf8 (const gunichar2  *str,
		 glong             len,
		 glong            *items_read,
		 glong            *items_written,
		 GError          **error)
{
  /* This function and g_utf16_to_ucs4 are almost exactly identical -
   * The lines that differ are marked.
   */
  const gunichar2 *in;
  gchar *out;
  gchar *result = NULL;
  gint n_bytes;
  gunichar high_surrogate;

  g_return_val_if_fail (str != NULL, NULL);

  n_bytes = 0;
  in = str;
  high_surrogate = 0;
  while ((len < 0 || in - str < len) && *in)
    {
      gunichar2 c = *in;
      gunichar wc;

      if (c >= 0xdc00 && c < 0xe000) /* low surrogate */
	{
	  if (high_surrogate)
	    {
	      wc = SURROGATE_VALUE (high_surrogate, c);
	      high_surrogate = 0;
	    }
	  else
	    {
	      g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
                                   _("Invalid sequence in conversion input"));
	      goto err_out;
	    }
	}
      else
	{
	  if (high_surrogate)
	    {
	      g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
                                   _("Invalid sequence in conversion input"));
	      goto err_out;
	    }

	  if (c >= 0xd800 && c < 0xdc00) /* high surrogate */
	    {
	      high_surrogate = c;
	      goto next1;
	    }
	  else
	    wc = c;
	}

      /********** DIFFERENT for UTF8/UCS4 **********/
      n_bytes += UTF8_LENGTH (wc);

    next1:
      in++;
    }

  if (high_surrogate && !items_read)
    {
      g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,
                           _("Partial character sequence at end of input"));
      goto err_out;
    }
  
  /* At this point, everything is valid, and we just need to convert
   */
  /********** DIFFERENT for UTF8/UCS4 **********/
  result = try_malloc_n (n_bytes + 1, 1, error);
  if (result == NULL)
      goto err_out;

  high_surrogate = 0;
  out = result;
  in = str;
  while (out < result + n_bytes)
    {
      gunichar2 c = *in;
      gunichar wc;

      if (c >= 0xdc00 && c < 0xe000) /* low surrogate */
	{
	  wc = SURROGATE_VALUE (high_surrogate, c);
	  high_surrogate = 0;
	}
      else if (c >= 0xd800 && c < 0xdc00) /* high surrogate */
	{
	  high_surrogate = c;
	  goto next2;
	}
      else
	wc = c;

      /********** DIFFERENT for UTF8/UCS4 **********/
      out += g_unichar_to_utf8 (wc, out);

    next2:
      in++;
    }
  
  /********** DIFFERENT for UTF8/UCS4 **********/
  *out = '\0';

  if (items_written)
    /********** DIFFERENT for UTF8/UCS4 **********/
    *items_written = out - result;

 err_out:
  if (items_read)
    *items_read = in - str;

  return result;
}