/** * g_utf8_to_ucs4: * @str: a UTF-8 encoded string * @len: the maximum length of @str to use, in bytes. If @len < 0, * then the string is nul-terminated. * @items_read: (out caller-allocates) (optional): location to store number of * bytes read, or %NULL. * If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will be * returned in case @str contains a trailing partial * character. If an error occurs then the index of the * invalid input is stored here. * @items_written: (out caller-allocates) (optional): location to store number * of characters written or %NULL. The value here stored does not include * the trailing 0 character. * @error: location to store the error occurring, or %NULL to ignore * errors. Any of the errors in #GConvertError other than * %G_CONVERT_ERROR_NO_CONVERSION may occur. * * Convert a string from UTF-8 to a 32-bit fixed width * representation as UCS-4. A trailing 0 character will be added to the * string after the converted text. * * Returns: a pointer to a newly allocated UCS-4 string. * This value must be freed with g_free(). If an error occurs, * %NULL will be returned and @error set. */ gunichar * g_utf8_to_ucs4 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **error) { gunichar *result = NULL; gint n_chars, i; const gchar *in; in = str; n_chars = 0; while ((len < 0 || str + len - in > 0) && *in) { gunichar wc = g_utf8_get_char_extended (in, len < 0 ? 
6 : str + len - in); if (wc & 0x80000000) { if (wc == (gunichar)-2) { if (items_read) break; else g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT, _("Partial character sequence at end of input")); } else g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, _("Invalid byte sequence in conversion input")); goto err_out; } n_chars++; in = g_utf8_next_char (in); } result = try_malloc_n (n_chars + 1, sizeof (gunichar), error); if (result == NULL) goto err_out; in = str; for (i=0; i < n_chars; i++) { result[i] = g_utf8_get_char (in); in = g_utf8_next_char (in); } result[i] = 0; if (items_written) *items_written = n_chars; err_out: if (items_read) *items_read = in - str; return result; }
/** * g_ucs4_to_utf8: * @str: a UCS-4 encoded string * @len: the maximum length (number of characters) of @str to use. * If @len < 0, then the string is nul-terminated. * @items_read: (out caller-allocates) (optional): location to store number of * characters read, or %NULL. * @items_written: (out caller-allocates) (optional): location to store number * of bytes written or %NULL. The value here stored does not include the * trailing 0 byte. * @error: location to store the error occurring, or %NULL to ignore * errors. Any of the errors in #GConvertError other than * %G_CONVERT_ERROR_NO_CONVERSION may occur. * * Convert a string from a 32-bit fixed width representation as UCS-4. * to UTF-8. The result will be terminated with a 0 byte. * * Returns: a pointer to a newly allocated UTF-8 string. * This value must be freed with g_free(). If an error occurs, * %NULL will be returned and @error set. In that case, @items_read * will be set to the position of the first invalid input character. */ gchar * g_ucs4_to_utf8 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **error) { gint result_length; gchar *result = NULL; gchar *p; gint i; result_length = 0; for (i = 0; len < 0 || i < len ; i++) { if (!str[i]) break; if (str[i] >= 0x80000000) { g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, _("Character out of range for UTF-8")); goto err_out; } result_length += UTF8_LENGTH (str[i]); } result = try_malloc_n (result_length + 1, 1, error); if (result == NULL) goto err_out; p = result; i = 0; while (p < result + result_length) p += g_unichar_to_utf8 (str[i++], p); *p = '\0'; if (items_written) *items_written = p - result; err_out: if (items_read) *items_read = i; return result; }
// Unlinks a run from the list headed by `first` and returns its first node,
// or 0 when no suitable run exists.  The search is delegated to
// try_malloc_n(); presumably it advances its first argument to the node
// preceding the candidate run and returns the run's last node (or 0 on
// failure) -- confirm against its definition.
void * simple_segregated_storage<SizeType>::malloc_n(const size_type n, const size_type partition_size)
{
  void * prev = &first;
  void * last = 0;

  // Keep searching until try_malloc_n reports a run or the list is exhausted.
  while (last == 0)
  {
    if (nextof(prev) == 0)
      return 0;
    last = try_malloc_n(prev, n, partition_size);
  }

  // Splice the run [nextof(prev), last] out of the list.
  void * const head = nextof(prev);
  nextof(prev) = nextof(last);
  return head;
}
/** * g_ucs4_to_utf16: * @str: a UCS-4 encoded string * @len: the maximum length (number of characters) of @str to use. * If @len < 0, then the string is nul-terminated. * @items_read: (out caller-allocates) (optional): location to store number of * bytes read, or %NULL. If an error occurs then the index of the invalid * input is stored here. * @items_written: (out caller-allocates) (optional): location to store number * of #gunichar2 written, or %NULL. The value stored here does not include * the trailing 0. * @error: location to store the error occurring, or %NULL to ignore * errors. Any of the errors in #GConvertError other than * %G_CONVERT_ERROR_NO_CONVERSION may occur. * * Convert a string from UCS-4 to UTF-16. A 0 character will be * added to the result after the converted text. * * Returns: a pointer to a newly allocated UTF-16 string. * This value must be freed with g_free(). If an error occurs, * %NULL will be returned and @error set. */ gunichar2 * g_ucs4_to_utf16 (const gunichar *str, glong len, glong *items_read, glong *items_written, GError **error) { gunichar2 *result = NULL; gint n16; gint i, j; n16 = 0; i = 0; while ((len < 0 || i < len) && str[i]) { gunichar wc = str[i]; if (wc < 0xd800) n16 += 1; else if (wc < 0xe000) { g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, _("Invalid sequence in conversion input")); goto err_out; } else if (wc < 0x10000) n16 += 1; else if (wc < 0x110000) n16 += 2; else { g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, _("Character out of range for UTF-16")); goto err_out; } i++; } result = try_malloc_n (n16 + 1, sizeof (gunichar2), error); if (result == NULL) goto err_out; for (i = 0, j = 0; j < n16; i++) { gunichar wc = str[i]; if (wc < 0x10000) { result[j++] = wc; } else { result[j++] = (wc - 0x10000) / 0x400 + 0xd800; result[j++] = (wc - 0x10000) % 0x400 + 0xdc00; } } result[j] = 0; if (items_written) *items_written = n16; err_out: if (items_read) 
*items_read = i; return result; }
/** * g_utf8_to_utf16: * @str: a UTF-8 encoded string * @len: the maximum length (number of bytes) of @str to use. * If @len < 0, then the string is nul-terminated. * @items_read: (out caller-allocates) (optional): location to store number of * bytes read, or %NULL. If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will * be returned in case @str contains a trailing partial character. If * an error occurs then the index of the invalid input is stored here. * @items_written: (out caller-allocates) (optional): location to store number * of #gunichar2 written, or %NULL. The value stored here does not include * the trailing 0. * @error: location to store the error occurring, or %NULL to ignore * errors. Any of the errors in #GConvertError other than * %G_CONVERT_ERROR_NO_CONVERSION may occur. * * Convert a string from UTF-8 to UTF-16. A 0 character will be * added to the result after the converted text. * * Returns: a pointer to a newly allocated UTF-16 string. * This value must be freed with g_free(). If an error occurs, * %NULL will be returned and @error set. */ gunichar2 * g_utf8_to_utf16 (const gchar *str, glong len, glong *items_read, glong *items_written, GError **error) { gunichar2 *result = NULL; gint n16; const gchar *in; gint i; g_return_val_if_fail (str != NULL, NULL); in = str; n16 = 0; while ((len < 0 || str + len - in > 0) && *in) { gunichar wc = g_utf8_get_char_extended (in, len < 0 ? 
6 : str + len - in); if (wc & 0x80000000) { if (wc == (gunichar)-2) { if (items_read) break; else g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT, _("Partial character sequence at end of input")); } else g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, _("Invalid byte sequence in conversion input")); goto err_out; } if (wc < 0xd800) n16 += 1; else if (wc < 0xe000) { g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, _("Invalid sequence in conversion input")); goto err_out; } else if (wc < 0x10000) n16 += 1; else if (wc < 0x110000) n16 += 2; else { g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, _("Character out of range for UTF-16")); goto err_out; } in = g_utf8_next_char (in); } result = try_malloc_n (n16 + 1, sizeof (gunichar2), error); if (result == NULL) goto err_out; in = str; for (i = 0; i < n16;) { gunichar wc = g_utf8_get_char (in); if (wc < 0x10000) { result[i++] = wc; } else { result[i++] = (wc - 0x10000) / 0x400 + 0xd800; result[i++] = (wc - 0x10000) % 0x400 + 0xdc00; } in = g_utf8_next_char (in); } result[i] = 0; if (items_written) *items_written = n16; err_out: if (items_read) *items_read = in - str; return result; }
/** * g_utf16_to_ucs4: * @str: a UTF-16 encoded string * @len: the maximum length (number of #gunichar2) of @str to use. * If @len < 0, then the string is nul-terminated. * @items_read: (out caller-allocates) (optional): location to store number of * words read, or %NULL. If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will * be returned in case @str contains a trailing partial character. If * an error occurs then the index of the invalid input is stored here. * @items_written: (out caller-allocates) (optional): location to store number * of characters written, or %NULL. The value stored here does not include * the trailing 0 character. * @error: location to store the error occurring, or %NULL to ignore * errors. Any of the errors in #GConvertError other than * %G_CONVERT_ERROR_NO_CONVERSION may occur. * * Convert a string from UTF-16 to UCS-4. The result will be * nul-terminated. * * Returns: a pointer to a newly allocated UCS-4 string. * This value must be freed with g_free(). If an error occurs, * %NULL will be returned and @error set. 
*/ gunichar * g_utf16_to_ucs4 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **error) { const gunichar2 *in; gchar *out; gchar *result = NULL; gint n_bytes; gunichar high_surrogate; g_return_val_if_fail (str != NULL, NULL); n_bytes = 0; in = str; high_surrogate = 0; while ((len < 0 || in - str < len) && *in) { gunichar2 c = *in; if (c >= 0xdc00 && c < 0xe000) /* low surrogate */ { if (high_surrogate) { high_surrogate = 0; } else { g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, _("Invalid sequence in conversion input")); goto err_out; } } else { if (high_surrogate) { g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, _("Invalid sequence in conversion input")); goto err_out; } if (c >= 0xd800 && c < 0xdc00) /* high surrogate */ { high_surrogate = c; goto next1; } } /********** DIFFERENT for UTF8/UCS4 **********/ n_bytes += sizeof (gunichar); next1: in++; } if (high_surrogate && !items_read) { g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT, _("Partial character sequence at end of input")); goto err_out; } /* At this point, everything is valid, and we just need to convert */ /********** DIFFERENT for UTF8/UCS4 **********/ result = try_malloc_n (n_bytes + 4, 1, error); if (result == NULL) goto err_out; high_surrogate = 0; out = result; in = str; while (out < result + n_bytes) { gunichar2 c = *in; gunichar wc; if (c >= 0xdc00 && c < 0xe000) /* low surrogate */ { wc = SURROGATE_VALUE (high_surrogate, c); high_surrogate = 0; } else if (c >= 0xd800 && c < 0xdc00) /* high surrogate */ { high_surrogate = c; goto next2; } else wc = c; /********** DIFFERENT for UTF8/UCS4 **********/ *(gunichar *)out = wc; out += sizeof (gunichar); next2: in++; } /********** DIFFERENT for UTF8/UCS4 **********/ *(gunichar *)out = 0; if (items_written) /********** DIFFERENT for UTF8/UCS4 **********/ *items_written = (out - result) / sizeof (gunichar); err_out: if (items_read) 
*items_read = in - str; return (gunichar *)result; }
/** * g_utf16_to_utf8: * @str: a UTF-16 encoded string * @len: the maximum length (number of #gunichar2) of @str to use. * If @len < 0, then the string is nul-terminated. * @items_read: (out caller-allocates) (optional): location to store number of * words read, or %NULL. If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will * be returned in case @str contains a trailing partial character. If * an error occurs then the index of the invalid input is stored here. * @items_written: (out caller-allocates) (optional): location to store number * of bytes written, or %NULL. The value stored here does not include the * trailing 0 byte. * @error: location to store the error occurring, or %NULL to ignore * errors. Any of the errors in #GConvertError other than * %G_CONVERT_ERROR_NO_CONVERSION may occur. * * Convert a string from UTF-16 to UTF-8. The result will be * terminated with a 0 byte. * * Note that the input is expected to be already in native endianness, * an initial byte-order-mark character is not handled specially. * g_convert() can be used to convert a byte buffer of UTF-16 data of * ambiguous endianess. * * Further note that this function does not validate the result * string; it may e.g. include embedded NUL characters. The only * validation done by this function is to ensure that the input can * be correctly interpreted as UTF-16, i.e. it doesn't contain * things unpaired surrogates. * * Returns: a pointer to a newly allocated UTF-8 string. * This value must be freed with g_free(). If an error occurs, * %NULL will be returned and @error set. **/ gchar * g_utf16_to_utf8 (const gunichar2 *str, glong len, glong *items_read, glong *items_written, GError **error) { /* This function and g_utf16_to_ucs4 are almost exactly identical - * The lines that differ are marked. 
*/ const gunichar2 *in; gchar *out; gchar *result = NULL; gint n_bytes; gunichar high_surrogate; g_return_val_if_fail (str != NULL, NULL); n_bytes = 0; in = str; high_surrogate = 0; while ((len < 0 || in - str < len) && *in) { gunichar2 c = *in; gunichar wc; if (c >= 0xdc00 && c < 0xe000) /* low surrogate */ { if (high_surrogate) { wc = SURROGATE_VALUE (high_surrogate, c); high_surrogate = 0; } else { g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, _("Invalid sequence in conversion input")); goto err_out; } } else { if (high_surrogate) { g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE, _("Invalid sequence in conversion input")); goto err_out; } if (c >= 0xd800 && c < 0xdc00) /* high surrogate */ { high_surrogate = c; goto next1; } else wc = c; } /********** DIFFERENT for UTF8/UCS4 **********/ n_bytes += UTF8_LENGTH (wc); next1: in++; } if (high_surrogate && !items_read) { g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT, _("Partial character sequence at end of input")); goto err_out; } /* At this point, everything is valid, and we just need to convert */ /********** DIFFERENT for UTF8/UCS4 **********/ result = try_malloc_n (n_bytes + 1, 1, error); if (result == NULL) goto err_out; high_surrogate = 0; out = result; in = str; while (out < result + n_bytes) { gunichar2 c = *in; gunichar wc; if (c >= 0xdc00 && c < 0xe000) /* low surrogate */ { wc = SURROGATE_VALUE (high_surrogate, c); high_surrogate = 0; } else if (c >= 0xd800 && c < 0xdc00) /* high surrogate */ { high_surrogate = c; goto next2; } else wc = c; /********** DIFFERENT for UTF8/UCS4 **********/ out += g_unichar_to_utf8 (wc, out); next2: in++; } /********** DIFFERENT for UTF8/UCS4 **********/ *out = '\0'; if (items_written) /********** DIFFERENT for UTF8/UCS4 **********/ *items_written = out - result; err_out: if (items_read) *items_read = in - str; return result; }