/**
 * Python binding: update_aux_string(text[, attrs]).
 *
 * Parses a unicode string and an optional attributes object out of @args and
 * forwards them to the wrapped engine's update_aux_string().
 *
 * When Py_UNICODE is 4 bytes wide the buffer is handed over directly;
 * otherwise it is treated as UTF-16 and converted to UCS-4 with
 * g_utf16_to_ucs4() first.
 *
 * Returns a new reference to None on success.  Returns NULL with a Python
 * exception set when the arguments cannot be parsed, or when the UTF-16 to
 * UCS-4 conversion fails.
 */
PyObject *
PyIMEngine::py_update_aux_string (PyIMEngineObject *self, PyObject *args)
{
    Py_UNICODE *str = NULL;
    /* Stays NULL when the caller omits the optional second argument.
     * NOTE(review): Attributes_FromTupleOrList() is then called with NULL --
     * confirm that it tolerates that. */
    PyObject *pAttrs = NULL;

#if Py_UNICODE_SIZE == 4
    if (!PyArg_ParseTuple (args, "u|O:update_aux_string", &str, &pAttrs))
        return NULL;

    self->engine.update_aux_string (WideString ((wchar_t *)str),
                                    Attributes_FromTupleOrList (pAttrs));
#else
    int size = 0;
    gunichar *unistr = NULL;

    if (!PyArg_ParseTuple (args, "u#|O:update_aux_string", &str, &size, &pAttrs))
        return NULL;

    unistr = g_utf16_to_ucs4 (str, size, NULL, NULL, NULL);
    if (unistr == NULL) {
        /* g_utf16_to_ucs4() returns NULL on malformed input; constructing a
         * WideString from a NULL pointer would be undefined behaviour. */
        PyErr_SetString (PyExc_ValueError,
                         "update_aux_string: invalid UTF-16 string");
        return NULL;
    }

    self->engine.update_aux_string (WideString ((wchar_t *)unistr),
                                    Attributes_FromTupleOrList (pAttrs));
    g_free (unistr);
#endif

    Py_INCREF (Py_None);
    return Py_None;
}
/**
 * Python binding: commit_string(text).
 *
 * Parses a unicode string out of @args and forwards it to the wrapped
 * engine's commit_string().
 *
 * When Py_UNICODE is 4 bytes wide the buffer is handed over directly;
 * otherwise it is treated as UTF-16 and converted to UCS-4 with
 * g_utf16_to_ucs4() first.
 *
 * Returns a new reference to None on success.  Returns NULL with a Python
 * exception set when the arguments cannot be parsed, or when the UTF-16 to
 * UCS-4 conversion fails.
 */
PyObject *
PyIMEngine::py_commit_string (PyIMEngineObject *self, PyObject *args)
{
    Py_UNICODE *str = NULL;

#if Py_UNICODE_SIZE == 4
    if (!PyArg_ParseTuple (args, "u:commit_string", &str))
        return NULL;

    self->engine.commit_string (WideString ((wchar_t *)str));
#else
    int size = 0;
    gunichar *unistr = NULL;

    if (!PyArg_ParseTuple (args, "u#:commit_string", &str, &size))
        return NULL;

    unistr = g_utf16_to_ucs4 (str, size, NULL, NULL, NULL);
    if (unistr == NULL) {
        /* g_utf16_to_ucs4() returns NULL on malformed input; constructing a
         * WideString from a NULL pointer would be undefined behaviour. */
        PyErr_SetString (PyExc_ValueError,
                         "commit_string: invalid UTF-16 string");
        return NULL;
    }

    self->engine.commit_string (WideString ((wchar_t *)unistr));
    g_free (unistr);
#endif

    Py_INCREF (Py_None);
    return Py_None;
}
/*
 * glibsharp_utf16_to_unichar:
 * @chr: a single UTF-16 code unit
 *
 * Converts one UTF-16 code unit to a UCS-4 code point by running it through
 * g_utf16_to_ucs4() as a one-element string.
 *
 * Returns: the first code point of the converted string, or 0 when
 * g_utf16_to_ucs4() fails and returns NULL (for instance when @chr is an
 * unpaired surrogate, which cannot be converted on its own).
 */
gunichar
glibsharp_utf16_to_unichar (guint16 chr)
{
	gunichar *ucs4_str;
	gunichar result;

	ucs4_str = g_utf16_to_ucs4 (&chr, 1, NULL, NULL, NULL);
	if (ucs4_str == NULL)
		return 0;	/* conversion failed; nothing to dereference or free */

	result = *ucs4_str;
	g_free (ucs4_str);
	return result;
}
/** * gjs_string_to_ucs4: * @cx: a #JSContext * @str: rooted JSString * @ucs4_string_p: return location for a #gunichar array * @len_p: return location for @ucs4_string_p length * * Returns: true on success, false otherwise in which case a JS error is thrown */ bool gjs_string_to_ucs4(JSContext *cx, JS::HandleString str, gunichar **ucs4_string_p, size_t *len_p) { if (ucs4_string_p == NULL) return true; JSAutoRequest ar(cx); size_t len; GError *error = NULL; if (JS_StringHasLatin1Chars(str)) return from_latin1(cx, str, ucs4_string_p, len_p); /* From this point on, crash if a GC is triggered while we are using * the string's chars */ JS::AutoCheckCannotGC nogc; const char16_t *utf16 = JS_GetTwoByteStringCharsAndLength(cx, nogc, str, &len); if (utf16 == NULL) { gjs_throw(cx, "Failed to get UTF-16 string data"); return false; } if (ucs4_string_p != NULL) { long length; *ucs4_string_p = g_utf16_to_ucs4(reinterpret_cast<const gunichar2 *>(utf16), len, NULL, &length, &error); if (*ucs4_string_p == NULL) { gjs_throw(cx, "Failed to convert UTF-16 string to UCS-4: %s", error->message); g_clear_error(&error); return false; } if (len_p != NULL) *len_p = (size_t) length; } return true; }
/** * gjs_string_to_ucs4: * @cx: a #JSContext * @value: JS::Value containing a string * @ucs4_string_p: return location for a #gunichar array * @len_p: return location for @ucs4_string_p length * * Returns: true on success, false otherwise in which case a JS error is thrown */ bool gjs_string_to_ucs4(JSContext *cx, JS::HandleValue value, gunichar **ucs4_string_p, size_t *len_p) { if (ucs4_string_p == NULL) return true; if (!value.isString()) { gjs_throw(cx, "Value is not a string, cannot convert to UCS-4"); return false; } JSAutoRequest ar(cx); JS::RootedString str(cx, value.toString()); size_t utf16_len; GError *error = NULL; const char16_t *utf16 = JS_GetStringCharsAndLength(cx, str, &utf16_len); if (utf16 == NULL) { gjs_throw(cx, "Failed to get UTF-16 string data"); return false; } if (ucs4_string_p != NULL) { long length; *ucs4_string_p = g_utf16_to_ucs4(reinterpret_cast<const gunichar2 *>(utf16), utf16_len, NULL, &length, &error); if (*ucs4_string_p == NULL) { gjs_throw(cx, "Failed to convert UTF-16 string to UCS-4: %s", error->message); g_clear_error(&error); return false; } if (len_p != NULL) *len_p = (size_t) length; } return true; }
/*
 * Converts a Ruby string holding UTF-16 code units into a UCS-4 string.
 *
 * The byte length of rb_utf16 is divided by the size of a gunichar2 to get
 * the number of code units handed to g_utf16_to_ucs4().  If that call
 * reports a GError it is passed to RAISE_GERROR(); otherwise the converted
 * buffer is wrapped with CSTR2RVAL_LEN_UCS4(), freed, and the wrapped value
 * is returned.
 */
static VALUE
rg_s_to_ucs4(G_GNUC_UNUSED VALUE self, VALUE rb_utf16)
{
    GError *error = NULL;
    gunichar2 *utf16_chars;
    gunichar *ucs4_chars;
    glong n_utf16;
    glong n_ucs4;
    VALUE rb_ucs4;

    utf16_chars = (gunichar2 *)(void *)StringValueCStr(rb_utf16);
    n_utf16 = RSTRING_LEN(rb_utf16) / sizeof(*utf16_chars);

    ucs4_chars = g_utf16_to_ucs4(utf16_chars, n_utf16, NULL, &n_ucs4, &error);
    if (error) {
        RAISE_GERROR(error);
    }

    /* Length is given in bytes, hence the multiplication. */
    rb_ucs4 = CSTR2RVAL_LEN_UCS4((char *)ucs4_chars,
                                 n_ucs4 * sizeof(*ucs4_chars));
    g_free(ucs4_chars);

    return rb_ucs4;
}
RESULT test_utf16_to_ucs4 () { static gunichar2 str1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'}; static gunichar exp1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'}; static gunichar2 str2[7] = {'H', 0xD800, 0xDC01,0xD800,0xDBFF,'l','\0'}; static gunichar exp2[3] = {'H',0x00010001,'\0'}; static gunichar2 str3[4] = {'H', 0xDC00 ,'l','\0'}; static gunichar exp3[2] = {'H','\0'}; static gunichar2 str4[20] = {0xDC00,0xDFFF,0xDFF,0xD800,0xDBFF,0xD800,0xDC00,0xD800,0xDFFF, 0xD800,0xE000,0xDBFF,0xDBFF,0xDBFF,0xDC00,0xDBFF,0xDFFF,0xDBFF,0xE000,'\0'}; static gunichar exp4[6] = {0xDFF,0x10000,0x103ff,0x10fc00,0x10FFFF,'\0'}; static gunichar2 str5[3] = {0xD801, 0xDC00, 0}; static gunichar exp5[2] = {0x10400, 0}; static glong read_write[33] = {1,0,0,1,0,0,1,1,1,2,1,0,2,2,1,2,2,1,2,1,0,2,1,0,2,2,1,2,2,1,2,1,0}; gunichar* res; glong items_read, items_written, current_read_index,current_write_index; GError* err=0; RESULT check_result; glong i; res = g_utf16_to_ucs4 (str1, 12, &items_read, &items_written, &err); check_result = utf16_to_ucs4_check_result (res, exp1, items_read, 11, items_written, 11, err, FALSE); if (check_result) return check_result; g_free (res); items_read = items_written = 0; res = g_utf16_to_ucs4 (str2, 0, &items_read, &items_written, &err); check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 0, items_written, 0, err, FALSE); if (check_result) return check_result; g_free (res); items_read = items_written = 0; res = g_utf16_to_ucs4 (str2, 1, &items_read, &items_written, &err); check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 1, items_written, 1, err, FALSE); if (check_result) return check_result; g_free (res); items_read = items_written = 0; res = g_utf16_to_ucs4 (str2, 2, &items_read, &items_written, &err); check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 1, items_written, 1, err, FALSE); if (check_result) return check_result; g_free (res); items_read = items_written = 0; res = 
g_utf16_to_ucs4 (str2, 3, &items_read, &items_written, &err); check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 3, items_written, 2, err, FALSE); if (check_result) return check_result; g_free (res); items_read = items_written = 0; res = g_utf16_to_ucs4 (str2, 4, &items_read, &items_written, &err); check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 3, items_written, 2, err, FALSE); if (check_result) return check_result; g_free (res); items_read = items_written = 0; res = g_utf16_to_ucs4 (str2, 5, &items_read, &items_written, &err); check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 4, items_written, 0, err, TRUE); if (check_result) return check_result; g_free (res); items_read = items_written = 0; err = 0; res = g_utf16_to_ucs4 (str3, 5, &items_read, &items_written, &err); check_result = utf16_to_ucs4_check_result (res, exp3, items_read, 1, items_written, 0, err, TRUE); if (check_result) return check_result; g_free (res); // This loop tests the bounds of the conversion algorithm current_read_index = current_write_index = 0; for (i=0;i<11;i++) { items_read = items_written = 0; err = 0; res = g_utf16_to_ucs4 (&str4[current_read_index], read_write[i*3], &items_read, &items_written, &err); check_result = utf16_to_ucs4_check_result (res, &exp4[current_write_index], items_read, read_write[(i*3)+1], items_written, read_write[(i*3)+2], err, !read_write[(i*3)+2]); if (check_result) return check_result; g_free (res); current_read_index += read_write[i*3]; current_write_index += items_written; } items_read = items_written = 0; err = 0; res = g_utf16_to_ucs4 (str5, 2, &items_read, &items_written, &err); check_result = utf16_to_ucs4_check_result (res, exp5, items_read, 2, items_written, 1, err, FALSE); if (check_result) return check_result; g_free (res); return OK; }
/*
 * process:
 * @line: line number, used only in failure messages
 * @utf8: the UTF-8 input under test (NUL-terminated; all calls pass -1 as length)
 * @status: the expected classification of @utf8
 * @ucs4: the expected UCS-4 decoding of @utf8
 * @ucs4_len: number of characters expected in @ucs4
 *
 * Checks one test vector against GLib's Unicode conversion functions:
 *  - g_utf8_validate() must agree with @status;
 *  - for INCOMPLETE input, g_utf8_to_ucs4() must report partial input;
 *  - for VALID and NOTUNICODE input, UTF-8 <-> UCS-4 must round-trip;
 *  - for VALID input, UTF-16 conversions must agree with g_convert() and
 *    round-trip back to UTF-8 and UCS-4.
 *
 * Every mismatch is reported through fail() and ends the function
 * immediately; buffers allocated up to that point are not freed on those
 * early-return paths.
 */
static void
process (gint      line,
         gchar    *utf8,
         Status    status,
         gunichar *ucs4,
         gint      ucs4_len)
{
  const gchar *end;
  gboolean is_valid = g_utf8_validate (utf8, -1, &end);
  GError *error = NULL;
  glong items_read, items_written;

  /* Step 1: validation result must match the expected status. */
  switch (status)
    {
    case VALID:
      if (!is_valid)
        {
          fail ("line %d: valid but g_utf8_validate returned FALSE\n", line);
          return;
        }
      break;
    case NOTUNICODE:
    case INCOMPLETE:
    case OVERLONG:
    case MALFORMED:
      if (is_valid)
        {
          fail ("line %d: invalid but g_utf8_validate returned TRUE\n", line);
          return;
        }
      break;
    }

  /* Step 2: incomplete input. */
  if (status == INCOMPLETE)
    {
      gunichar *ucs4_result;

      /* Without an items_read location the conversion must fail with
       * G_CONVERT_ERROR_PARTIAL_INPUT. */
      ucs4_result = g_utf8_to_ucs4 (utf8, -1, NULL, NULL, &error);

      if (!error || !g_error_matches (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT))
        {
          fail ("line %d: incomplete input not properly detected\n", line);
          return;
        }
      g_clear_error (&error);

      /* With an items_read location the call must return a result and must
       * not have consumed the whole input. */
      ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, NULL, &error);

      if (!ucs4_result || items_read == strlen (utf8))
        {
          fail ("line %d: incomplete input not properly detected\n", line);
          return;
        }

      g_free (ucs4_result);
    }

  /* Step 3: UTF-8 -> UCS-4 (checked and "fast" variants) and back. */
  if (status == VALID || status == NOTUNICODE)
    {
      gunichar *ucs4_result;
      gchar *utf8_result;

      ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, &items_written, &error);
      if (!ucs4_result)
        {
          fail ("line %d: conversion to ucs4 failed: %s\n", line, error->message);
          return;
        }

      if (!ucs4_equal (ucs4_result, ucs4) ||
          items_read != strlen (utf8) ||
          items_written != ucs4_len)
        {
          fail ("line %d: results of conversion to ucs4 do not match expected.\n", line);
          return;
        }

      g_free (ucs4_result);

      /* Same check for the non-validating variant. */
      ucs4_result = g_utf8_to_ucs4_fast (utf8, -1, &items_written);

      if (!ucs4_equal (ucs4_result, ucs4) ||
          items_written != ucs4_len)
        {
          fail ("line %d: results of conversion to ucs4 do not match expected.\n", line);
          return;
        }

      /* Convert the "fast" result back and compare with the original input. */
      utf8_result = g_ucs4_to_utf8 (ucs4_result, -1, &items_read, &items_written, &error);
      if (!utf8_result)
        {
          fail ("line %d: conversion back to utf8 failed: %s", line, error->message);
          return;
        }

      if (strcmp (utf8_result, utf8) != 0 ||
          items_read != ucs4_len ||
          items_written != strlen (utf8))
        {
          fail ("line %d: conversion back to utf8 did not match original\n", line);
          return;
        }

      g_free (utf8_result);
      g_free (ucs4_result);
    }

  /* Step 4: UTF-16 conversions, only for fully valid input. */
  if (status == VALID)
    {
      gunichar2 *utf16_expected_tmp;
      gunichar2 *utf16_expected;
      gunichar2 *utf16_from_utf8;
      gunichar2 *utf16_from_ucs4;
      gunichar *ucs4_result;
      gsize bytes_written;
      gint n_chars;
      gchar *utf8_result;

      /* Charset name passed to g_convert(): explicit little-endian on
       * little-endian hosts, plain "UTF-16" otherwise. */
#if G_BYTE_ORDER == G_LITTLE_ENDIAN
#define TARGET "UTF-16LE"
#else
#define TARGET "UTF-16"
#endif

      /* Reference UTF-16 produced independently through g_convert(). */
      if (!(utf16_expected_tmp = (gunichar2 *)g_convert (utf8, -1, TARGET, "UTF-8",
                                                         NULL, &bytes_written, NULL)))
        {
          fail ("line %d: could not convert to UTF-16 via g_convert\n", line);
          return;
        }

      /* zero-terminate and remove BOM */
      n_chars = bytes_written / 2;
      if (utf16_expected_tmp[0] == 0xfeff) /* BOM */
        {
          n_chars--;
          utf16_expected = g_new (gunichar2, n_chars + 1);
          memcpy (utf16_expected, utf16_expected_tmp + 1, sizeof(gunichar2) * n_chars);
        }
      else if (utf16_expected_tmp[0] == 0xfffe) /* ANTI-BOM */
        {
          fail ("line %d: conversion via iconv to \"UTF-16\" is not native-endian\n", line);
          return;
        }
      else
        {
          utf16_expected = g_new (gunichar2, n_chars + 1);
          memcpy (utf16_expected, utf16_expected_tmp, sizeof(gunichar2) * n_chars);
        }

      utf16_expected[n_chars] = '\0';

      /* UTF-8 -> UTF-16 */
      if (!(utf16_from_utf8 = g_utf8_to_utf16 (utf8, -1, &items_read, &items_written, &error)))
        {
          fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message);
          return;
        }

      if (items_read != strlen (utf8) ||
          utf16_count (utf16_from_utf8) != items_written)
        {
          fail ("line %d: length error in conversion to ucs16\n", line);
          return;
        }

      /* UCS-4 -> UTF-16 */
      if (!(utf16_from_ucs4 = g_ucs4_to_utf16 (ucs4, -1, &items_read, &items_written, &error)))
        {
          fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message);
          return;
        }

      if (items_read != ucs4_len ||
          utf16_count (utf16_from_ucs4) != items_written)
        {
          fail ("line %d: length error in conversion to ucs16\n", line);
          return;
        }

      /* Both UTF-16 results must equal the g_convert() reference. */
      if (!utf16_equal (utf16_from_utf8, utf16_expected) ||
          !utf16_equal (utf16_from_ucs4, utf16_expected))
        {
          fail ("line %d: results of conversion to ucs16 do not match\n", line);
          return;
        }

      /* UTF-16 -> UTF-8 */
      if (!(utf8_result = g_utf16_to_utf8 (utf16_from_utf8, -1, &items_read, &items_written, &error)))
        {
          fail ("line %d: conversion back to utf8 failed: %s\n", line, error->message);
          return;
        }

      if (items_read != utf16_count (utf16_from_utf8) ||
          items_written != strlen (utf8))
        {
          fail ("line %d: length error in conversion from ucs16 to utf8\n", line);
          return;
        }

      /* UTF-16 -> UCS-4 */
      if (!(ucs4_result = g_utf16_to_ucs4 (utf16_from_ucs4, -1, &items_read, &items_written, &error)))
        {
          fail ("line %d: conversion back to utf8/ucs4 failed\n", line);
          return;
        }

      /* The length is taken from utf16_from_utf8 although utf16_from_ucs4
       * was converted; both were checked equal to utf16_expected above. */
      if (items_read != utf16_count (utf16_from_utf8) ||
          items_written != ucs4_len)
        {
          fail ("line %d: length error in conversion from ucs16 to ucs4\n", line);
          return;
        }

      /* Round-trip results must match the original UTF-8 and UCS-4. */
      if (strcmp (utf8, utf8_result) != 0 ||
          !ucs4_equal (ucs4, ucs4_result))
        {
          fail ("line %d: conversion back to utf8/ucs4 did not match original\n", line);
          return;
        }

      g_free (utf16_expected_tmp);
      g_free (utf16_expected);
      g_free (utf16_from_utf8);
      g_free (utf16_from_ucs4);
      g_free (utf8_result);
      g_free (ucs4_result);
    }
}