/**
 * Python binding for updating the engine's auxiliary string.
 *
 * Parses "(unicode[, attrs])" from @args and forwards the text, as a
 * WideString, together with the result of Attributes_FromTupleOrList()
 * to engine.update_aux_string().
 *
 * NOTE(review): when the optional attrs argument is omitted, pAttrs is
 * still NULL when it is handed to Attributes_FromTupleOrList(); that
 * helper is presumably NULL-tolerant -- confirm against its definition.
 *
 * Returns: a new reference to None on success; NULL with a Python
 * exception set if argument parsing fails or (on builds where
 * Py_UNICODE is not 4 bytes wide) the text is not valid UTF-16.
 */
PyObject *
PyIMEngine::py_update_aux_string (PyIMEngineObject *self, PyObject *args)
{
	Py_UNICODE *str = NULL;
	PyObject *pAttrs = NULL;

#if Py_UNICODE_SIZE == 4
	/* Py_UNICODE is already 4 bytes wide here, so the buffer is handed
	 * to WideString directly through a cast. */
	if (!PyArg_ParseTuple (args, "u|O:update_aux_string", &str, &pAttrs))
		return NULL;

	self->engine.update_aux_string (WideString ((wchar_t *)str),
								Attributes_FromTupleOrList (pAttrs));
#else
	int size = 0;
	gunichar *unistr = NULL;
	GError *error = NULL;

	if (!PyArg_ParseTuple (args, "u#|O:update_aux_string", &str, &size, &pAttrs))
		return NULL;

	/* Narrow Py_UNICODE: widen the UTF-16 buffer to UCS-4 first.
	 * g_utf16_to_ucs4() returns NULL for malformed input (for example
	 * an unpaired surrogate); report that as a Python error instead of
	 * building a WideString from a NULL pointer. */
	unistr = g_utf16_to_ucs4 ((const gunichar2 *) str, size, NULL, NULL, &error);
	if (unistr == NULL) {
		PyErr_SetString (PyExc_ValueError,
						 error != NULL ? error->message : "invalid UTF-16 string");
		g_clear_error (&error);
		return NULL;
	}

	self->engine.update_aux_string (WideString ((wchar_t *)unistr),
								Attributes_FromTupleOrList (pAttrs));
	g_free (unistr);
#endif

	Py_INCREF (Py_None);
	return Py_None;
}
/**
 * Python binding for committing a string through the engine.
 *
 * Parses a single unicode argument from @args and forwards it, as a
 * WideString, to engine.commit_string().
 *
 * Returns: a new reference to None on success; NULL with a Python
 * exception set if argument parsing fails or (on builds where
 * Py_UNICODE is not 4 bytes wide) the text is not valid UTF-16.
 */
PyObject *
PyIMEngine::py_commit_string (PyIMEngineObject *self, PyObject *args)
{
	Py_UNICODE *str = NULL;

#if Py_UNICODE_SIZE == 4
	/* Py_UNICODE is already 4 bytes wide here, so the buffer is handed
	 * to WideString directly through a cast. */
	if (!PyArg_ParseTuple (args, "u:commit_string", &str))
		return NULL;

	self->engine.commit_string (WideString ((wchar_t *)str));
#else
	int size = 0;
	gunichar *unistr = NULL;
	GError *error = NULL;

	if (!PyArg_ParseTuple (args, "u#:commit_string", &str, &size))
		return NULL;

	/* Narrow Py_UNICODE: widen the UTF-16 buffer to UCS-4 first.
	 * g_utf16_to_ucs4() returns NULL for malformed input (for example
	 * an unpaired surrogate); report that as a Python error instead of
	 * building a WideString from a NULL pointer. */
	unistr = g_utf16_to_ucs4 ((const gunichar2 *) str, size, NULL, NULL, &error);
	if (unistr == NULL) {
		PyErr_SetString (PyExc_ValueError,
						 error != NULL ? error->message : "invalid UTF-16 string");
		g_clear_error (&error);
		return NULL;
	}

	self->engine.commit_string (WideString ((wchar_t *)unistr));
	g_free (unistr);
#endif

	Py_INCREF (Py_None);
	return Py_None;
}
Ejemplo n.º 3
0
/**
 * glibsharp_utf16_to_unichar:
 * @chr: a single UTF-16 code unit
 *
 * Converts one UTF-16 code unit to a #gunichar by running it through
 * g_utf16_to_ucs4() with a length of 1.
 *
 * Returns: the first element of the converted UCS-4 string, or 0 when
 * the conversion fails. A single code unit cannot form a surrogate pair,
 * so a surrogate value passed as @chr is not convertible and makes
 * g_utf16_to_ucs4() return %NULL.
 */
gunichar
glibsharp_utf16_to_unichar (guint16 chr)
{
	gunichar *ucs4_str;
	gunichar result;

	ucs4_str = g_utf16_to_ucs4 (&chr, 1, NULL, NULL, NULL);
	/* Conversion failed: do not dereference the NULL result. */
	if (ucs4_str == NULL)
		return 0;

	result = *ucs4_str;
	g_free (ucs4_str);
	return result;
}
Ejemplo n.º 4
0
/**
 * gjs_string_to_ucs4:
 * @cx: a #JSContext
 * @str: rooted JSString
 * @ucs4_string_p: return location for a #gunichar array
 * @len_p: return location for @ucs4_string_p length
 *
 * Converts @str to a newly allocated UCS-4 array. Latin-1 strings are
 * delegated to from_latin1(); two-byte strings are converted with
 * g_utf16_to_ucs4(). If @ucs4_string_p is %NULL nothing is converted and
 * the call succeeds immediately. @len_p may be %NULL.
 *
 * Returns: true on success, false otherwise in which case a JS error is thrown
 */
bool
gjs_string_to_ucs4(JSContext       *cx,
                   JS::HandleString str,
                   gunichar       **ucs4_string_p,
                   size_t          *len_p)
{
    /* No output location requested: nothing to do. */
    if (!ucs4_string_p)
        return true;

    JSAutoRequest ar(cx);

    if (JS_StringHasLatin1Chars(str))
        return from_latin1(cx, str, ucs4_string_p, len_p);

    /* From this point on, crash if a GC is triggered while we are using
     * the string's chars */
    JS::AutoCheckCannotGC nogc;

    size_t utf16_len;
    const char16_t *utf16_chars =
        JS_GetTwoByteStringCharsAndLength(cx, nogc, str, &utf16_len);
    if (!utf16_chars) {
        gjs_throw(cx, "Failed to get UTF-16 string data");
        return false;
    }

    GError *conv_error = NULL;
    long ucs4_len;
    gunichar *converted =
        g_utf16_to_ucs4(reinterpret_cast<const gunichar2 *>(utf16_chars),
                        utf16_len, NULL, &ucs4_len, &conv_error);

    /* The out-parameter is written even on failure (it then holds NULL). */
    *ucs4_string_p = converted;
    if (!converted) {
        gjs_throw(cx, "Failed to convert UTF-16 string to UCS-4: %s",
                  conv_error->message);
        g_clear_error(&conv_error);
        return false;
    }

    if (len_p)
        *len_p = (size_t) ucs4_len;

    return true;
}
Ejemplo n.º 5
0
/**
 * gjs_string_to_ucs4:
 * @cx: a #JSContext
 * @value: JS::Value containing a string
 * @ucs4_string_p: return location for a #gunichar array
 * @len_p: return location for @ucs4_string_p length
 *
 * Converts the string held in @value to a newly allocated UCS-4 array
 * using g_utf16_to_ucs4(). If @ucs4_string_p is %NULL nothing is
 * converted and the call succeeds immediately, before @value is even
 * inspected. @len_p may be %NULL.
 *
 * Returns: true on success, false otherwise in which case a JS error is thrown
 */
bool
gjs_string_to_ucs4(JSContext      *cx,
                   JS::HandleValue value,
                   gunichar      **ucs4_string_p,
                   size_t         *len_p)
{
    /* No output location requested: nothing to do. */
    if (!ucs4_string_p)
        return true;

    if (!value.isString()) {
        gjs_throw(cx, "Value is not a string, cannot convert to UCS-4");
        return false;
    }

    JSAutoRequest ar(cx);
    JS::RootedString rooted_str(cx, value.toString());

    size_t n_utf16;
    const char16_t *utf16_chars =
        JS_GetStringCharsAndLength(cx, rooted_str, &n_utf16);
    if (!utf16_chars) {
        gjs_throw(cx, "Failed to get UTF-16 string data");
        return false;
    }

    GError *conv_error = NULL;
    long n_ucs4;
    gunichar *converted =
        g_utf16_to_ucs4(reinterpret_cast<const gunichar2 *>(utf16_chars),
                        n_utf16, NULL, &n_ucs4, &conv_error);

    /* The out-parameter is written even on failure (it then holds NULL). */
    *ucs4_string_p = converted;
    if (!converted) {
        gjs_throw(cx, "Failed to convert UTF-16 string to UCS-4: %s",
                  conv_error->message);
        g_clear_error(&conv_error);
        return false;
    }

    if (len_p)
        *len_p = (size_t) n_ucs4;

    return true;
}
Ejemplo n.º 6
0
/*
 * Singleton method: converts the bytes of rb_utf16, interpreted as an
 * array of gunichar2 code units, to UCS-4 with g_utf16_to_ucs4() and
 * returns the result wrapped by CSTR2RVAL_LEN_UCS4(). A GError reported
 * by the conversion is raised through RAISE_GERROR().
 */
static VALUE
rg_s_to_ucs4(G_GNUC_UNUSED VALUE self, VALUE rb_utf16)
{
    GError *error = NULL;
    glong n_ucs4;

    /* Fetch the pointer first, then the byte length, as the original
     * does; the length is converted from bytes to code units. */
    gunichar2 *utf16_data = (gunichar2 *)(void *)StringValueCStr(rb_utf16);
    glong n_units = RSTRING_LEN(rb_utf16) / sizeof(*utf16_data);

    gunichar *ucs4_data = g_utf16_to_ucs4(utf16_data, n_units, NULL,
                                          &n_ucs4, &error);

    if (error)
        RAISE_GERROR(error);

    /* The wrapper takes a byte length, hence the multiplication. */
    VALUE rb_ucs4 = CSTR2RVAL_LEN_UCS4((char *)ucs4_data,
                                       n_ucs4 * sizeof(*ucs4_data));
    g_free(ucs4_data);
    return rb_ucs4;
}
Ejemplo n.º 7
0
/*
 * Exercises g_utf16_to_ucs4() with plain ASCII, valid surrogate pairs,
 * unpaired surrogates and a range of explicit input lengths.
 *
 * Each conversion is validated by utf16_to_ucs4_check_result(); the first
 * non-zero result it returns is propagated to the caller, otherwise OK is
 * returned.
 *
 * NOTE(review): the argument order of utf16_to_ucs4_check_result() is
 * inferred from the call sites below as (result, expected, items_read,
 * expected_read, items_written, expected_written, err, expect_error) --
 * confirm against its definition.
 */
RESULT
test_utf16_to_ucs4 ()
{
	/* str1/exp1: plain ASCII text. */
	static gunichar2 str1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
	static gunichar exp1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'};
	/* str2/exp2: 'H', a surrogate pair, then two high surrogates in a row. */
	static gunichar2 str2[7] = {'H', 0xD800, 0xDC01,0xD800,0xDBFF,'l','\0'};
	static gunichar exp2[3] = {'H',0x00010001,'\0'};
	/* str3/exp3: 'H' followed by a low surrogate with no preceding high one. */
	static gunichar2 str3[4] = {'H', 0xDC00 ,'l','\0'};
	static gunichar exp3[2] = {'H','\0'};
	/* str4/exp4: sequence of surrogate boundary values consumed piecewise by the loop below. */
	static gunichar2 str4[20] = {0xDC00,0xDFFF,0xDFF,0xD800,0xDBFF,0xD800,0xDC00,0xD800,0xDFFF,
				     0xD800,0xE000,0xDBFF,0xDBFF,0xDBFF,0xDC00,0xDBFF,0xDFFF,0xDBFF,0xE000,'\0'};
	static gunichar exp4[6] = {0xDFF,0x10000,0x103ff,0x10fc00,0x10FFFF,'\0'};
	/* str5/exp5: a single surrogate pair. */
	static gunichar2 str5[3] = {0xD801, 0xDC00, 0};
	static gunichar exp5[2] = {0x10400, 0};
	/* 11 triples read by the loop: (length passed in, expected items_read, expected items_written). */
	static glong read_write[33] = {1,0,0,1,0,0,1,1,1,2,1,0,2,2,1,2,2,1,2,1,0,2,1,0,2,2,1,2,2,1,2,1,0};
	gunichar* res;
	glong items_read, items_written, current_read_index,current_write_index;
	GError* err=0;
	RESULT check_result;
	glong i;
	
	/* Length 12 includes the terminator; 11 items are expected read and written. */
	res = g_utf16_to_ucs4 (str1, 12, &items_read, &items_written, &err);
	check_result = utf16_to_ucs4_check_result (res, exp1, items_read, 11, items_written, 11, err, FALSE);
	if (check_result) return check_result;
	g_free (res);
	
	/* str2 is converted with increasing lengths 0..5. */
	items_read = items_written = 0;
	res = g_utf16_to_ucs4 (str2, 0, &items_read, &items_written, &err);
	check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 0, items_written, 0, err, FALSE);
	if (check_result) return check_result;
	g_free (res);
	
	items_read = items_written = 0;
	res = g_utf16_to_ucs4 (str2, 1, &items_read, &items_written, &err);
	check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 1, items_written, 1, err, FALSE);
	if (check_result) return check_result;
	g_free (res);
	
	/* Length 2 ends on a high surrogate: only 1 item is expected read and written. */
	items_read = items_written = 0;
	res = g_utf16_to_ucs4 (str2, 2, &items_read, &items_written, &err);
	check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 1, items_written, 1, err, FALSE);
	if (check_result) return check_result;
	g_free (res);
	
	/* Length 3 covers the full surrogate pair: 3 units read, 2 characters written. */
	items_read = items_written = 0;
	res = g_utf16_to_ucs4 (str2, 3, &items_read, &items_written, &err);
	check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 3, items_written, 2, err, FALSE);
	if (check_result) return check_result;
	g_free (res);
	
	/* Length 4 ends on another high surrogate: same expectation as length 3. */
	items_read = items_written = 0;
	res = g_utf16_to_ucs4 (str2, 4, &items_read, &items_written, &err);
	check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 3, items_written, 2, err, FALSE);
	if (check_result) return check_result;
	g_free (res);
	
	/* Length 5 has a high surrogate followed by a high surrogate; the last argument is TRUE here. */
	items_read = items_written = 0;
	res = g_utf16_to_ucs4 (str2, 5, &items_read, &items_written, &err);
	check_result = utf16_to_ucs4_check_result (res, exp2, items_read, 4, items_written, 0, err, TRUE);
	if (check_result) return check_result;
	g_free (res);
	
	/* err is reset by plain assignment before the next call.
	 * NOTE(review): str3 has 4 elements but a length of 5 is passed. */
	items_read = items_written = 0;
	err = 0;
	res = g_utf16_to_ucs4 (str3, 5, &items_read, &items_written, &err);
	check_result = utf16_to_ucs4_check_result (res, exp3, items_read, 1, items_written, 0, err, TRUE);
	if (check_result) return check_result;
	g_free (res);
	
	// This loop tests the bounds of the conversion algorithm
	/* Walks str4 in chunks: each iteration converts read_write[i*3] units starting at
	 * current_read_index and compares against exp4 starting at current_write_index.
	 * The last argument is true exactly when the expected items_written is 0. */
	current_read_index = current_write_index = 0;
	for (i=0;i<11;i++) {
		items_read = items_written = 0;
		err = 0;
		res = g_utf16_to_ucs4 (&str4[current_read_index], read_write[i*3], &items_read, &items_written, &err);
		check_result = utf16_to_ucs4_check_result (res, &exp4[current_write_index], items_read, 
					     read_write[(i*3)+1], items_written, read_write[(i*3)+2], err, 
					     !read_write[(i*3)+2]);
		if (check_result) return check_result;
		g_free (res);
		/* Advance by the length passed in, not by items_read. */
		current_read_index += read_write[i*3];
		current_write_index += items_written;
	}

	/* A lone surrogate pair with explicit length 2: 2 units read, 1 character written. */
	items_read = items_written = 0;
	err = 0;
	res = g_utf16_to_ucs4 (str5, 2, &items_read, &items_written, &err);
	check_result = utf16_to_ucs4_check_result (res, exp5, items_read, 2, items_written, 1, err, FALSE);
	if (check_result) return check_result;
	g_free (res);

	return OK;
}
Ejemplo n.º 8
0
/*
 * Checks one test vector against the GLib UTF-8 / UTF-16 / UCS-4
 * conversion functions.
 *
 * line:     identifier of the test vector; only used in failure messages.
 * utf8:     NUL-terminated input string (always passed with length -1).
 * status:   expected classification of utf8 (VALID, NOTUNICODE,
 *           INCOMPLETE, OVERLONG or MALFORMED).
 * ucs4:     expected UCS-4 form of utf8; compared with ucs4_equal() and
 *           passed with length -1 to g_ucs4_to_utf16().
 * ucs4_len: expected number of UCS-4 characters.
 *
 * Every mismatch is reported through fail() and ends the function
 * immediately; buffers allocated up to that point are not freed on those
 * early-return paths.
 */
static void
process (gint      line,
	 gchar    *utf8,
	 Status    status,
	 gunichar *ucs4,
	 gint      ucs4_len)
{
  const gchar *end;
  gboolean is_valid = g_utf8_validate (utf8, -1, &end);
  GError *error = NULL;
  glong items_read, items_written;

  /* Step 1: g_utf8_validate() must agree with the expected status. */
  switch (status)
    {
    case VALID:
      if (!is_valid)
	{
	  fail ("line %d: valid but g_utf8_validate returned FALSE\n", line);
	  return;
	}
      break;
    case NOTUNICODE:
    case INCOMPLETE:
    case OVERLONG:
    case MALFORMED:
      if (is_valid)
	{
	  fail ("line %d: invalid but g_utf8_validate returned TRUE\n", line);
	  return;
	}
      break;
    }

  /* Step 2: incomplete input. Without an items_read location the conversion
   * must fail with G_CONVERT_ERROR_PARTIAL_INPUT; with one it must return a
   * result and stop reading before the end of the input. */
  if (status == INCOMPLETE)
    {
      gunichar *ucs4_result;      

      ucs4_result = g_utf8_to_ucs4 (utf8, -1, NULL, NULL, &error);

      if (!error || !g_error_matches (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT))
	{
	  fail ("line %d: incomplete input not properly detected\n", line);
	  return;
	}
      g_clear_error (&error);

      ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, NULL, &error);

      if (!ucs4_result || items_read == strlen (utf8))
	{
	  fail ("line %d: incomplete input not properly detected\n", line);
	  return;
	}

      g_free (ucs4_result);
    }

  /* Step 3: UTF-8 -> UCS-4 (checked and "fast" variants) and back to UTF-8. */
  if (status == VALID || status == NOTUNICODE)
    {
      gunichar *ucs4_result;
      gchar *utf8_result;

      ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, &items_written, &error);
      if (!ucs4_result)
	{
	  fail ("line %d: conversion to ucs4 failed: %s\n", line, error->message);
	  return;
	}
      
      if (!ucs4_equal (ucs4_result, ucs4) ||
	  items_read != strlen (utf8) ||
	  items_written != ucs4_len)
	{
	  fail ("line %d: results of conversion to ucs4 do not match expected.\n", line);
	  return;
	}

      g_free (ucs4_result);

      /* The fast variant takes no error argument; only its output and count are compared. */
      ucs4_result = g_utf8_to_ucs4_fast (utf8, -1, &items_written);
      
      if (!ucs4_equal (ucs4_result, ucs4) ||
	  items_written != ucs4_len)
	{
	  fail ("line %d: results of conversion to ucs4 do not match expected.\n", line);
	  return;
	}

      /* Round trip: the UCS-4 result must convert back to the original UTF-8. */
      utf8_result = g_ucs4_to_utf8 (ucs4_result, -1, &items_read, &items_written, &error);
      if (!utf8_result)
	{
	  fail ("line %d: conversion back to utf8 failed: %s", line, error->message);
	  return;
	}

      if (strcmp (utf8_result, utf8) != 0 ||
	  items_read != ucs4_len ||
	  items_written != strlen (utf8))
	{
	  fail ("line %d: conversion back to utf8 did not match original\n", line);
	  return;
	}

      g_free (utf8_result);
      g_free (ucs4_result);
    }

  /* Step 4: UTF-16 checks, using a g_convert() result as the reference. */
  if (status == VALID)
    {
      gunichar2 *utf16_expected_tmp;
      gunichar2 *utf16_expected;
      gunichar2 *utf16_from_utf8;
      gunichar2 *utf16_from_ucs4;
      gunichar *ucs4_result;
      gsize bytes_written;
      gint n_chars;
      gchar *utf8_result;

      /* Target charset name handed to g_convert(); chosen by host byte order. */
#if G_BYTE_ORDER == G_LITTLE_ENDIAN
#define TARGET "UTF-16LE"
#else
#define TARGET "UTF-16"
#endif

      if (!(utf16_expected_tmp = (gunichar2 *)g_convert (utf8, -1, TARGET, "UTF-8",
							 NULL, &bytes_written, NULL)))
	{
	  fail ("line %d: could not convert to UTF-16 via g_convert\n", line);
	  return;
	}

      /* zero-terminate and remove BOM
       */
      /* bytes_written counts bytes; UTF-16 code units are 2 bytes each. */
      n_chars = bytes_written / 2;
      if (utf16_expected_tmp[0] == 0xfeff) /* BOM */
	{
	  n_chars--;
	  utf16_expected = g_new (gunichar2, n_chars + 1);
	  memcpy (utf16_expected, utf16_expected_tmp + 1, sizeof(gunichar2) * n_chars);
	}
      else if (utf16_expected_tmp[0] == 0xfffe) /* ANTI-BOM */
	{
	  fail ("line %d: conversion via iconv to \"UTF-16\" is not native-endian\n", line);
	  return;
	}
      else
	{
	  utf16_expected = g_new (gunichar2, n_chars + 1);
	  memcpy (utf16_expected, utf16_expected_tmp, sizeof(gunichar2) * n_chars);
	}

      utf16_expected[n_chars] = '\0';
      
      /* UTF-8 -> UTF-16: all input consumed, reported length matches the output. */
      if (!(utf16_from_utf8 = g_utf8_to_utf16 (utf8, -1, &items_read, &items_written, &error)))
	{
	  fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message);
	  return;
	}

      if (items_read != strlen (utf8) ||
	  utf16_count (utf16_from_utf8) != items_written)
	{
	  fail ("line %d: length error in conversion to ucs16\n", line);
	  return;
	}

      /* UCS-4 -> UTF-16, starting from the expected UCS-4 data supplied by the caller. */
      if (!(utf16_from_ucs4 = g_ucs4_to_utf16 (ucs4, -1, &items_read, &items_written, &error)))
	{
	  fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message);
	  return;
	}

      if (items_read != ucs4_len ||
	  utf16_count (utf16_from_ucs4) != items_written)
	{
	  fail ("line %d: length error in conversion to ucs16\n", line);
	  return;
	}

      /* Both UTF-16 results must equal the g_convert() reference. */
      if (!utf16_equal (utf16_from_utf8, utf16_expected) ||
	  !utf16_equal (utf16_from_ucs4, utf16_expected))
	{
	  fail ("line %d: results of conversion to ucs16 do not match\n", line);
	  return;
	}

      /* UTF-16 -> UTF-8 round trip. */
      if (!(utf8_result = g_utf16_to_utf8 (utf16_from_utf8, -1, &items_read, &items_written, &error)))
	{
	  fail ("line %d: conversion back to utf8 failed: %s\n", line, error->message);
	  return;
	}

      if (items_read != utf16_count (utf16_from_utf8) ||
	  items_written != strlen (utf8))
	{
	  fail ("line %d: length error in conversion from ucs16 to utf8\n", line);
	  return;
	}

      /* UTF-16 -> UCS-4 round trip. */
      if (!(ucs4_result = g_utf16_to_ucs4 (utf16_from_ucs4, -1, &items_read, &items_written, &error)))
	{
	  fail ("line %d: conversion back to utf8/ucs4 failed\n", line);
	  return;
	}

      /* The read count is compared with the length of utf16_from_utf8, although
       * the string converted above was utf16_from_ucs4; the two were already
       * checked against the same reference string earlier. */
      if (items_read != utf16_count (utf16_from_utf8) ||
	  items_written != ucs4_len)
	{
	  fail ("line %d: length error in conversion from ucs16 to ucs4\n", line);
	  return;
	}

      if (strcmp (utf8, utf8_result) != 0 ||
	  !ucs4_equal (ucs4, ucs4_result))
	{
	  fail ("line %d: conversion back to utf8/ucs4 did not match original\n", line);
	  return;
	}
      
      g_free (utf16_expected_tmp);
      g_free (utf16_expected);
      g_free (utf16_from_utf8);
      g_free (utf16_from_ucs4);
      g_free (utf8_result);
      g_free (ucs4_result);
    }
}