// Returns the text a press of the GDK key value |val| produces.
// The Enter/Return variants map to "\r", BackSpace to "\x8" and Tab to "\t".
// Every other key value is translated to a Unicode code point and converted
// to UTF-16; an empty String is returned when that conversion fails.
String PlatformKeyboardEvent::singleCharacterString(unsigned val)
{
    if (val == GDK_ISO_Enter || val == GDK_KP_Enter || val == GDK_Return)
        return String("\r");
    if (val == GDK_BackSpace)
        return String("\x8");
    if (val == GDK_Tab)
        return String("\t");

    gunichar codePoint = gdk_keyval_to_unicode(val);
    glong utf16Length;
    gunichar2* utf16 = g_ucs4_to_utf16(&codePoint, 1, 0, &utf16Length, 0);

    // Copy into a String before releasing the GLib-allocated buffer.
    String text = utf16 ? String(reinterpret_cast<UChar*>(utf16), utf16Length) : String();
    g_free(utf16);
    return text;
}
/*
 * Python method get_surrounding_text([maxlen_before[, maxlen_after]]).
 *
 * Both arguments are optional ints defaulting to -1. Returns a new
 * (text, cursor) tuple; when the engine reports that no surrounding text is
 * provided, the tuple is (u"", 0). Returns NULL with a Python exception set
 * on argument, conversion or allocation failure.
 */
PyObject *
PyIMEngine::py_get_surrounding_text (PyIMEngineObject *self, PyObject *args)
{
    int maxlen_before = -1;
    int maxlen_after = -1;

    if (!PyArg_ParseTuple (args, "|ii:get_surrounding_text", &maxlen_before, &maxlen_after))
        return NULL;

    WideString text;
    int cursor;
    int provided = self->engine.get_surrounding_text (text, cursor, maxlen_before, maxlen_after);

    if (!provided) {
        text.clear ();
        cursor = 0;
    }

    PyObject *py_text = NULL;
#if Py_UNICODE_SIZE == 4
    py_text = PyUnicode_FromUnicode ((Py_UNICODE *)text.c_str(), text.length());
#else
    /* Py_UNICODE is 16 bits wide here, so the UCS-4 text must be re-encoded.
     * The number of UTF-16 units can exceed text.length() when the text
     * holds characters outside the BMP, so use the count GLib reports. */
    glong utf16_len = 0;
    gunichar2 *utf16_str = g_ucs4_to_utf16 ((const gunichar *)text.c_str(), -1, NULL, &utf16_len, NULL);
    if (utf16_str == NULL) {
        PyErr_SetString (PyExc_ValueError, "surrounding text cannot be converted to UTF-16");
        return NULL;
    }
    py_text = PyUnicode_FromUnicode ((Py_UNICODE *)utf16_str, utf16_len);
    /* PyUnicode_FromUnicode copies the buffer, so release the GLib one. */
    g_free (utf16_str);
#endif
    if (py_text == NULL)
        return NULL;

    PyObject *py_cursor = PyInt_FromLong ((long) cursor);
    if (py_cursor == NULL) {
        Py_DECREF (py_text);
        return NULL;
    }

    PyObject *tuple = PyTuple_New (2);
    if (tuple == NULL) {
        Py_DECREF (py_text);
        Py_DECREF (py_cursor);
        return NULL;
    }

    /* PyTuple_SET_ITEM steals the references. */
    PyTuple_SET_ITEM (tuple, 0, py_text);
    PyTuple_SET_ITEM (tuple, 1, py_cursor);

    return tuple;
}
/**
 * gjs_string_from_ucs4:
 * @cx: a #JSContext
 * @ucs4_string: string of #gunichar
 * @n_chars: number of characters in @ucs4_string or -1 for zero-terminated
 * @value_p: JS::Value that will be filled with a string
 *
 * Converts @ucs4_string to UTF-16 and stores the result in @value_p as a
 * JS string.
 *
 * Returns: true on success, false otherwise in which case a JS error is thrown
 */
bool
gjs_string_from_ucs4(JSContext             *cx,
                     const gunichar        *ucs4_string,
                     ssize_t                n_chars,
                     JS::MutableHandleValue value_p)
{
    long u16_string_length;
    GError *error = NULL;

    char16_t *u16_string =
        reinterpret_cast<char16_t *>(g_ucs4_to_utf16(ucs4_string, n_chars, NULL,
                                                     &u16_string_length, &error));
    if (!u16_string) {
        gjs_throw(cx, "Failed to convert UCS-4 string to UTF-16: %s",
                  error->message);
        g_error_free(error);
        return false;
    }

    JSAutoRequest ar(cx);
    /* Avoid a copy - assumes that g_malloc == js_malloc == malloc */
    JS::RootedString str(cx, JS_NewUCString(cx, u16_string, u16_string_length));
    if (!str) {
        /* The buffer is only handed over to the engine when the string is
         * created, so it is still ours to release on failure. */
        g_free(u16_string);
        gjs_throw(cx, "Failed to convert UCS-4 string to UTF-16");
        return false;
    }

    value_p.setString(str);
    return true;
}
// Returns the text produced by |event|. A string supplied by
// WPE::Input::singleCharacterForKeyEvent() takes precedence; otherwise the
// event's Unicode code point is converted to UTF-16. An empty String is
// returned when that conversion fails.
static String singleCharacterStringForKeyEvent(const WPE::Input::KeyboardEvent& event)
{
    if (const char* mappedCharacter = WPE::Input::singleCharacterForKeyEvent(event))
        return String(mappedCharacter);

    glong utf16Length;
    GUniquePtr<gunichar2> utf16(g_ucs4_to_utf16(&event.unicode, 1, 0, &utf16Length, nullptr));
    if (!utf16)
        return String();

    return String(utf16.get());
}
/*
 * Singleton method: interprets the bytes of rb_ucs4 as an array of gunichar
 * code points, converts them to UTF-16 and returns the result as a Ruby
 * string. Raises through RAISE_GERROR when GLib reports a conversion error.
 */
static VALUE
rg_s_to_utf16(G_GNUC_UNUSED VALUE self, VALUE rb_ucs4)
{
    GError *conversion_error = NULL;
    gunichar *code_points;
    gunichar2 *utf16_units;
    glong n_code_points;
    glong n_units;
    VALUE rb_utf16;

    code_points = (gunichar *)StringValuePtr(rb_ucs4);
    /* The Ruby string length is in bytes; convert it to a code point count. */
    n_code_points = RSTRING_LEN(rb_ucs4) / sizeof(*code_points);

    utf16_units = g_ucs4_to_utf16(code_points, n_code_points, NULL,
                                  &n_units, &conversion_error);
    if (conversion_error)
        RAISE_GERROR(conversion_error);

    /* Build the Ruby string before releasing the GLib buffer. */
    rb_utf16 = CSTR2RVAL_LEN_UTF16((char *)utf16_units,
                                   n_units * sizeof(*utf16_units));
    g_free(utf16_units);

    return rb_utf16;
}
// Returns the text a press of the GDK key value |val| produces.
// The Enter/Return variants map to "\r"; every other key value is translated
// to a Unicode code point and converted to UTF-16. An empty String is
// returned when that conversion fails.
static String singleCharacterString(guint val)
{
    const bool isEnterKey = val == GDK_ISO_Enter || val == GDK_KP_Enter || val == GDK_Return;
    if (isEnterKey)
        return String("\r");

    gunichar codePoint = gdk_keyval_to_unicode(val);
    glong unitCount;
    gunichar2* units = g_ucs4_to_utf16(&codePoint, 1, 0, &unitCount, 0);

    String text;
    if (units)
        text = String(reinterpret_cast<UChar*>(units), unitCount);
    else
        text = String();

    // The String holds its own copy by now; release the GLib buffer.
    g_free(units);
    return text;
}
// Handles a key press for type-ahead selection in the popup menu.
// Returns false (after resetting the type-ahead state) when the key is not a
// printable character or cannot be converted to UTF-16; returns true
// otherwise, whether or not a matching menu item was found and selected.
bool WebPopupMenuProxyGtk::typeAheadFind(GdkEventKey* event)
{
    // Non-printable keys never take part in a search.
    gunichar typedCharacter = gdk_keyval_to_unicode(event->keyval);
    if (!g_unichar_isprint(typedCharacter)) {
        resetTypeAheadFindState();
        return false;
    }

    glong utf16Length;
    GUniquePtr<gunichar2> utf16(g_ucs4_to_utf16(&typedCharacter, 1, nullptr, &utf16Length, nullptr));
    if (!utf16) {
        resetTypeAheadFindState();
        return false;
    }

    // A key press arriving after the timeout starts a new search string. Within
    // the timeout, the character is appended only when it differs from the
    // previous one, so pressing the same key again leaves the string unchanged.
    static const uint32_t searchTimeoutMs = 1000;
    // NOTE(review): the original local was called "repeatingCharacter" although
    // it is true when the character is NOT the same as the previous one.
    bool differsFromPreviousCharacter = typedCharacter != m_previousKeyEventCharacter;
    bool searchTimedOut = event->time - m_previousKeyEventTimestamp > searchTimeoutMs;
    if (searchTimedOut)
        m_currentSearchString = String(reinterpret_cast<UChar*>(utf16.get()), utf16Length);
    else if (differsFromPreviousCharacter)
        m_currentSearchString.append(String(reinterpret_cast<UChar*>(utf16.get()), utf16Length));

    m_previousKeyEventTimestamp = event->time;
    m_previousKeyEventCharacter = typedCharacter;

    GUniquePtr<GList> menuItems(gtk_container_get_children(GTK_CONTAINER(m_popup)));
    if (!menuItems)
        return true;

    // Both sides are case folded because strncmp does not handle non-ASCII characters.
    GUniquePtr<gchar> foldedSearch(g_utf8_casefold(m_currentSearchString.utf8().data(), -1));
    size_t foldedSearchLength = strlen(foldedSearch.get());

    // Start from the currently selected item, if it is still in the menu,
    // otherwise from the head of the list.
    GList* cursor = menuItems.get();
    if (m_currentlySelectedMenuItem) {
        cursor = g_list_find(menuItems.get(), m_currentlySelectedMenuItem);
        if (!cursor) {
            m_currentlySelectedMenuItem = nullptr;
            cursor = menuItems.get();
        }

        // Skip one entry ahead (when there is one) if the character differs
        // from the previous key press.
        GList* following = differsFromPreviousCharacter ? g_list_next(cursor) : nullptr;
        if (following)
            cursor = following;
    }

    // Walk the list circularly, beginning with the entry after the start
    // position and finishing on the start position itself.
    GList* startPosition = cursor;
    do {
        cursor = g_list_next(cursor);
        if (!cursor)
            cursor = menuItems.get();

        GUniquePtr<gchar> foldedLabel(g_utf8_casefold(gtk_menu_item_get_label(GTK_MENU_ITEM(cursor->data)), -1));
        bool labelHasPrefix = !strncmp(foldedSearch.get(), foldedLabel.get(), foldedSearchLength);
        if (labelHasPrefix) {
            gtk_menu_shell_select_item(GTK_MENU_SHELL(m_popup), GTK_WIDGET(cursor->data));
            break;
        }
    } while (cursor != startPosition);

    return true;
}
RESULT test_ucs4_to_utf16 () { static gunichar str1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'}; static gunichar2 exp1[12] = {'H','e','l','l','o',' ','W','o','r','l','d','\0'}; static gunichar str2[3] = {'h',0x80000000,'\0'}; static gunichar2 exp2[2] = {'h','\0'}; static gunichar str3[3] = {'h',0xDA00,'\0'}; static gunichar str4[3] = {'h',0x10FFFF,'\0'}; static gunichar2 exp4[4] = {'h',0xdbff,0xdfff,'\0'}; static gunichar str5[7] = {0xD7FF,0xD800,0xDFFF,0xE000,0x110000,0x10FFFF,'\0'}; static gunichar2 exp5[5] = {0xD7FF,0xE000,0xdbff,0xdfff,'\0'}; static gunichar str6[2] = {0x10400, '\0'}; static gunichar2 exp6[3] = {0xD801, 0xDC00, '\0'}; static glong read_write[12] = {1,1,0,0,0,0,1,1,0,0,1,2}; gunichar2* res; glong items_read, items_written, current_write_index; GError* err=0; RESULT check_result; glong i; res = g_ucs4_to_utf16 (str1, 12, &items_read, &items_written, &err); check_result = ucs4_to_utf16_check_result (res, exp1, items_read, 11, items_written, 11, err, FALSE); if (check_result) return check_result; g_free (res); items_read = items_written = 0; res = g_ucs4_to_utf16 (str2, 0, &items_read, &items_written, &err); check_result = ucs4_to_utf16_check_result (res, exp2, items_read, 0, items_written, 0, err, FALSE); if (check_result) return check_result; g_free (res); items_read = items_written = 0; res = g_ucs4_to_utf16 (str2, 1, &items_read, &items_written, &err); check_result = ucs4_to_utf16_check_result (res, exp2, items_read, 1, items_written, 1, err, FALSE); if (check_result) return check_result; g_free (res); items_read = items_written = 0; res = g_ucs4_to_utf16 (str2, 2, &items_read, &items_written, &err); check_result = ucs4_to_utf16_check_result (res, 0, items_read, 1, items_written, 0, err, TRUE); g_free (res); if (check_result) return check_result; items_read = items_written = 0; err = 0; res = g_ucs4_to_utf16 (str3, 2, &items_read, &items_written, &err); check_result = ucs4_to_utf16_check_result (res, 0, items_read, 1, 
items_written, 0, err, TRUE); if (check_result) return check_result; g_free (res); items_read = items_written = 0; err = 0; res = g_ucs4_to_utf16 (str4, 5, &items_read, &items_written, &err); check_result = ucs4_to_utf16_check_result (res, exp4, items_read, 2, items_written, 3, err, FALSE); if (check_result) return check_result; g_free (res); // This loop tests the bounds of the conversion algorithm current_write_index = 0; for (i=0;i<6;i++) { items_read = items_written = 0; err = 0; res = g_ucs4_to_utf16 (&str5[i], 1, &items_read, &items_written, &err); check_result = ucs4_to_utf16_check_result (res, &exp5[current_write_index], items_read, read_write[i*2], items_written, read_write[(i*2)+1], err, !read_write[(i*2)+1]); if (check_result) return check_result; g_free (res); current_write_index += items_written; } items_read = items_written = 0; err = 0; res = g_ucs4_to_utf16 (str6, 1, &items_read, &items_written, &err); check_result = ucs4_to_utf16_check_result (res, exp6, items_read, 1, items_written, 2, err, FALSE); if (check_result) return check_result; g_free (res); return OK; }
static gsize win32_strftime_helper (const GDate *d, const gchar *format, const struct tm *tm, gchar *s, gsize slen) { SYSTEMTIME systemtime; TIME_ZONE_INFORMATION tzinfo; LCID lcid; int n, k; GArray *result; const gchar *p; gunichar c; const wchar_t digits[] = L"0123456789"; gchar *convbuf; glong convlen = 0; gsize retval; systemtime.wYear = tm->tm_year + 1900; systemtime.wMonth = tm->tm_mon + 1; systemtime.wDayOfWeek = tm->tm_wday; systemtime.wDay = tm->tm_mday; systemtime.wHour = tm->tm_hour; systemtime.wMinute = tm->tm_min; systemtime.wSecond = tm->tm_sec; systemtime.wMilliseconds = 0; lcid = GetThreadLocale (); result = g_array_sized_new (FALSE, FALSE, sizeof (wchar_t), MAX (128, strlen (format) * 2)); p = format; while (*p) { c = g_utf8_get_char (p); if (c == '%') { p = g_utf8_next_char (p); if (!*p) { s[0] = '\0'; g_array_free (result, TRUE); return 0; } c = g_utf8_get_char (p); if (c == 'E' || c == 'O') { /* Ignore modified conversion specifiers for now. */ p = g_utf8_next_char (p); if (!*p) { s[0] = '\0'; g_array_free (result, TRUE); return 0; } c = g_utf8_get_char (p); } switch (c) { case 'a': if (systemtime.wDayOfWeek == 0) k = 6; else k = systemtime.wDayOfWeek - 1; n = GetLocaleInfoW (lcid, LOCALE_SABBREVDAYNAME1+k, NULL, 0); g_array_set_size (result, result->len + n); GetLocaleInfoW (lcid, LOCALE_SABBREVDAYNAME1+k, ((wchar_t *) result->data) + result->len - n, n); g_array_set_size (result, result->len - 1); break; case 'A': if (systemtime.wDayOfWeek == 0) k = 6; else k = systemtime.wDayOfWeek - 1; n = GetLocaleInfoW (lcid, LOCALE_SDAYNAME1+k, NULL, 0); g_array_set_size (result, result->len + n); GetLocaleInfoW (lcid, LOCALE_SDAYNAME1+k, ((wchar_t *) result->data) + result->len - n, n); g_array_set_size (result, result->len - 1); break; case 'b': case 'h': n = GetLocaleInfoW (lcid, LOCALE_SABBREVMONTHNAME1+systemtime.wMonth-1, NULL, 0); g_array_set_size (result, result->len + n); GetLocaleInfoW (lcid, LOCALE_SABBREVMONTHNAME1+systemtime.wMonth-1, 
((wchar_t *) result->data) + result->len - n, n); g_array_set_size (result, result->len - 1); break; case 'B': n = GetLocaleInfoW (lcid, LOCALE_SMONTHNAME1+systemtime.wMonth-1, NULL, 0); g_array_set_size (result, result->len + n); GetLocaleInfoW (lcid, LOCALE_SMONTHNAME1+systemtime.wMonth-1, ((wchar_t *) result->data) + result->len - n, n); g_array_set_size (result, result->len - 1); break; case 'c': n = GetDateFormatW (lcid, 0, &systemtime, NULL, NULL, 0); if (n > 0) { g_array_set_size (result, result->len + n); GetDateFormatW (lcid, 0, &systemtime, NULL, ((wchar_t *) result->data) + result->len - n, n); g_array_set_size (result, result->len - 1); } g_array_append_vals (result, L" ", 1); n = GetTimeFormatW (lcid, 0, &systemtime, NULL, NULL, 0); if (n > 0) { g_array_set_size (result, result->len + n); GetTimeFormatW (lcid, 0, &systemtime, NULL, ((wchar_t *) result->data) + result->len - n, n); g_array_set_size (result, result->len - 1); } break; case 'C': g_array_append_vals (result, digits + systemtime.wYear/1000, 1); g_array_append_vals (result, digits + (systemtime.wYear/1000)%10, 1); break; case 'd': g_array_append_vals (result, digits + systemtime.wDay/10, 1); g_array_append_vals (result, digits + systemtime.wDay%10, 1); break; case 'D': g_array_append_vals (result, digits + systemtime.wMonth/10, 1); g_array_append_vals (result, digits + systemtime.wMonth%10, 1); g_array_append_vals (result, L"/", 1); g_array_append_vals (result, digits + systemtime.wDay/10, 1); g_array_append_vals (result, digits + systemtime.wDay%10, 1); g_array_append_vals (result, L"/", 1); g_array_append_vals (result, digits + (systemtime.wYear/10)%10, 1); g_array_append_vals (result, digits + systemtime.wYear%10, 1); break; case 'e': if (systemtime.wDay >= 10) g_array_append_vals (result, digits + systemtime.wDay/10, 1); else g_array_append_vals (result, L" ", 1); g_array_append_vals (result, digits + systemtime.wDay%10, 1); break; /* A GDate has no time fields, so for now we can * 
hardcode all time conversions into zeros (or 12 for * %I). The alternative code snippets in the #else * branches are here ready to be taken into use when * needed by a g_strftime() or g_date_and_time_format() * or whatever. */ case 'H': #if 1 g_array_append_vals (result, L"00", 2); #else g_array_append_vals (result, digits + systemtime.wHour/10, 1); g_array_append_vals (result, digits + systemtime.wHour%10, 1); #endif break; case 'I': #if 1 g_array_append_vals (result, L"12", 2); #else if (systemtime.wHour == 0) g_array_append_vals (result, L"12", 2); else { g_array_append_vals (result, digits + (systemtime.wHour%12)/10, 1); g_array_append_vals (result, digits + (systemtime.wHour%12)%10, 1); } #endif break; case 'j': g_array_append_vals (result, digits + (tm->tm_yday+1)/100, 1); g_array_append_vals (result, digits + ((tm->tm_yday+1)/10)%10, 1); g_array_append_vals (result, digits + (tm->tm_yday+1)%10, 1); break; case 'm': g_array_append_vals (result, digits + systemtime.wMonth/10, 1); g_array_append_vals (result, digits + systemtime.wMonth%10, 1); break; case 'M': #if 1 g_array_append_vals (result, L"00", 2); #else g_array_append_vals (result, digits + systemtime.wMinute/10, 1); g_array_append_vals (result, digits + systemtime.wMinute%10, 1); #endif break; case 'n': g_array_append_vals (result, L"\n", 1); break; case 'p': n = GetTimeFormatW (lcid, 0, &systemtime, L"tt", NULL, 0); if (n > 0) { g_array_set_size (result, result->len + n); GetTimeFormatW (lcid, 0, &systemtime, L"tt", ((wchar_t *) result->data) + result->len - n, n); g_array_set_size (result, result->len - 1); } break; case 'r': /* This is a rather odd format. Hard to say what to do. 
* Let's always use the POSIX %I:%M:%S %p */ #if 1 g_array_append_vals (result, L"12:00:00", 8); #else if (systemtime.wHour == 0) g_array_append_vals (result, L"12", 2); else { g_array_append_vals (result, digits + (systemtime.wHour%12)/10, 1); g_array_append_vals (result, digits + (systemtime.wHour%12)%10, 1); } g_array_append_vals (result, L":", 1); g_array_append_vals (result, digits + systemtime.wMinute/10, 1); g_array_append_vals (result, digits + systemtime.wMinute%10, 1); g_array_append_vals (result, L":", 1); g_array_append_vals (result, digits + systemtime.wSecond/10, 1); g_array_append_vals (result, digits + systemtime.wSecond%10, 1); g_array_append_vals (result, L" ", 1); #endif n = GetTimeFormatW (lcid, 0, &systemtime, L"tt", NULL, 0); if (n > 0) { g_array_set_size (result, result->len + n); GetTimeFormatW (lcid, 0, &systemtime, L"tt", ((wchar_t *) result->data) + result->len - n, n); g_array_set_size (result, result->len - 1); } break; case 'R': #if 1 g_array_append_vals (result, L"00:00", 5); #else g_array_append_vals (result, digits + systemtime.wHour/10, 1); g_array_append_vals (result, digits + systemtime.wHour%10, 1); g_array_append_vals (result, L":", 1); g_array_append_vals (result, digits + systemtime.wMinute/10, 1); g_array_append_vals (result, digits + systemtime.wMinute%10, 1); #endif break; case 'S': #if 1 g_array_append_vals (result, L"00", 2); #else g_array_append_vals (result, digits + systemtime.wSecond/10, 1); g_array_append_vals (result, digits + systemtime.wSecond%10, 1); #endif break; case 't': g_array_append_vals (result, L"\t", 1); break; case 'T': #if 1 g_array_append_vals (result, L"00:00:00", 8); #else g_array_append_vals (result, digits + systemtime.wHour/10, 1); g_array_append_vals (result, digits + systemtime.wHour%10, 1); g_array_append_vals (result, L":", 1); g_array_append_vals (result, digits + systemtime.wMinute/10, 1); g_array_append_vals (result, digits + systemtime.wMinute%10, 1); g_array_append_vals (result, L":", 
1); g_array_append_vals (result, digits + systemtime.wSecond/10, 1); g_array_append_vals (result, digits + systemtime.wSecond%10, 1); #endif break; case 'u': if (systemtime.wDayOfWeek == 0) g_array_append_vals (result, L"7", 1); else g_array_append_vals (result, digits + systemtime.wDayOfWeek, 1); break; case 'U': n = g_date_get_sunday_week_of_year (d); g_array_append_vals (result, digits + n/10, 1); g_array_append_vals (result, digits + n%10, 1); break; case 'V': n = g_date_get_iso8601_week_of_year (d); g_array_append_vals (result, digits + n/10, 1); g_array_append_vals (result, digits + n%10, 1); break; case 'w': g_array_append_vals (result, digits + systemtime.wDayOfWeek, 1); break; case 'W': n = g_date_get_monday_week_of_year (d); g_array_append_vals (result, digits + n/10, 1); g_array_append_vals (result, digits + n%10, 1); break; case 'x': n = GetDateFormatW (lcid, 0, &systemtime, NULL, NULL, 0); if (n > 0) { g_array_set_size (result, result->len + n); GetDateFormatW (lcid, 0, &systemtime, NULL, ((wchar_t *) result->data) + result->len - n, n); g_array_set_size (result, result->len - 1); } break; case 'X': n = GetTimeFormatW (lcid, 0, &systemtime, NULL, NULL, 0); if (n > 0) { g_array_set_size (result, result->len + n); GetTimeFormatW (lcid, 0, &systemtime, NULL, ((wchar_t *) result->data) + result->len - n, n); g_array_set_size (result, result->len - 1); } break; case 'y': g_array_append_vals (result, digits + (systemtime.wYear/10)%10, 1); g_array_append_vals (result, digits + systemtime.wYear%10, 1); break; case 'Y': g_array_append_vals (result, digits + systemtime.wYear/1000, 1); g_array_append_vals (result, digits + (systemtime.wYear/100)%10, 1); g_array_append_vals (result, digits + (systemtime.wYear/10)%10, 1); g_array_append_vals (result, digits + systemtime.wYear%10, 1); break; case 'Z': n = GetTimeZoneInformation (&tzinfo); if (n == TIME_ZONE_ID_UNKNOWN) ; else if (n == TIME_ZONE_ID_STANDARD) g_array_append_vals (result, tzinfo.StandardName, wcslen 
(tzinfo.StandardName)); else if (n == TIME_ZONE_ID_DAYLIGHT) g_array_append_vals (result, tzinfo.DaylightName, wcslen (tzinfo.DaylightName)); break; case '%': g_array_append_vals (result, L"%", 1); break; } } else if (c <= 0xFFFF) { wchar_t wc = c; g_array_append_vals (result, &wc, 1); } else { glong nwc; wchar_t *ws; ws = g_ucs4_to_utf16 (&c, 1, NULL, &nwc, NULL); g_array_append_vals (result, ws, nwc); g_free (ws); } p = g_utf8_next_char (p); } convbuf = g_utf16_to_utf8 ((wchar_t *) result->data, result->len, NULL, &convlen, NULL); g_array_free (result, TRUE); if (!convbuf) { s[0] = '\0'; return 0; } if (slen <= convlen) { /* Ensure only whole characters are copied into the buffer. */ gchar *end = g_utf8_find_prev_char (convbuf, convbuf + slen); g_assert (end != NULL); convlen = end - convbuf; /* Return 0 because the buffer isn't large enough. */ retval = 0; } else retval = convlen; memcpy (s, convbuf, convlen); s[convlen] = '\0'; g_free (convbuf); return retval; }
static void lookup(const char *text, char ***pppWord, char ****ppppWordData) { if ((text[0] == '&' && text[1] == '#' && g_str_has_suffix(text, ";")) || g_str_has_prefix(text, "U+")) { gunichar uc; if (text[0] == '&') { if (text[2] == 'x' || text[2] == 'X') { uc = htoi(text+3); } else { uc = atoi(text+2); } } else { // U+ uc = htoi(text+2); } gchar utf8[7]; gint n = g_unichar_to_utf8(uc, utf8); utf8[n] = '\0'; *pppWord = (gchar **)g_malloc(sizeof(gchar *)*2); (*pppWord)[0] = g_strdup(text); (*pppWord)[1] = NULL; *ppppWordData = (gchar ***)g_malloc(sizeof(gchar **)*(1)); (*ppppWordData)[0] = (gchar **)g_malloc(sizeof(gchar *)*2); (*ppppWordData)[0][0] = build_dictdata('m', utf8); (*ppppWordData)[0][1] = NULL; return; } bool found; gunichar uc; if (g_utf8_strlen(text, -1) != 1) { found = false; // Don't query it. } else { uc = g_utf8_get_char(text); if (!gucharmap_unichar_validate (uc) || !gucharmap_unichar_isdefined (uc)) found = false; else found = true; } if (!found) { *pppWord = NULL; return; } std::string definition; definition += "\n"; gchar buf[12]; int n = gucharmap_unichar_to_printable_utf8 (uc, buf); if (n == 0) { definition += _("[not a printable character]"); } else { gchar *str = g_markup_escape_text(buf, n); definition += "<big><big><big><big>"; definition += str; definition += "</big></big></big></big>"; g_free(str); } definition += "\n\n"; gchar *temp; /* character name */ temp = g_strdup_printf ("U+%4.4X %s", uc, gucharmap_get_unicode_name (uc)); definition += "<big><b>"; definition += temp; definition += "</b></big>\n"; g_free (temp); definition += "\n<b>"; definition += _("General Character Properties"); definition += "</b>\n\n"; /* character category */ definition += get_vanilla_detail(_("Unicode category:"), gucharmap_get_unicode_category_name (uc)); /* canonical decomposition */ gunichar *decomposition; gsize result_len; decomposition = gucharmap_unicode_canonical_decomposition (uc, &result_len); if (result_len != 1) { definition += _("Canonical 
decomposition:"); definition += " "; definition += get_codepoint(decomposition[0]); for (gsize i = 1; i < result_len; i++) { definition += " + "; definition += get_codepoint(decomposition[i]); } definition += "\n"; } g_free (decomposition); /* representations */ if (g_unichar_break_type(uc) != G_UNICODE_BREAK_SURROGATE) { definition += "\n<b>"; definition += _("Various Useful Representations"); definition += "</b>\n\n"; guchar utf8[7]; gunichar2 *utf16; GString *gstemp; n = g_unichar_to_utf8 (uc, (gchar *)utf8); utf16 = g_ucs4_to_utf16 (&uc, 1, NULL, NULL, NULL); /* UTF-8 */ gstemp = g_string_new (NULL); gint i; for (i = 0; i < n; i++) g_string_append_printf (gstemp, "0x%2.2X ", utf8[i]); g_string_erase (gstemp, gstemp->len - 1, -1); definition += get_vanilla_detail(_("UTF-8:"), gstemp->str); g_string_free (gstemp, TRUE); /* UTF-16 */ gstemp = g_string_new (NULL); g_string_append_printf (gstemp, "0x%4.4X", utf16[0]); if (utf16[0] != '\0' && utf16[1] != '\0') g_string_append_printf (gstemp, " 0x%4.4X", utf16[1]); definition += get_vanilla_detail(_("UTF-16:"), gstemp->str); g_string_free (gstemp, TRUE); /* an empty line */ definition += "\n"; /* C octal \012\234 */ gstemp = g_string_new (NULL); for (i = 0; i < n; i++) g_string_append_printf (gstemp, "\\%3.3o", utf8[i]); definition += get_vanilla_detail(_("C octal escaped UTF-8:"), gstemp->str); g_string_free (gstemp, TRUE); /* XML entity */ if ((0x0001 <= uc && uc <= 0xD7FF) || (0xE000 <= uc && uc <= 0xFFFD) || (0x10000 <= uc && uc <= 0x10FFFF)) { temp = g_strdup_printf ("<kref>&#%d;</kref>", uc); definition += get_vanilla_detail(_("XML decimal entity:"), temp); g_free (temp); temp = g_strdup_printf ("<kref>&#x%X;</kref>", uc); definition += get_vanilla_detail(_("XML hexadecimal entity:"), temp); g_free (temp); } g_free(utf16); } /* annotations */ std::string annotations; /* nameslist equals (alias names) */ const gchar **csarr; csarr = gucharmap_get_nameslist_equals (uc); if (csarr != NULL) { annotations += 
get_chocolate_detail(_("Alias names:"), csarr, FALSE); g_free (csarr); } /* nameslist stars (notes) */ csarr = gucharmap_get_nameslist_stars (uc); if (csarr != NULL) { annotations += get_chocolate_detail(_("Notes:"), csarr, TRUE); g_free (csarr); } /* nameslist exes (see also) */ gunichar *ucs; ucs = gucharmap_get_nameslist_exes (uc); if (ucs != NULL) { annotations += get_chocolate_detail_codepoints(_("See also:"), ucs); g_free (ucs); } /* nameslist pounds (approximate equivalents) */ csarr = gucharmap_get_nameslist_pounds (uc); if (csarr != NULL) { annotations += get_chocolate_detail(_("Approximate equivalents:"), csarr, TRUE); g_free (csarr); } /* nameslist colons (equivalents) */ csarr = gucharmap_get_nameslist_colons (uc); if (csarr != NULL) { annotations += get_chocolate_detail(_("Equivalents:"), csarr, TRUE); g_free (csarr); } if (!annotations.empty()) { definition += "\n<b>"; definition += _("Annotations and Cross References"); definition += "</b>\n\n"; definition += annotations; } std::string unihan; const gchar *csp = gucharmap_get_unicode_kDefinition (uc); if (csp) unihan += get_vanilla_detail(_("Definition in English:"), csp); csp = gucharmap_get_unicode_kMandarin (uc); if (csp) unihan += get_vanilla_detail(_("Mandarin Pronunciation:"), csp); csp = gucharmap_get_unicode_kCantonese (uc); if (csp) unihan += get_vanilla_detail(_("Cantonese Pronunciation:"), csp); csp = gucharmap_get_unicode_kJapaneseOn (uc); if (csp) unihan += get_vanilla_detail(_("Japanese On Pronunciation:"), csp); csp = gucharmap_get_unicode_kJapaneseKun (uc); if (csp) unihan += get_vanilla_detail(_("Japanese Kun Pronunciation:"), csp); csp = gucharmap_get_unicode_kTang (uc); if (csp) unihan += get_vanilla_detail(_("Tang Pronunciation:"), csp); csp = gucharmap_get_unicode_kKorean (uc); if (csp) unihan += get_vanilla_detail(_("Korean Pronunciation:"), csp); if (!unihan.empty()) { definition += "\n<b>"; definition += _("CJK Ideograph Information"); definition += "</b>\n\n"; definition += 
unihan; } n = definition.length(); int l = n-1; while (l >= 0 && definition[l] == '\n') { l--; } if (l < n-1) { definition.erase(l+1, n-1-l); } *pppWord = (gchar **)g_malloc(sizeof(gchar *)*2); (*pppWord)[0] = g_strdup(text); (*pppWord)[1] = NULL; *ppppWordData = (gchar ***)g_malloc(sizeof(gchar **)*(1)); (*ppppWordData)[0] = (gchar **)g_malloc(sizeof(gchar *)*2); (*ppppWordData)[0][0] = build_dictdata('x', definition.c_str()); (*ppppWordData)[0][1] = NULL; }
/* Replaces the contents of the charmap's details view with a description
 * of the character uc: the glyph, its name, general properties, encoded
 * representations, nameslist annotations and, when ENABLE_UNIHAN is set,
 * the Unihan data.
 */
static void
set_details (GucharmapCharmap *charmap,
             gunichar          uc)
{
  GucharmapCharmapPrivate *priv = charmap->priv;
  GtkTextBuffer *buffer;
  GtkTextIter iter;
  GString *line;
  gchar *formatted;
  const gchar *csp;
  gchar printable[12];
  guchar utf8_bytes[7];
  gint len, b;
  const gchar **entries;
  gunichar *see_also;
  gunichar2 *utf16_units;
  GucharmapUnicodeVersion version;

  /* Start from an empty buffer with the cursor at the top. */
  buffer = gtk_text_view_get_buffer (priv->details_view);
  gtk_text_buffer_set_text (buffer, "", 0);

  gtk_text_buffer_get_start_iter (buffer, &iter);
  gtk_text_buffer_place_cursor (buffer, &iter);
  gtk_text_buffer_insert (buffer, &iter, "\n", -1);

  /* the character itself */
  len = gucharmap_unichar_to_printable_utf8 (uc, printable);
  if (len != 0)
    gtk_text_buffer_insert_with_tags_by_name (buffer, &iter, printable, len,
                                              "gimongous", NULL);
  else
    gtk_text_buffer_insert (buffer, &iter,
                            _("[not a printable character]"), -1);

  gtk_text_buffer_insert (buffer, &iter, "\n\n", -1);

  /* character name */
  formatted = g_strdup_printf ("U+%4.4X %s\n",
                               uc, gucharmap_get_unicode_name (uc));
  gtk_text_buffer_insert_with_tags_by_name (buffer, &iter, formatted, -1,
                                            "big", "bold", NULL);
  g_free (formatted);

  insert_heading (charmap, buffer, &iter, _("General Character Properties"));

  /* Unicode version */
  version = gucharmap_get_unicode_version (uc);
  if (version != GUCHARMAP_UNICODE_VERSION_UNASSIGNED)
    insert_vanilla_detail (charmap, buffer, &iter,
                           _("In Unicode since:"),
                           gucharmap_unicode_version_to_string (version));

  /* character category */
  insert_vanilla_detail (charmap, buffer, &iter, _("Unicode category:"),
                         gucharmap_get_unicode_category_name (uc));

  /* canonical decomposition */
  conditionally_insert_canonical_decomposition (charmap, buffer, &iter, uc);

  /* representations */
  if (g_unichar_break_type(uc) != G_UNICODE_BREAK_SURROGATE)
    {
      insert_heading (charmap, buffer, &iter, _("Various Useful Representations"));

      len = g_unichar_to_utf8 (uc, (gchar *)utf8_bytes);
      utf16_units = g_ucs4_to_utf16 (&uc, 1, NULL, NULL, NULL);

      /* UTF-8 */
      line = g_string_new (NULL);
      for (b = 0; b < len; b++)
        g_string_append_printf (line, "0x%2.2X ", utf8_bytes[b]);
      /* Remove the trailing space left by the loop. */
      g_string_erase (line, line->len - 1, -1);
      insert_vanilla_detail (charmap, buffer, &iter, _("UTF-8:"), line->str);
      g_string_free (line, TRUE);

      /* UTF-16 */
      line = g_string_new (NULL);
      g_string_append_printf (line, "0x%4.4X", utf16_units[0]);
      /* A second unit is present only for surrogate pairs. */
      if (utf16_units[0] != '\0' && utf16_units[1] != '\0')
        g_string_append_printf (line, " 0x%4.4X", utf16_units[1]);
      insert_vanilla_detail (charmap, buffer, &iter, _("UTF-16:"), line->str);
      g_string_free (line, TRUE);

      /* an empty line */
      gtk_text_buffer_insert (buffer, &iter, "\n", -1);

      /* C octal \012\234 */
      line = g_string_new (NULL);
      for (b = 0; b < len; b++)
        g_string_append_printf (line, "\\%3.3o", utf8_bytes[b]);
      insert_vanilla_detail (charmap, buffer, &iter,
                             _("C octal escaped UTF-8:"), line->str);
      g_string_free (line, TRUE);

      /* XML decimal entity */
      if ((0x0001 <= uc && uc <= 0xD7FF) ||
          (0xE000 <= uc && uc <= 0xFFFD) ||
          (0x10000 <= uc && uc <= 0x10FFFF))
        {
          formatted = g_strdup_printf ("&#%d;", uc);
          insert_vanilla_detail (charmap, buffer, &iter,
                                 _("XML decimal entity:"), formatted);
          g_free (formatted);
        }

      g_free(utf16_units);
    }

  /* annotations */
  if (_gucharmap_unicode_has_nameslist_entry (uc))
    {
      insert_heading (charmap, buffer, &iter,
                      _("Annotations and Cross References"));

      /* nameslist equals (alias names) */
      entries = gucharmap_get_nameslist_equals (uc);
      if (entries != NULL)
        {
          insert_chocolate_detail (charmap, buffer, &iter,
                                   _("Alias names:"), entries, FALSE);
          g_free (entries);
        }

      /* nameslist stars (notes) */
      entries = gucharmap_get_nameslist_stars (uc);
      if (entries != NULL)
        {
          insert_chocolate_detail (charmap, buffer, &iter,
                                   _("Notes:"), entries, TRUE);
          g_free (entries);
        }

      /* nameslist exes (see also) */
      see_also = gucharmap_get_nameslist_exes (uc);
      if (see_also != NULL)
        {
          insert_chocolate_detail_codepoints (charmap, buffer, &iter,
                                              _("See also:"), see_also);
          g_free (see_also);
        }

      /* nameslist pounds (approximate equivalents) */
      entries = gucharmap_get_nameslist_pounds (uc);
      if (entries != NULL)
        {
          insert_chocolate_detail (charmap, buffer, &iter,
                                   _("Approximate equivalents:"), entries, TRUE);
          g_free (entries);
        }

      /* nameslist colons (equivalents) */
      entries = gucharmap_get_nameslist_colons (uc);
      if (entries != NULL)
        {
          insert_chocolate_detail (charmap, buffer, &iter,
                                   _("Equivalents:"), entries, TRUE);
          g_free (entries);
        }
    }

#if ENABLE_UNIHAN

  /* this isn't so bad efficiency-wise */
  if (gucharmap_get_unicode_kDefinition (uc)
      || gucharmap_get_unicode_kCantonese (uc)
      || gucharmap_get_unicode_kMandarin (uc)
      || gucharmap_get_unicode_kJapaneseOn (uc)
      || gucharmap_get_unicode_kJapaneseKun (uc)
      || gucharmap_get_unicode_kTang (uc)
      || gucharmap_get_unicode_kKorean (uc))
    {
      insert_heading (charmap, buffer, &iter, _("CJK Ideograph Information"));

      csp = gucharmap_get_unicode_kDefinition (uc);
      if (csp)
        insert_vanilla_detail (charmap, buffer, &iter,
                               _("Definition in English:"), csp);

      csp = gucharmap_get_unicode_kMandarin (uc);
      if (csp)
        insert_vanilla_detail (charmap, buffer, &iter,
                               _("Mandarin Pronunciation:"), csp);

      csp = gucharmap_get_unicode_kCantonese (uc);
      if (csp)
        insert_vanilla_detail (charmap, buffer, &iter,
                               _("Cantonese Pronunciation:"), csp);

      csp = gucharmap_get_unicode_kJapaneseOn (uc);
      if (csp)
        insert_vanilla_detail (charmap, buffer, &iter,
                               _("Japanese On Pronunciation:"), csp);

      csp = gucharmap_get_unicode_kJapaneseKun (uc);
      if (csp)
        insert_vanilla_detail (charmap, buffer, &iter,
                               _("Japanese Kun Pronunciation:"), csp);

      csp = gucharmap_get_unicode_kTang (uc);
      if (csp)
        insert_vanilla_detail (charmap, buffer, &iter,
                               _("Tang Pronunciation:"), csp);

      csp = gucharmap_get_unicode_kKorean (uc);
      if (csp)
        insert_vanilla_detail (charmap, buffer, &iter,
                               _("Korean Pronunciation:"), csp);
    }
#endif /* #if ENABLE_UNIHAN */
}
/*
 * process:
 * @line:     line number of the test vector, used only in diagnostics
 * @utf8:     nul-terminated byte string under test
 * @status:   expected classification of @utf8
 * @ucs4:     expected UCS-4 decoding of @utf8
 * @ucs4_len: expected number of characters in @ucs4
 *
 * Checks one test vector against the GLib Unicode conversion routines:
 *
 *  1. g_utf8_validate() must agree with @status.
 *  2. INCOMPLETE input must be reported as G_CONVERT_ERROR_PARTIAL_INPUT
 *     when no items_read pointer is supplied, and must stop short of the
 *     end of the string when one is supplied.
 *  3. VALID / NOTUNICODE input must round-trip UTF-8 -> UCS-4 -> UTF-8.
 *  4. VALID input must additionally round-trip through UTF-16 and agree
 *     with what g_convert() produces.
 *
 * Every mismatch is reported through fail() and the function then leaves
 * through the common cleanup label, so buffers and GErrors obtained up to
 * that point are released on the failure paths as well as on success.
 */
static void
process (gint      line,
         gchar    *utf8,
         Status    status,
         gunichar *ucs4,
         gint      ucs4_len)
{
  const gchar *end;
  gboolean is_valid = g_utf8_validate (utf8, -1, &end);
  GError *error = NULL;
  glong items_read, items_written;
  /* Length in bytes of the input; compared against the glong counters
   * with an explicit conversion instead of an implicit signed/unsigned one. */
  const gsize utf8_len = strlen (utf8);

  /* Everything below is owned by this function and released at "out". */
  gunichar *ucs4_result = NULL;
  gchar *utf8_result = NULL;
  gunichar2 *utf16_expected_tmp = NULL;
  gunichar2 *utf16_expected = NULL;
  gunichar2 *utf16_from_utf8 = NULL;
  gunichar2 *utf16_from_ucs4 = NULL;
  gsize bytes_written;
  gint n_chars;

  switch (status)
    {
    case VALID:
      if (!is_valid)
        {
          fail ("line %d: valid but g_utf8_validate returned FALSE\n", line);
          goto out;
        }
      break;
    case NOTUNICODE:
    case INCOMPLETE:
    case OVERLONG:
    case MALFORMED:
      if (is_valid)
        {
          fail ("line %d: invalid but g_utf8_validate returned TRUE\n", line);
          goto out;
        }
      break;
    }

  if (status == INCOMPLETE)
    {
      /* Without items_read a trailing partial character must be an error. */
      ucs4_result = g_utf8_to_ucs4 (utf8, -1, NULL, NULL, &error);

      if (!error || !g_error_matches (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT))
        {
          fail ("line %d: incomplete input not properly detected\n", line);
          goto out;
        }
      g_clear_error (&error);

      /* With items_read the conversion must stop before the end of input. */
      ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, NULL, &error);

      if (!ucs4_result || (gsize) items_read == utf8_len)
        {
          fail ("line %d: incomplete input not properly detected\n", line);
          goto out;
        }

      g_free (ucs4_result);
      ucs4_result = NULL;
    }

  if (status == VALID || status == NOTUNICODE)
    {
      ucs4_result = g_utf8_to_ucs4 (utf8, -1, &items_read, &items_written, &error);
      if (!ucs4_result)
        {
          fail ("line %d: conversion to ucs4 failed: %s\n", line, error->message);
          goto out;
        }

      if (!ucs4_equal (ucs4_result, ucs4) ||
          (gsize) items_read != utf8_len ||
          items_written != ucs4_len)
        {
          fail ("line %d: results of conversion to ucs4 do not match expected.\n", line);
          goto out;
        }

      g_free (ucs4_result);

      /* Same check through the non-validating fast path. */
      ucs4_result = g_utf8_to_ucs4_fast (utf8, -1, &items_written);

      if (!ucs4_equal (ucs4_result, ucs4) ||
          items_written != ucs4_len)
        {
          fail ("line %d: results of conversion to ucs4 do not match expected.\n", line);
          goto out;
        }

      utf8_result = g_ucs4_to_utf8 (ucs4_result, -1, &items_read, &items_written, &error);
      if (!utf8_result)
        {
          fail ("line %d: conversion back to utf8 failed: %s\n", line, error->message);
          goto out;
        }

      if (strcmp (utf8_result, utf8) != 0 ||
          items_read != ucs4_len ||
          (gsize) items_written != utf8_len)
        {
          fail ("line %d: conversion back to utf8 did not match original\n", line);
          goto out;
        }

      /* Reset so the UTF-16 stage below can reuse the same variables
       * without the cleanup code freeing them twice. */
      g_free (utf8_result);
      utf8_result = NULL;
      g_free (ucs4_result);
      ucs4_result = NULL;
    }

  if (status == VALID)
    {
#if G_BYTE_ORDER == G_LITTLE_ENDIAN
#define TARGET "UTF-16LE"
#else
#define TARGET "UTF-16"
#endif

      utf16_expected_tmp = (gunichar2 *) g_convert (utf8, -1, TARGET, "UTF-8",
                                                    NULL, &bytes_written, NULL);
      if (!utf16_expected_tmp)
        {
          fail ("line %d: could not convert to UTF-16 via g_convert\n", line);
          goto out;
        }

      /* zero-terminate and remove BOM */
      n_chars = bytes_written / 2;
      if (utf16_expected_tmp[0] == 0xfeff) /* BOM */
        {
          n_chars--;
          utf16_expected = g_new (gunichar2, n_chars + 1);
          memcpy (utf16_expected, utf16_expected_tmp + 1, sizeof (gunichar2) * n_chars);
        }
      else if (utf16_expected_tmp[0] == 0xfffe) /* ANTI-BOM */
        {
          fail ("line %d: conversion via iconv to \"UTF-16\" is not native-endian\n", line);
          goto out;
        }
      else
        {
          utf16_expected = g_new (gunichar2, n_chars + 1);
          memcpy (utf16_expected, utf16_expected_tmp, sizeof (gunichar2) * n_chars);
        }

      utf16_expected[n_chars] = '\0';

      utf16_from_utf8 = g_utf8_to_utf16 (utf8, -1, &items_read, &items_written, &error);
      if (!utf16_from_utf8)
        {
          fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message);
          goto out;
        }

      if ((gsize) items_read != utf8_len ||
          utf16_count (utf16_from_utf8) != items_written)
        {
          fail ("line %d: length error in conversion to ucs16\n", line);
          goto out;
        }

      utf16_from_ucs4 = g_ucs4_to_utf16 (ucs4, -1, &items_read, &items_written, &error);
      if (!utf16_from_ucs4)
        {
          fail ("line %d: conversion to ucs16 failed: %s\n", line, error->message);
          goto out;
        }

      if (items_read != ucs4_len ||
          utf16_count (utf16_from_ucs4) != items_written)
        {
          fail ("line %d: length error in conversion to ucs16\n", line);
          goto out;
        }

      if (!utf16_equal (utf16_from_utf8, utf16_expected) ||
          !utf16_equal (utf16_from_ucs4, utf16_expected))
        {
          fail ("line %d: results of conversion to ucs16 do not match\n", line);
          goto out;
        }

      utf8_result = g_utf16_to_utf8 (utf16_from_utf8, -1, &items_read, &items_written, &error);
      if (!utf8_result)
        {
          fail ("line %d: conversion back to utf8 failed: %s\n", line, error->message);
          goto out;
        }

      if (items_read != utf16_count (utf16_from_utf8) ||
          (gsize) items_written != utf8_len)
        {
          fail ("line %d: length error in conversion from ucs16 to utf8\n", line);
          goto out;
        }

      ucs4_result = g_utf16_to_ucs4 (utf16_from_ucs4, -1, &items_read, &items_written, &error);
      if (!ucs4_result)
        {
          fail ("line %d: conversion back to utf8/ucs4 failed\n", line);
          goto out;
        }

      /* Count the buffer that was actually converted.  Both UTF-16 buffers
       * were shown equal to utf16_expected above, so the value is the same
       * as counting utf16_from_utf8. */
      if (items_read != utf16_count (utf16_from_ucs4) ||
          items_written != ucs4_len)
        {
          fail ("line %d: length error in conversion from ucs16 to ucs4\n", line);
          goto out;
        }

      if (strcmp (utf8, utf8_result) != 0 ||
          !ucs4_equal (ucs4, ucs4_result))
        {
          fail ("line %d: conversion back to utf8/ucs4 did not match original\n", line);
          goto out;
        }
    }

out:
  /* g_free() and g_clear_error() both accept NULL, so this is safe no
   * matter how far the checks above got. */
  g_clear_error (&error);
  g_free (utf16_expected_tmp);
  g_free (utf16_expected);
  g_free (utf16_from_utf8);
  g_free (utf16_from_ucs4);
  g_free (utf8_result);
  g_free (ucs4_result);
}