/**
@brief move forward @a count characters, ignoring any that match supplied flags

Ignored characters are stepped over without being counted against @a count.
When @a skip_decomp is TRUE, a character whose canonical decomposition is
longer than one code point uses up that many units of @a count, so offsets
measured on normalized text can be mapped back onto the buffer.
The walk stops early if the iter cannot be advanced any further.

@param iter text iter referring to current position, updated in place
@param count the no. of chars to move, must be >= 0
@param skip_invisible TRUE to skip invisible chars (see the note in the loop)
@param skip_nontext TRUE to skip non-text chars
@param skip_decomp TRUE to skip canonical decompositions
*/
static void _e2_textiter_forward_chars_with_skipping (
	GtkTextIter *iter, gint count,
	gboolean skip_invisible, gboolean skip_nontext, gboolean skip_decomp)
{
	gint remaining;

	g_return_if_fail (count >= 0);

	remaining = count;
	while (remaining > 0)
	{
		gboolean ignored = FALSE;

		if (skip_nontext && gtk_text_iter_get_char (iter) == GTK_TEXT_UNKNOWN_CHAR)
			ignored = TRUE;
		/* NOTE the per-character test _gtk_text_btree_char_is_invisible (iter)
		   is disabled here, so while skip_invisible is set, every remaining
		   character is treated as ignored */
		if (!ignored && skip_invisible)
			ignored = TRUE;
		if (!ignored && skip_decomp)
		{
			/* being utf8-correct sucks; this accounts for extra offsets
			   coming from canonical decompositions of utf8 characters
			   (e.g. accented characters) which g_utf8_normalize() performs */
			gsize decomp_len;
#ifdef USE_GLIB2_30
			decomp_len = g_unichar_fully_decompose (
				gtk_text_iter_get_char (iter), FALSE, NULL, 0);
#else
			gunichar *decomp;
			decomp = g_unicode_canonical_decomposition (
				gtk_text_iter_get_char (iter), &decomp_len);
			g_free (decomp);
#endif
			/* explicit signed arithmetic instead of mixing gsize with gint */
			remaining -= (gint) decomp_len - 1;
		}

		/* a FALSE return means the iter can go no further (it is at the end
		   of the buffer); without this exit, a run of ignored characters
		   reaching the end would never decrement the counter and the loop
		   would never finish */
		if (!gtk_text_iter_forward_char (iter))
			break;
		if (!ignored)
			--remaining;
	}
}
static void lookup(const char *text, char ***pppWord, char ****ppppWordData) { if ((text[0] == '&' && text[1] == '#' && g_str_has_suffix(text, ";")) || g_str_has_prefix(text, "U+")) { gunichar uc; if (text[0] == '&') { if (text[2] == 'x' || text[2] == 'X') { uc = htoi(text+3); } else { uc = atoi(text+2); } } else { // U+ uc = htoi(text+2); } gchar utf8[7]; gint n = g_unichar_to_utf8(uc, utf8); utf8[n] = '\0'; *pppWord = (gchar **)g_malloc(sizeof(gchar *)*2); (*pppWord)[0] = g_strdup(text); (*pppWord)[1] = NULL; *ppppWordData = (gchar ***)g_malloc(sizeof(gchar **)*(1)); (*ppppWordData)[0] = (gchar **)g_malloc(sizeof(gchar *)*2); (*ppppWordData)[0][0] = build_dictdata('m', utf8); (*ppppWordData)[0][1] = NULL; return; } bool found; gunichar uc; if (g_utf8_strlen(text, -1) != 1) { found = false; // Don't query it. } else { uc = g_utf8_get_char(text); if (!gucharmap_unichar_validate (uc) || !gucharmap_unichar_isdefined (uc)) found = false; else found = true; } if (!found) { *pppWord = NULL; return; } std::string definition; definition += "\n"; gchar buf[12]; int n = gucharmap_unichar_to_printable_utf8 (uc, buf); if (n == 0) { definition += _("[not a printable character]"); } else { gchar *str = g_markup_escape_text(buf, n); definition += "<big><big><big><big>"; definition += str; definition += "</big></big></big></big>"; g_free(str); } definition += "\n\n"; gchar *temp; /* character name */ temp = g_strdup_printf ("U+%4.4X %s", uc, gucharmap_get_unicode_name (uc)); definition += "<big><b>"; definition += temp; definition += "</b></big>\n"; g_free (temp); definition += "\n<b>"; definition += _("General Character Properties"); definition += "</b>\n\n"; /* character category */ definition += get_vanilla_detail(_("Unicode category:"), gucharmap_get_unicode_category_name (uc)); /* canonical decomposition */ gunichar decomposition[G_UNICHAR_MAX_DECOMPOSITION_LENGTH]; gsize result_len; result_len = g_unichar_fully_decompose(uc, FALSE, decomposition, 
G_UNICHAR_MAX_DECOMPOSITION_LENGTH); if (result_len != 1) { definition += _("Canonical decomposition:"); definition += " "; definition += get_codepoint(decomposition[0]); for (gsize i = 1; i < result_len; i++) { definition += " + "; definition += get_codepoint(decomposition[i]); } definition += "\n"; } /* representations */ if (g_unichar_break_type(uc) != G_UNICODE_BREAK_SURROGATE) { definition += "\n<b>"; definition += _("Various Useful Representations"); definition += "</b>\n\n"; guchar utf8[7]; gunichar2 *utf16; GString *gstemp; n = g_unichar_to_utf8 (uc, (gchar *)utf8); utf16 = g_ucs4_to_utf16 (&uc, 1, NULL, NULL, NULL); /* UTF-8 */ gstemp = g_string_new (NULL); gint i; for (i = 0; i < n; i++) g_string_append_printf (gstemp, "0x%2.2X ", utf8[i]); g_string_erase (gstemp, gstemp->len - 1, -1); definition += get_vanilla_detail(_("UTF-8:"), gstemp->str); g_string_free (gstemp, TRUE); /* UTF-16 */ gstemp = g_string_new (NULL); g_string_append_printf (gstemp, "0x%4.4X", utf16[0]); if (utf16[0] != '\0' && utf16[1] != '\0') g_string_append_printf (gstemp, " 0x%4.4X", utf16[1]); definition += get_vanilla_detail(_("UTF-16:"), gstemp->str); g_string_free (gstemp, TRUE); /* an empty line */ definition += "\n"; /* C octal \012\234 */ gstemp = g_string_new (NULL); for (i = 0; i < n; i++) g_string_append_printf (gstemp, "\\%3.3o", utf8[i]); definition += get_vanilla_detail(_("C octal escaped UTF-8:"), gstemp->str); g_string_free (gstemp, TRUE); /* XML entity */ if ((0x0001 <= uc && uc <= 0xD7FF) || (0xE000 <= uc && uc <= 0xFFFD) || (0x10000 <= uc && uc <= 0x10FFFF)) { temp = g_strdup_printf ("<kref>&#%d;</kref>", uc); definition += get_vanilla_detail(_("XML decimal entity:"), temp); g_free (temp); temp = g_strdup_printf ("<kref>&#x%X;</kref>", uc); definition += get_vanilla_detail(_("XML hexadecimal entity:"), temp); g_free (temp); } g_free(utf16); } /* annotations */ std::string annotations; /* nameslist equals (alias names) */ const gchar **csarr; csarr = 
gucharmap_get_nameslist_equals (uc); if (csarr != NULL) { annotations += get_chocolate_detail(_("Alias names:"), csarr, FALSE); g_free (csarr); } /* nameslist stars (notes) */ csarr = gucharmap_get_nameslist_stars (uc); if (csarr != NULL) { annotations += get_chocolate_detail(_("Notes:"), csarr, TRUE); g_free (csarr); } /* nameslist exes (see also) */ gunichar *ucs; ucs = gucharmap_get_nameslist_exes (uc); if (ucs != NULL) { annotations += get_chocolate_detail_codepoints(_("See also:"), ucs); g_free (ucs); } /* nameslist pounds (approximate equivalents) */ csarr = gucharmap_get_nameslist_pounds (uc); if (csarr != NULL) { annotations += get_chocolate_detail(_("Approximate equivalents:"), csarr, TRUE); g_free (csarr); } /* nameslist colons (equivalents) */ csarr = gucharmap_get_nameslist_colons (uc); if (csarr != NULL) { annotations += get_chocolate_detail(_("Equivalents:"), csarr, TRUE); g_free (csarr); } if (!annotations.empty()) { definition += "\n<b>"; definition += _("Annotations and Cross References"); definition += "</b>\n\n"; definition += annotations; } std::string unihan; const gchar *csp = gucharmap_get_unicode_kDefinition (uc); if (csp) unihan += get_vanilla_detail(_("Definition in English:"), csp); csp = gucharmap_get_unicode_kMandarin (uc); if (csp) unihan += get_vanilla_detail(_("Mandarin Pronunciation:"), csp); csp = gucharmap_get_unicode_kCantonese (uc); if (csp) unihan += get_vanilla_detail(_("Cantonese Pronunciation:"), csp); csp = gucharmap_get_unicode_kJapaneseOn (uc); if (csp) unihan += get_vanilla_detail(_("Japanese On Pronunciation:"), csp); csp = gucharmap_get_unicode_kJapaneseKun (uc); if (csp) unihan += get_vanilla_detail(_("Japanese Kun Pronunciation:"), csp); csp = gucharmap_get_unicode_kTang (uc); if (csp) unihan += get_vanilla_detail(_("Tang Pronunciation:"), csp); csp = gucharmap_get_unicode_kKorean (uc); if (csp) unihan += get_vanilla_detail(_("Korean Pronunciation:"), csp); if (!unihan.empty()) { definition += "\n<b>"; definition += 
_("CJK Ideograph Information"); definition += "</b>\n\n"; definition += unihan; } n = definition.length(); int l = n-1; while (l >= 0 && definition[l] == '\n') { l--; } if (l < n-1) { definition.erase(l+1, n-1-l); } *pppWord = (gchar **)g_malloc(sizeof(gchar *)*2); (*pppWord)[0] = g_strdup(text); (*pppWord)[1] = NULL; *ppppWordData = (gchar ***)g_malloc(sizeof(gchar **)*(1)); (*ppppWordData)[0] = (gchar **)g_malloc(sizeof(gchar *)*2); (*ppppWordData)[0][0] = build_dictdata('x', definition.c_str()); (*ppppWordData)[0][1] = NULL; }