/**
 * gucharmap_unichar_to_printable_utf8:
 * @uc: a unicode character
 * @outbuf: output buffer, must have at least 10 bytes of space.
 *          If %NULL, the length will be computed and returned
 *          and nothing will be written to @outbuf.
 *
 * Converts a single character to UTF-8 suitable for rendering:
 *
 *  - an invalid character, or one that is neither graphical nor in a
 *    private-use area, produces nothing (0 is returned);
 *  - a combining, enclosing or non-spacing mark is emitted as
 *    SPACE, ZERO WIDTH JOINER (U+200D), followed by the mark itself, so
 *    that the mark has a base character to attach to;
 *  - every other character is emitted as its plain UTF-8 encoding.
 *
 * No terminating NUL is written by this function; callers that need a C
 * string must terminate the buffer themselves using the return value.
 *
 * Return value: number of bytes written (or that would be written when
 *               @outbuf is %NULL)
 **/
gint
gucharmap_unichar_to_printable_utf8 (gunichar uc, gchar *outbuf)
{
  /* Unicode Standard 3.2, section 2.6, "By convention, diacritical marks
   * used by the Unicode Standard may be exhibited in (apparent) isolation
   * by applying them to U+0020 SPACE or to U+00A0 NO BREAK SPACE." */

  /* 17:10 < owen> noah: I'm *not* claiming that what Pango does currently
   *               is right, but convention isn't a requirement. I think
   *               it's probably better to do the Uniscribe thing and put
   *               the lone combining mark on a dummy character and require
   *               ZWJ
   * 17:11 < noah> owen: do you mean that i should put a ZWJ in there, or
   *               that pango will do that?
   * 17:11 < owen> noah: I mean, you should (assuming some future more
   *               capable version of Pango) put it in there
   */

  if (! gucharmap_unichar_validate (uc)
      || (! gucharmap_unichar_isgraph (uc)
          && gucharmap_unichar_type (uc) != G_UNICODE_PRIVATE_USE))
    return 0;

  if (gucharmap_unichar_type (uc) == G_UNICODE_COMBINING_MARK
      || gucharmap_unichar_type (uc) == G_UNICODE_ENCLOSING_MARK
      || gucharmap_unichar_type (uc) == G_UNICODE_NON_SPACING_MARK)
    {
      /* Length of the "SPACE + ZERO WIDTH JOINER" carrier in UTF-8. */
      const gint prefix_len = 4;

      /* Honour the documented contract: with a NULL buffer only the
       * length is computed, so the carrier bytes must not be stored. */
      if (outbuf != NULL)
        {
          outbuf[0] = ' ';
          outbuf[1] = '\xe2'; /* ZERO */
          outbuf[2] = '\x80'; /* WIDTH */
          outbuf[3] = '\x8d'; /* JOINER (0x200D) */
        }

      return prefix_len
             + g_unichar_to_utf8 (uc, outbuf != NULL ? outbuf + prefix_len : NULL);
    }

  return g_unichar_to_utf8 (uc, outbuf);
}
static void lookup(const char *text, char ***pppWord, char ****ppppWordData) { if ((text[0] == '&' && text[1] == '#' && g_str_has_suffix(text, ";")) || g_str_has_prefix(text, "U+")) { gunichar uc; if (text[0] == '&') { if (text[2] == 'x' || text[2] == 'X') { uc = htoi(text+3); } else { uc = atoi(text+2); } } else { // U+ uc = htoi(text+2); } gchar utf8[7]; gint n = g_unichar_to_utf8(uc, utf8); utf8[n] = '\0'; *pppWord = (gchar **)g_malloc(sizeof(gchar *)*2); (*pppWord)[0] = g_strdup(text); (*pppWord)[1] = NULL; *ppppWordData = (gchar ***)g_malloc(sizeof(gchar **)*(1)); (*ppppWordData)[0] = (gchar **)g_malloc(sizeof(gchar *)*2); (*ppppWordData)[0][0] = build_dictdata('m', utf8); (*ppppWordData)[0][1] = NULL; return; } bool found; gunichar uc; if (g_utf8_strlen(text, -1) != 1) { found = false; // Don't query it. } else { uc = g_utf8_get_char(text); if (!gucharmap_unichar_validate (uc) || !gucharmap_unichar_isdefined (uc)) found = false; else found = true; } if (!found) { *pppWord = NULL; return; } std::string definition; definition += "\n"; gchar buf[12]; int n = gucharmap_unichar_to_printable_utf8 (uc, buf); if (n == 0) { definition += _("[not a printable character]"); } else { gchar *str = g_markup_escape_text(buf, n); definition += "<big><big><big><big>"; definition += str; definition += "</big></big></big></big>"; g_free(str); } definition += "\n\n"; gchar *temp; /* character name */ temp = g_strdup_printf ("U+%4.4X %s", uc, gucharmap_get_unicode_name (uc)); definition += "<big><b>"; definition += temp; definition += "</b></big>\n"; g_free (temp); definition += "\n<b>"; definition += _("General Character Properties"); definition += "</b>\n\n"; /* character category */ definition += get_vanilla_detail(_("Unicode category:"), gucharmap_get_unicode_category_name (uc)); /* canonical decomposition */ gunichar *decomposition; gsize result_len; decomposition = gucharmap_unicode_canonical_decomposition (uc, &result_len); if (result_len != 1) { definition += _("Canonical 
decomposition:"); definition += " "; definition += get_codepoint(decomposition[0]); for (gsize i = 1; i < result_len; i++) { definition += " + "; definition += get_codepoint(decomposition[i]); } definition += "\n"; } g_free (decomposition); /* representations */ if (g_unichar_break_type(uc) != G_UNICODE_BREAK_SURROGATE) { definition += "\n<b>"; definition += _("Various Useful Representations"); definition += "</b>\n\n"; guchar utf8[7]; gunichar2 *utf16; GString *gstemp; n = g_unichar_to_utf8 (uc, (gchar *)utf8); utf16 = g_ucs4_to_utf16 (&uc, 1, NULL, NULL, NULL); /* UTF-8 */ gstemp = g_string_new (NULL); gint i; for (i = 0; i < n; i++) g_string_append_printf (gstemp, "0x%2.2X ", utf8[i]); g_string_erase (gstemp, gstemp->len - 1, -1); definition += get_vanilla_detail(_("UTF-8:"), gstemp->str); g_string_free (gstemp, TRUE); /* UTF-16 */ gstemp = g_string_new (NULL); g_string_append_printf (gstemp, "0x%4.4X", utf16[0]); if (utf16[0] != '\0' && utf16[1] != '\0') g_string_append_printf (gstemp, " 0x%4.4X", utf16[1]); definition += get_vanilla_detail(_("UTF-16:"), gstemp->str); g_string_free (gstemp, TRUE); /* an empty line */ definition += "\n"; /* C octal \012\234 */ gstemp = g_string_new (NULL); for (i = 0; i < n; i++) g_string_append_printf (gstemp, "\\%3.3o", utf8[i]); definition += get_vanilla_detail(_("C octal escaped UTF-8:"), gstemp->str); g_string_free (gstemp, TRUE); /* XML entity */ if ((0x0001 <= uc && uc <= 0xD7FF) || (0xE000 <= uc && uc <= 0xFFFD) || (0x10000 <= uc && uc <= 0x10FFFF)) { temp = g_strdup_printf ("<kref>&#%d;</kref>", uc); definition += get_vanilla_detail(_("XML decimal entity:"), temp); g_free (temp); temp = g_strdup_printf ("<kref>&#x%X;</kref>", uc); definition += get_vanilla_detail(_("XML hexadecimal entity:"), temp); g_free (temp); } g_free(utf16); } /* annotations */ std::string annotations; /* nameslist equals (alias names) */ const gchar **csarr; csarr = gucharmap_get_nameslist_equals (uc); if (csarr != NULL) { annotations += 
get_chocolate_detail(_("Alias names:"), csarr, FALSE); g_free (csarr); } /* nameslist stars (notes) */ csarr = gucharmap_get_nameslist_stars (uc); if (csarr != NULL) { annotations += get_chocolate_detail(_("Notes:"), csarr, TRUE); g_free (csarr); } /* nameslist exes (see also) */ gunichar *ucs; ucs = gucharmap_get_nameslist_exes (uc); if (ucs != NULL) { annotations += get_chocolate_detail_codepoints(_("See also:"), ucs); g_free (ucs); } /* nameslist pounds (approximate equivalents) */ csarr = gucharmap_get_nameslist_pounds (uc); if (csarr != NULL) { annotations += get_chocolate_detail(_("Approximate equivalents:"), csarr, TRUE); g_free (csarr); } /* nameslist colons (equivalents) */ csarr = gucharmap_get_nameslist_colons (uc); if (csarr != NULL) { annotations += get_chocolate_detail(_("Equivalents:"), csarr, TRUE); g_free (csarr); } if (!annotations.empty()) { definition += "\n<b>"; definition += _("Annotations and Cross References"); definition += "</b>\n\n"; definition += annotations; } std::string unihan; const gchar *csp = gucharmap_get_unicode_kDefinition (uc); if (csp) unihan += get_vanilla_detail(_("Definition in English:"), csp); csp = gucharmap_get_unicode_kMandarin (uc); if (csp) unihan += get_vanilla_detail(_("Mandarin Pronunciation:"), csp); csp = gucharmap_get_unicode_kCantonese (uc); if (csp) unihan += get_vanilla_detail(_("Cantonese Pronunciation:"), csp); csp = gucharmap_get_unicode_kJapaneseOn (uc); if (csp) unihan += get_vanilla_detail(_("Japanese On Pronunciation:"), csp); csp = gucharmap_get_unicode_kJapaneseKun (uc); if (csp) unihan += get_vanilla_detail(_("Japanese Kun Pronunciation:"), csp); csp = gucharmap_get_unicode_kTang (uc); if (csp) unihan += get_vanilla_detail(_("Tang Pronunciation:"), csp); csp = gucharmap_get_unicode_kKorean (uc); if (csp) unihan += get_vanilla_detail(_("Korean Pronunciation:"), csp); if (!unihan.empty()) { definition += "\n<b>"; definition += _("CJK Ideograph Information"); definition += "</b>\n\n"; definition += 
unihan; } n = definition.length(); int l = n-1; while (l >= 0 && definition[l] == '\n') { l--; } if (l < n-1) { definition.erase(l+1, n-1-l); } *pppWord = (gchar **)g_malloc(sizeof(gchar *)*2); (*pppWord)[0] = g_strdup(text); (*pppWord)[1] = NULL; *ppppWordData = (gchar ***)g_malloc(sizeof(gchar **)*(1)); (*ppppWordData)[0] = (gchar **)g_malloc(sizeof(gchar *)*2); (*ppppWordData)[0][0] = build_dictdata('x', definition.c_str()); (*ppppWordData)[0][1] = NULL; }