static CharsetTable *get_charset_table(void) { static CharsetTable *ctable = NULL; EncArray *encarray; gint i; if (!ctable) { ctable = g_malloc(sizeof(CharsetTable)); ctable->num = 0; ctable->charset[ctable->num] = get_default_charset(); ctable->str[ctable->num] = g_strdup_printf(_("Current Locale (%s)"), get_default_charset()); ctable->num++; ctable->charset[ctable->num] = "UTF-8"; ctable->str[ctable->num] = ctable->charset[ctable->num]; ctable->num++; encarray = get_encoding_items(get_encoding_code()); for (i = 0; i < ENCODING_MAX_ITEM_NUM; i++) if (encarray->item[i]) { ctable->charset[ctable->num] = encarray->item[i]; ctable->str[ctable->num] = encarray->item[i]; ctable->num++; } } return ctable; }
static const gchar *detect_charset_cylillic(const gchar *text) { guint8 c = *text; gboolean noniso = FALSE; guint32 xc = 0, xd = 0, xef = 0; const gchar *charset = get_encoding_items(get_encoding_code())->item[OPENI18N]; while ((c = *text++) != '\0') { if (c >= 0x80 && c <= 0x9F) noniso = TRUE; else if (c >= 0xC0 && c <= 0xCF) xc++; else if (c >= 0xD0 && c <= 0xDF) xd++; else if (c >= 0xE0) xef++; } if (!noniso && ((xc + xef) < xd)) charset = "ISO-8859-5"; else if ((xc + xd) < xef) charset = "CP1251"; return charset; }
/*
 * Guess which Chinese charset NUL-terminated text is written in.
 *
 * Starts from the IANA item of the current encoding array and scans the
 * text byte by byte:
 *   - a lead byte 0x81-0x87 selects "GB18030" and stops the scan;
 *   - any lead byte >= 0x88 consumes one trail byte;
 *       trail 0x30-0x39 or 0x80-0xA0 selects "GB18030" and stops the scan;
 *       trail 0x40-0x7E after a lead in 0xA1-0xC6 or 0xC9-0xF9 tentatively
 *       selects "BIG5" and the scan continues (a later pair may still
 *       switch the answer to "GB18030");
 *       any other pair is ambiguous (GBK/Big5-HKSCS) and changes nothing.
 *
 * A lead byte that is the last byte of the string is treated as a truncated
 * pair and ends the scan.  Previously the terminator itself was consumed as
 * the trail byte and the loop went on reading past the end of the string.
 *
 * Returns the selected charset name (never freed by the caller; it is a
 * literal or a pointer taken from the encoding array).
 */
static const gchar *detect_charset_chinese(const gchar *text)
{
	const gchar *charset = get_encoding_items(get_encoding_code())->item[IANA];
	guint8 lead;
	guint8 trail;

	while ((lead = *text++) != '\0') {
		/* ASCII and 0x80 carry no information. */
		if (lead < 0x81)
			continue;

		if (lead <= 0x87) {
			charset = "GB18030";
			break;
		}

		/* Peek first: never step over the terminating NUL. */
		trail = *text;
		if (trail == '\0')
			break;
		text++;

		if ((trail >= 0x30 && trail <= 0x39) ||
		    (trail >= 0x80 && trail <= 0xA0)) {
			charset = "GB18030";
			break;
		}

		if (trail >= 0x40 && trail <= 0x7E &&
		    ((lead >= 0xA1 && lead <= 0xC6) ||
		     (lead >= 0xC9 && lead <= 0xF9)))
			charset = "BIG5";
		/* else: GBK/Big5-HKSCS cannot be told apart from this pair */
	}

	return charset;
}
static void parse_args(gint argc, gchar **argv, FileInfo *fi) { EncArray *encarray; gint i; GError *error = NULL; #if GLIB_CHECK_VERSION(2, 6, 0) GOptionContext *context; gchar *opt_codeset = NULL; gint opt_tab_width = 0; gboolean opt_jump = 0; gboolean opt_version = FALSE; GOptionEntry entries[] = { { "codeset", 0, 0, G_OPTION_ARG_STRING, &opt_codeset, "Set codeset to open file", "CODESET" }, { "tab-width", 0, 0, G_OPTION_ARG_INT, &opt_tab_width, "Set tab width", "WIDTH" }, { "jump", 0, 0, G_OPTION_ARG_INT, &opt_jump, "Jump to specified line", "LINENUM" }, { "version", 0, 0, G_OPTION_ARG_NONE, &opt_version, "Show version number", NULL }, { NULL } }; //Structure defined in glib for entries and commandline option parser //4th argument is an enum defined in glib context = g_option_context_new("[Filename]"); //Creates new context for option parsing g_option_context_add_main_entries(context, entries, PACKAGE); //PACKAGE == Translation Domain ?? g_option_context_add_group(context, gtk_get_option_group(TRUE)); g_option_context_set_ignore_unknown_options(context, FALSE); //sets error when unknown command g_option_context_parse(context, &argc, &argv, &error); //passes the arguments g_option_context_free(context); //free for gcontext if (error) { g_print("%s: %s\n", PACKAGE, error->message); //things to do for each option g_error_free(error); exit(-1); } if (opt_version) { g_print("%s\n", PACKAGE_STRING); exit(0); } if (opt_codeset) { g_convert("TEST", -1, "UTF-8", opt_codeset, NULL, NULL, &error); if (error) { g_error_free(error); error = NULL; } else { g_free(fi->charset); fi->charset = g_strdup(opt_codeset); } } if (opt_tab_width) indent_set_default_tab_width(opt_tab_width); if (opt_jump) jump_linenum = opt_jump; #else //code for older version of glib gint c; //getopt also parses the command line argument do { c = getopt_long(argc, argv, "", longopts, NULL); switch (c) { case 0: if (optarg) { g_convert("TEST", -1, "UTF-8", optarg, NULL, NULL, &error); if (error) { 
g_error_free(error); error = NULL; } else { g_free(fi->charset); fi->charset = g_strdup(optarg); } } break; case 't': if (optarg) indent_set_default_tab_width(atoi(optarg)); break; case 'j': if (optarg) jump_linenum = atoi(optarg); break; case 'v': g_print("%s\n", PACKAGE_STRING); exit(0); case '?': print_usage(); exit(0); } } while (c != -1); #endif if (fi->charset //Find fileinfo encarray data structure && (g_strcasecmp(fi->charset, get_default_charset()) != 0) && (g_strcasecmp(fi->charset, "UTF-8") != 0)) { encarray = get_encoding_items(get_encoding_code()); for (i = 0; i < ENCODING_MAX_ITEM_NUM; i++) if (encarray->item[i]) if (g_strcasecmp(fi->charset, encarray->item[i]) == 0) break; if (i == ENCODING_MAX_ITEM_NUM) fi->charset_flag = TRUE; } #if GLIB_CHECK_VERSION(2, 6, 0) if (argc >= 2) fi->filename = parse_file_uri(argv[1]); #else if (optind < argc) fi->filename = parse_file_uri(argv[optind]); #endif }
const gchar *detect_charset(const gchar *text) { guint8 c = *text; const gchar *charset = NULL; if (g_utf8_validate(text, -1, NULL)) { while ((c = *text++) != '\0') { if (c > 0x7F) { charset = "UTF-8"; break; } if (c == 0x1B) /* ESC */ { c = *text++; if (c == '$') { c = *text++; switch (c) { case 'B': // JIS X 0208-1983 case '@': // JIS X 0208-1978 charset = "ISO-2022-JP"; continue; case 'A': // GB2312-1980 charset = "ISO-2022-JP-2"; break; case '(': c = *text++; switch (c) { case 'C': // KSC5601-1987 case 'D': // JIS X 0212-1990 charset = "ISO-2022-JP-2"; } break; case ')': c = *text++; if (c == 'C') charset = "ISO-2022-KR"; // KSC5601-1987 } break; } } } if (!charset) charset = get_default_charset(); } if (!charset) { switch (get_encoding_code()) { case LATINC: case LATINC_UA: case LATINC_TJ: charset = detect_charset_cylillic(text); // fuzzy... break; case CHINESE_CN: case CHINESE_TW: case CHINESE_HK: charset = detect_charset_chinese(text); break; case JAPANESE: charset = detect_charset_japanese(text); break; case KOREAN: charset = detect_charset_korean(text); break; case VIETNAMESE: case THAI: case GEORGIAN: charset = get_encoding_items(get_encoding_code())->item[OPENI18N]; break; default: if (strcmp(get_default_charset(), "UTF-8") != 0) charset = get_default_charset(); else if (detect_noniso(text)) charset = get_encoding_items(get_encoding_code())->item[CODEPAGE]; else charset = get_encoding_items(get_encoding_code())->item[OPENI18N]; if (!charset) charset = get_encoding_items(get_encoding_code())->item[IANA]; } } return charset; }
/*
 * Parse the command line with GOption and record the results in *fi and in
 * globals.
 *
 * Recognised options: --codeset, --tab-width, --jump and --version, plus
 * whatever GTK's own option group accepts.
 *
 * Effects:
 *   - A parse error prints "<PACKAGE>: <message>" and exits with status -1.
 *   - --version prints PACKAGE_STRING and exits with status 0.
 *   - --codeset replaces fi->charset, but only if a probe conversion from
 *     that codeset to UTF-8 succeeds; an unusable name is silently ignored.
 *   - --tab-width is passed to indent_set_default_tab_width().
 *   - --jump is stored in the global jump_linenum.
 *   - fi->charset_flag is set to TRUE when fi->charset is neither the
 *     default charset, nor "UTF-8", nor one of the current encoding items.
 *   - The first remaining argument, if any, is run through parse_file_uri()
 *     and stored in fi->filename.
 *
 * Fixes relative to the previous version: the buffer returned by the probe
 * g_convert() and the option string allocated by GOption are now freed, and
 * opt_jump is a gint to match G_OPTION_ARG_INT.
 */
static void parse_args(gint argc, gchar **argv, FileInfo *fi)
{
	EncArray *encarray;
	gint i;
	GError *error = NULL;
	GOptionContext *context;
	gchar *opt_codeset = NULL;
	gchar *converted;
	gint opt_tab_width = 0;
	gint opt_jump = 0;
	gboolean opt_version = FALSE;
	GOptionEntry entries[] = {
		{ "codeset", 0, 0, G_OPTION_ARG_STRING, &opt_codeset,
		  "Set codeset to open file", "CODESET" },
		{ "tab-width", 0, 0, G_OPTION_ARG_INT, &opt_tab_width,
		  "Set tab width", "WIDTH" },
		{ "jump", 0, 0, G_OPTION_ARG_INT, &opt_jump,
		  "Jump to specified line", "LINENUM" },
		{ "version", 0, 0, G_OPTION_ARG_NONE, &opt_version,
		  "Show version number", NULL },
		{ NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL }
	};

	context = g_option_context_new("[filename]");
	/* PACKAGE is passed as the translation domain for the --help text. */
	g_option_context_add_main_entries(context, entries, PACKAGE);
	g_option_context_add_group(context, gtk_get_option_group(TRUE));
	/* Unknown options are reported as errors instead of being skipped. */
	g_option_context_set_ignore_unknown_options(context, FALSE);
	g_option_context_parse(context, &argc, &argv, &error);
	g_option_context_free(context);

	if (error) {
		g_print("%s: %s\n", PACKAGE, error->message);
		g_error_free(error);
		exit(-1);
	}
	if (opt_version) {
		g_print("%s\n", PACKAGE_STRING);
		exit(0);
	}

	if (opt_codeset) {
		/* Probe conversion: only accept a codeset GLib can convert from. */
		converted = g_convert("TEST", -1, "UTF-8", opt_codeset, NULL, NULL, &error);
		if (error) {
			g_error_free(error);
			error = NULL;
		} else {
			g_free(fi->charset);
			fi->charset = g_strdup(opt_codeset);
		}
		g_free(converted);
		/* G_OPTION_ARG_STRING hands us a newly allocated string. */
		g_free(opt_codeset);
		opt_codeset = NULL;
	}
	if (opt_tab_width)
		indent_set_default_tab_width(opt_tab_width);
	if (opt_jump)
		jump_linenum = opt_jump;

	/*
	 * Flag a charset that is not the locale default, not UTF-8 and not
	 * one of the items offered for the current encoding.
	 */
	if (fi->charset
		&& (g_ascii_strcasecmp(fi->charset, get_default_charset()) != 0)
		&& (g_ascii_strcasecmp(fi->charset, "UTF-8") != 0)) {
		encarray = get_encoding_items(get_encoding_code());
		for (i = 0; i < ENCODING_MAX_ITEM_NUM; i++)
			if (encarray->item[i])
				if (g_ascii_strcasecmp(fi->charset, encarray->item[i]) == 0)
					break;
		if (i == ENCODING_MAX_ITEM_NUM)
			fi->charset_flag = TRUE;
	}

	/* GOption removed the options it consumed, so argv[1] is the file. */
	if (argc >= 2)
		fi->filename = parse_file_uri(argv[1]);
}