Exemplo n.º 1
0
/*
 * Return the lazily built table of selectable charsets.
 *
 * The table is created on the first call and cached in a function-local
 * static for every later call.  Entries are added in this order:
 *   1. the default (locale) charset, labelled "Current Locale (<name>)",
 *   2. "UTF-8",
 *   3. every non-NULL item of the encoding array for the current
 *      encoding code.
 */
static CharsetTable *get_charset_table(void)
{
	static CharsetTable *table = NULL;
	EncArray *items;
	gint idx;

	/* Already built: hand back the cached instance. */
	if (table)
		return table;

	table = g_malloc(sizeof(CharsetTable));
	table->num = 0;

	/* Entry for the current locale, with a translated display label. */
	table->charset[table->num] = get_default_charset();
	table->str[table->num] =
		g_strdup_printf(_("Current Locale (%s)"), get_default_charset());
	table->num++;

	/* UTF-8 uses the charset name itself as its label. */
	table->charset[table->num] = "UTF-8";
	table->str[table->num] = table->charset[table->num];
	table->num++;

	/* Append the encoding items, skipping empty slots. */
	items = get_encoding_items(get_encoding_code());
	for (idx = 0; idx < ENCODING_MAX_ITEM_NUM; idx++) {
		if (!items->item[idx])
			continue;
		table->charset[table->num] = items->item[idx];
		table->str[table->num] = items->item[idx];
		table->num++;
	}

	return table;
}
Exemplo n.º 2
0
/*
 * Guess the charset of `text` from the distribution of its high bytes.
 *
 * Bytes are tallied by range: 0x80-0x9F (only remembered as "seen"),
 * 0xC0-0xCF, 0xD0-0xDF and 0xE0-0xFF.  Bytes in 0xA0-0xBF and below 0x80
 * are ignored.
 *
 * Returns "ISO-8859-5" when no 0x80-0x9F byte was seen and the 0xD0-0xDF
 * count exceeds the other two counts combined, "CP1251" when the
 * 0xE0-0xFF count exceeds the other two combined, and otherwise the
 * OPENI18N item of the current encoding array.
 */
static const gchar *detect_charset_cylillic(const gchar *text)
{
	const guint8 *cursor = (const guint8 *)text;
	gboolean seen_80_9f = FALSE;
	guint32 count_cx = 0;
	guint32 count_dx = 0;
	guint32 count_ex_fx = 0;
	const gchar *fallback;

	fallback = get_encoding_items(get_encoding_code())->item[OPENI18N];

	for (; *cursor != '\0'; cursor++) {
		guint8 byte = *cursor;

		if (byte < 0x80)
			continue;

		if (byte <= 0x9F)
			seen_80_9f = TRUE;
		else if (byte >= 0xE0)
			count_ex_fx++;
		else if (byte >= 0xD0)
			count_dx++;
		else if (byte >= 0xC0)
			count_cx++;
		/* 0xA0-0xBF: not counted */
	}

	if (!seen_80_9f && (count_cx + count_ex_fx) < count_dx)
		return "ISO-8859-5";
	if ((count_cx + count_dx) < count_ex_fx)
		return "CP1251";

	return fallback;
}
Exemplo n.º 3
0
/*
 * Guess which Chinese charset `text` is encoded in.
 *
 * The text is scanned as a sequence of single bytes and lead/trail byte
 * pairs:
 *   - a byte in 0x81-0x87 selects "GB18030" immediately;
 *   - any byte >= 0x88 is treated as a lead byte and the following byte
 *     as its trail byte.  A trail byte in 0x30-0x39 or 0x80-0xA0 selects
 *     "GB18030" immediately.  A trail byte in 0x40-0x7E after a lead byte
 *     in 0xA1-0xC6 or 0xC9-0xF9 selects "BIG5", but scanning continues so
 *     that a later GB18030-only pair can still override it;
 *   - other pairs are inconclusive (GBK/Big5-HKSCS cannot be determined).
 *
 * Returns the detected name, or the IANA item of the current encoding
 * array when nothing conclusive was found.
 *
 * The trail byte is peeked before it is consumed: if the text ends right
 * after a lead byte the scan stops there instead of stepping over the
 * terminating NUL and reading past the end of the buffer.
 */
static const gchar *detect_charset_chinese(const gchar *text)
{
	guint8 c;
	guint8 trail;
	const gchar *charset = get_encoding_items(get_encoding_code())->item[IANA];

	while ((c = *text++) != '\0') {
		if (c < 0x81)
			continue;

		if (c <= 0x87) {
			charset = "GB18030";
			break;
		}

		/* c >= 0x88: lead byte, a trail byte must follow. */
		trail = *text;
		if (trail == '\0')
			break;	/* truncated pair: never advance beyond the NUL */
		text++;

		if ((trail >= 0x30 && trail <= 0x39) || (trail >= 0x80 && trail <= 0xA0)) {
			charset = "GB18030";
			break;
		}

		if (((c >= 0xA1 && c <= 0xC6) || (c >= 0xC9 && c <= 0xF9))
			&& (trail >= 0x40 && trail <= 0x7E))
			charset = "BIG5";
		/* else GBK/Big5-HKSCS cannot determine */
	}

	return charset;
}
Exemplo n.º 4
0
/*
 * Parse the command line and store the results in `fi`.
 *
 * Recognised options: --codeset, --tab-width, --jump and --version.
 * With GLib >= 2.6 the GOption parser is used; otherwise getopt_long()
 * with the file-level `longopts` table.
 *
 *  - --codeset is accepted only if g_convert() can convert from it to
 *    UTF-8; an unusable codeset is silently ignored.  On success
 *    fi->charset is replaced by a copy of the given name.
 *  - --tab-width is forwarded to indent_set_default_tab_width().
 *  - --jump is stored in jump_linenum.
 *  - --version prints PACKAGE_STRING and exits with status 0.
 *  - A GOption parse error prints the message and exits with status -1.
 *
 * fi->charset_flag is set to TRUE when fi->charset is neither the default
 * charset, nor UTF-8, nor one of the current encoding items.
 * The first remaining non-option argument, if any, becomes fi->filename
 * via parse_file_uri().
 */
static void parse_args(gint argc, gchar **argv, FileInfo *fi)
{
	EncArray *encarray;
	gint i;
	GError *error = NULL;
	gchar *probe;	/* throw-away result of the codeset test conversion */

#if GLIB_CHECK_VERSION(2, 6, 0)
	GOptionContext *context;
	gchar *opt_codeset = NULL;
	gint opt_tab_width = 0;
	gint opt_jump = 0;	/* G_OPTION_ARG_INT stores a gint */
	gboolean opt_version = FALSE;
	GOptionEntry entries[] =
	{
		{ "codeset", 0, 0, G_OPTION_ARG_STRING, &opt_codeset, "Set codeset to open file", "CODESET" },
		{ "tab-width", 0, 0, G_OPTION_ARG_INT, &opt_tab_width, "Set tab width", "WIDTH" },
		{ "jump", 0, 0, G_OPTION_ARG_INT, &opt_jump, "Jump to specified line", "LINENUM" },
		{ "version", 0, 0, G_OPTION_ARG_NONE, &opt_version, "Show version number", NULL },
		{ NULL }
	};

	context = g_option_context_new("[Filename]");
	g_option_context_add_main_entries(context, entries, PACKAGE);
	g_option_context_add_group(context, gtk_get_option_group(TRUE));
	/* Unknown options are reported as errors rather than ignored. */
	g_option_context_set_ignore_unknown_options(context, FALSE);
	g_option_context_parse(context, &argc, &argv, &error);
	g_option_context_free(context);

	if (error) {
		g_print("%s: %s\n", PACKAGE, error->message);
		g_error_free(error);
		exit(-1);
	}
	if (opt_version) {
		g_print("%s\n", PACKAGE_STRING);
		exit(0);
	}
	if (opt_codeset) {
		/* Trial conversion: only the success/failure matters. */
		probe = g_convert("TEST", -1, "UTF-8", opt_codeset, NULL, NULL, &error);
		g_free(probe);
		if (error) {
			g_error_free(error);
			error = NULL;
		} else {
			g_free(fi->charset);
			fi->charset = g_strdup(opt_codeset);
		}
		/* The option parser allocated this string for us. */
		g_free(opt_codeset);
		opt_codeset = NULL;
	}
	if (opt_tab_width)
		indent_set_default_tab_width(opt_tab_width);
	if (opt_jump)
		jump_linenum = opt_jump;

#else	/* GLib older than 2.6: fall back to getopt_long() */
	gint c;

	do {
		c = getopt_long(argc, argv, "", longopts, NULL);
		switch (c) {
		case 0:
			if (optarg) {
				/* Trial conversion: only the success/failure matters. */
				probe = g_convert("TEST", -1, "UTF-8", optarg, NULL, NULL, &error);
				g_free(probe);
				if (error) {
					g_error_free(error);
					error = NULL;
				} else {
					g_free(fi->charset);
					fi->charset = g_strdup(optarg);
				}
			}
			break;
		case 't':
			if (optarg)
				indent_set_default_tab_width(atoi(optarg));
			break;
		case 'j':
			if (optarg)
				jump_linenum = atoi(optarg);
			break;
		case 'v':
			g_print("%s\n", PACKAGE_STRING);
			exit(0);
		case '?':
			print_usage();
			exit(0);
		}
	} while (c != -1);
#endif

	/*
	 * Flag charsets that are neither the default, nor UTF-8, nor one of
	 * the current encoding items.  Charset names are compared with the
	 * locale-independent ASCII comparison.
	 */
	if (fi->charset
		&& (g_ascii_strcasecmp(fi->charset, get_default_charset()) != 0)
		&& (g_ascii_strcasecmp(fi->charset, "UTF-8") != 0)) {
		encarray = get_encoding_items(get_encoding_code());
		for (i = 0; i < ENCODING_MAX_ITEM_NUM; i++)
			if (encarray->item[i])
				if (g_ascii_strcasecmp(fi->charset, encarray->item[i]) == 0)
					break;
		if (i == ENCODING_MAX_ITEM_NUM)
			fi->charset_flag = TRUE;
	}
#if GLIB_CHECK_VERSION(2, 6, 0)
	if (argc >= 2)
		fi->filename = parse_file_uri(argv[1]);
#else
	if (optind < argc)
		fi->filename = parse_file_uri(argv[optind]);
#endif
}
Exemplo n.º 5
0
/*
 * Guess the charset of the NUL-terminated buffer `text`.
 *
 * If the buffer is valid UTF-8 it is scanned byte by byte:
 *   - the first byte above 0x7F means "UTF-8";
 *   - ESC '$' followed by 'B' or '@' selects "ISO-2022-JP" and scanning
 *     continues;
 *   - ESC '$' 'A', ESC '$' '(' 'C' and ESC '$' '(' 'D' select
 *     "ISO-2022-JP-2"; ESC '$' ')' 'C' selects "ISO-2022-KR"; any of
 *     these, or any other byte after ESC '$', ends the scan;
 *   - if nothing was selected the default charset is used.
 *
 * If the buffer is not valid UTF-8 (or no charset was obtained above)
 * the decision is delegated according to get_encoding_code(): to one of
 * the language specific detectors, or to the items of the current
 * encoding array.
 *
 * The escape-sequence look-ahead never consumes the terminating NUL, so
 * a buffer that ends in the middle of an escape sequence does not make
 * the scan run past the end.  The scan also uses its own cursor, leaving
 * `text` pointing at the start of the buffer for the fallback detectors.
 */
const gchar *detect_charset(const gchar *text)
{
	guint8 c;
	const gchar *charset = NULL;

	if (g_utf8_validate(text, -1, NULL)) {
		const gchar *p = text;

		while ((c = *p++) != '\0') {
			if (c > 0x7F) {
				charset = "UTF-8";
				break;
			}
			if (c != 0x1B) /* ESC */
				continue;

			/* Byte after ESC. */
			if (*p == '\0')
				break;
			c = *p++;
			if (c != '$')
				continue;

			/* Byte after ESC '$'. */
			if (*p == '\0')
				break;
			c = *p++;
			if (c == 'B' /* JIS X 0208-1983 */
				|| c == '@' /* JIS X 0208-1978 */) {
				charset = "ISO-2022-JP";
				continue;
			}
			if (c == 'A') { /* GB2312-1980 */
				charset = "ISO-2022-JP-2";
			} else if (c == '(') {
				/* 'C': KSC5601-1987, 'D': JIS X 0212-1990 */
				if (*p == 'C' || *p == 'D')
					charset = "ISO-2022-JP-2";
			} else if (c == ')') {
				if (*p == 'C')
					charset = "ISO-2022-KR"; /* KSC5601-1987 */
			}
			/* Any ESC '$' sequence other than 'B'/'@' ends the scan. */
			break;
		}
		if (!charset)
			charset = get_default_charset();
	}

	if (!charset) {
		switch (get_encoding_code()) {
		case LATINC:
		case LATINC_UA:
		case LATINC_TJ:
			charset = detect_charset_cylillic(text); /* fuzzy... */
			break;
		case CHINESE_CN:
		case CHINESE_TW:
		case CHINESE_HK:
			charset = detect_charset_chinese(text);
			break;
		case JAPANESE:
			charset = detect_charset_japanese(text);
			break;
		case KOREAN:
			charset = detect_charset_korean(text);
			break;
		case VIETNAMESE:
		case THAI:
		case GEORGIAN:
			charset = get_encoding_items(get_encoding_code())->item[OPENI18N];
			break;
		default:
			if (strcmp(get_default_charset(), "UTF-8") != 0)
				charset = get_default_charset();
			else if (detect_noniso(text))
				charset = get_encoding_items(get_encoding_code())->item[CODEPAGE];
			else
				charset = get_encoding_items(get_encoding_code())->item[OPENI18N];
			/* Last resort when the chosen item is empty. */
			if (!charset)
				charset = get_encoding_items(get_encoding_code())->item[IANA];
		}
	}

	return charset;
}
Exemplo n.º 6
0
/*
 * Parse the command line with the GOption parser and store the results
 * in `fi`.
 *
 * Recognised options: --codeset, --tab-width, --jump and --version.
 *
 *  - --codeset is accepted only if g_convert() can convert from it to
 *    UTF-8; an unusable codeset is silently ignored.  On success
 *    fi->charset is replaced by a copy of the given name.
 *  - --tab-width is forwarded to indent_set_default_tab_width().
 *  - --jump is stored in jump_linenum.
 *  - --version prints PACKAGE_STRING and exits with status 0.
 *  - A parse error prints the message and exits with status -1.
 *
 * fi->charset_flag is set to TRUE when fi->charset is neither the default
 * charset, nor UTF-8, nor one of the current encoding items.
 * The first remaining argument, if any, becomes fi->filename via
 * parse_file_uri().
 */
static void parse_args(gint argc, gchar **argv, FileInfo *fi)
{
	EncArray *encarray;
	gint i;
	GError *error = NULL;
	gchar *probe;	/* throw-away result of the codeset test conversion */

	GOptionContext *context;
	gchar *opt_codeset = NULL;
	gint opt_tab_width = 0;
	gint opt_jump = 0;	/* G_OPTION_ARG_INT stores a gint */
	gboolean opt_version = FALSE;
	GOptionEntry entries[] =
	{
		{ "codeset", 0, 0, G_OPTION_ARG_STRING, &opt_codeset, "Set codeset to open file", "CODESET" },
		{ "tab-width", 0, 0, G_OPTION_ARG_INT, &opt_tab_width, "Set tab width", "WIDTH" },
		{ "jump", 0, 0, G_OPTION_ARG_INT, &opt_jump, "Jump to specified line", "LINENUM" },
		{ "version", 0, 0, G_OPTION_ARG_NONE, &opt_version, "Show version number", NULL },
		{ NULL, 0, 0, G_OPTION_ARG_NONE, NULL, NULL, NULL }
	};

	context = g_option_context_new("[filename]");
	g_option_context_add_main_entries(context, entries, PACKAGE);
	g_option_context_add_group(context, gtk_get_option_group(TRUE));
	/* Unknown options are reported as errors rather than ignored. */
	g_option_context_set_ignore_unknown_options(context, FALSE);
	g_option_context_parse(context, &argc, &argv, &error);
	g_option_context_free(context);

	if (error) {
		g_print("%s: %s\n", PACKAGE, error->message);
		g_error_free(error);
		exit(-1);
	}
	if (opt_version) {
		g_print("%s\n", PACKAGE_STRING);
		exit(0);
	}
	if (opt_codeset) {
		/* Trial conversion: only the success/failure matters. */
		probe = g_convert("TEST", -1, "UTF-8", opt_codeset, NULL, NULL, &error);
		g_free(probe);
		if (error) {
			g_error_free(error);
			error = NULL;
		} else {
			g_free(fi->charset);
			fi->charset = g_strdup(opt_codeset);
		}
		/* The option parser allocated this string for us. */
		g_free(opt_codeset);
		opt_codeset = NULL;
	}
	if (opt_tab_width)
		indent_set_default_tab_width(opt_tab_width);
	if (opt_jump)
		jump_linenum = opt_jump;

	/*
	 * Flag charsets that are neither the default, nor UTF-8, nor one of
	 * the current encoding items.
	 */
	if (fi->charset
		&& (g_ascii_strcasecmp(fi->charset, get_default_charset()) != 0)
		&& (g_ascii_strcasecmp(fi->charset, "UTF-8") != 0)) {
		encarray = get_encoding_items(get_encoding_code());
		for (i = 0; i < ENCODING_MAX_ITEM_NUM; i++)
			if (encarray->item[i])
				if (g_ascii_strcasecmp(fi->charset, encarray->item[i]) == 0)
					break;
		if (i == ENCODING_MAX_ITEM_NUM)
			fi->charset_flag = TRUE;
	}

	if (argc >= 2)
		fi->filename = parse_file_uri(argv[1]);
}