Beispiel #1
0
int main (int argc, char *argv[])
{
	int i;
	FILE *f = stdin;
	char format = '\0';
	int regex_file_specified = 0;

	setlocale (LC_TIME, "C");
	determine_mode_from_name (argv[0]);
	while (1) {
		static struct option long_options[] = {
	       		{"help", 0, 0, 1000 + 'H'},
			{"version", 0, 0, 1000 + 'V'},
			{"verbose", 0, 0, 'v'},
			{"list", 0, 0, 'l'},
			{"filter", 0, 0, 1000 + 'f'},
			{"grep", 0, 0, 'g'},
			{"strip", 1, 0, 1000 + 'S'},
			{"addprefix", 1, 0, 1000 + 'A'},
			{"addoldprefix", 1, 0, 1000 + 'O'},
			{"addnewprefix", 1, 0, 1000 + 'N'},
			{"hunks", 1, 0, '#'},
			{"lines", 1, 0, 1000 + ':'},
			{"files", 1, 0, 'F'},
			{"as-numbered-lines", 1, 0, 1000 + 'L'},
			{"annotate", 0, 0, 1000 + 'a'},
			{"format", 1, 0, 1000 + 'F'},
			{"output-matching", 1, 0, 1000 + 'o'},
			{"remove-timestamps", 0, 0, 1000 + 'r'},
			{"with-filename", 0, 0, 'H'},
			{"no-filename", 0, 0, 'h'},
			{"empty-files-as-absent", 0, 0, 'E'},
			{"number-files", 0, 0, 'N'},
			{"clean", 0, 0, 1000 + 'c'},
			{"strip-match", 1, 0, 'p'},
			{"include", 1, 0, 'i'},
			{"exclude", 1, 0, 'x'},
			{"include-from-file", 1, 0, 'I'},
			{"exclude-from-file", 1, 0, 'X'},
			{"decompress", 0, 0, 'z'},
			{"line-number", 0, 0, 'n'},
			{"strip-match", 1, 0, 'p'},
			{"status", 0, 0, 's'},
			{"extended-regexp", 0, 0, 'E'},
			{"empty-files-as-removed", 0, 0, 'E'},
			{"file", 1, 0, 'f'},
			{0, 0, 0, 0}
		};
		char *end;
		int c = getopt_long (argc, argv, "vp:i:I:x:X:zns#:F:Ef:HhN",
				     long_options, NULL);
		if (c == -1)
			break;
		
		switch (c) {
		case 'g':
			set_grep ();
			break;
		case 1000 + 'f':
			set_filter ();
			break;
		case 'l':
			set_list ();
			break;
		case 'E':
			if (mode == mode_grep)
				egrepping = REG_EXTENDED;
			else if (mode == mode_list)
				empty_files_as_absent = 1;
			else syntax (1);
			break;
		case 'f':
			if (mode == mode_grep) {
				regex_file_specified = 1;
				read_regex_file (optarg);
			} else syntax (1);
			break;
		case 1000 + 'V':
			printf("%s - patchutils version %s\n", progname,
			       VERSION);
			exit(0);
		case 1000 + 'H':
			syntax (0);
			break;
		case 1000 + 'S':
			strip_components = strtoul (optarg, &end, 0);
			if (optarg == end)
				syntax (1);
			break;
		case 1000 + 'A':
			prefix_to_add = optarg;
			break;
		case 1000 + 'O':
			old_prefix_to_add = optarg;
			break;
		case 1000 + 'N':
			new_prefix_to_add = optarg;
			break;
		case 'p':
			ignore_components = strtoul (optarg, &end, 0);
			if (optarg == end)
				syntax (1);
			break;
		case 'x':
			patlist_add (&pat_exclude, optarg);
			break;
		case 'X':
			patlist_add_file (&pat_exclude, optarg);
			break;
		case 'i':
			patlist_add (&pat_include, optarg);
			break;
		case 'I':
			patlist_add_file (&pat_include, optarg);
			break;
		case 'z':
			unzip = 1;
			break;
		case 'n':
			numbering = 1;
			break;
		case 'N':
			number_files = 1;
			break;
		case 's':
			show_status = 1;
			break;
		case 'v':
			verbose++;
			if (numbering && verbose > 1)
				number_files = 1;
			break;
		case '#':
			if (hunks)
				syntax (1);
			if (*optarg == 'x') {
				hunks_exclude = 1;
				optarg = optarg + 1;
			}
			parse_range (&hunks, optarg);
			break;
		case 'H':
			if (mode == mode_list || mode == mode_grep)
				print_patchnames = 1;
			else syntax (1);
			break;
		case 'h':
			if (mode == mode_list || mode == mode_grep)
				print_patchnames = 0;
			else syntax (1);
			break;
		case 1000 + ':':
			if (lines)
				syntax (1);
			if (*optarg == 'x') {
				lines_exclude = 1;
				optarg = optarg + 1;
			}
			parse_range (&lines, optarg);
			break;
		case 'F':
			if (files)
				syntax (1);
			if (*optarg == 'x') {
				files_exclude = 1;
				optarg = optarg + 1;
			}
			parse_range (&files, optarg);
			break;
		case 1000 + 'L':
			if (!strcmp (optarg, "before"))
				number_lines = Before;
			else if (!strcmp (optarg, "after"))
				number_lines = After;
			else syntax (1);
			break;
		case 1000 + 'a':
			if (mode == mode_list)
				syntax (1);
			annotating = 1;
			break;
		case 1000 + 'F':
			if (!strcmp (optarg, "context") && !format)
				format = 'c';
			else if (!strcmp (optarg, "unified") && !format)
				format = 'u';
			else syntax (1);
			break;
		case 1000 + 'o':
			if (!strncmp (optarg, "hunk", 4))
				output_matching = output_hunk;
			else if (!strncmp (optarg, "file", 4))
				output_matching = output_file;
			else syntax (1);
			break;
		case 1000 + 'r':
			removing_timestamp = 1;
			break;
		case 1000 + 'c':
			clean_comments = 1;
			break;
		default:
			syntax(1);
		}
	}

	/* Preserve the old semantics of -p. */
	if (mode != mode_filter && ignore_components && !strip_components &&
	    !pat_include && !pat_exclude) {
		fprintf (stderr,
			 "-p given without -i or -x; guessing that you "
			 "meant --strip instead.\n");
		strip_components = ignore_components;
		ignore_components = 0;
	}

	if (mode != mode_grep && output_matching != output_none)
		error (EXIT_FAILURE, 0, "--output-matching only applies to "
		       "grep mode");

	if (numbering &&
	    !(mode == mode_list ||
	      (mode == mode_grep && output_matching == output_none)))
		error (EXIT_FAILURE, 0, "-n only applies to list mode");

	if (mode != mode_filter &&
	    output_matching == output_none &&
	    number_lines != None)
		error (EXIT_FAILURE, 0, "--as-numbered-lines is "
		       "inappropriate in this context");

	if (mode == mode_filter &&
	    verbose && clean_comments)
		error (EXIT_FAILURE, 0, "can't use --verbose and "
		       "--clean options simultaneously");

	if (mode == mode_grep && !regex_file_specified) {
		int err;

		if (optind == argc)
			syntax (1);

		regex = xrealloc (regex, ++num_regex * sizeof (regex[0]));
		err = regcomp (&regex[num_regex - 1], argv[optind++],
			       REG_NOSUB | egrepping);
		if (err) {
			char errstr[300];
			regerror (err, &regex[num_regex - 1], errstr,
				  sizeof (errstr));
			error (EXIT_FAILURE, 0, "%s", errstr);
			exit (1);
		}
	}

	if (number_lines != None ||
	    output_matching != output_none) {
		if (print_patchnames == 1)
			error (EXIT_FAILURE, 0,
			       "-H is inappropriate in this context");
	} else if (print_patchnames == -1) {
		if ((mode == mode_list || mode == mode_grep) &&
		    optind + 1 < argc)
			print_patchnames = 1;
		else
			print_patchnames = 0;
	}

	if (optind == argc) {
		f = convert_format (stdin, format);
		filterdiff (f, "(standard input)");
		fclose (f);
	} else {
		for (i = optind; i < argc; i++) {
			if (unzip) {
				f = xopen_unzip (argv[i], "rb");
			} else {
				f = xopen(argv[i], "rbm");
			}

			f = convert_format (f, format);
			filterdiff (f, argv[i]);
			fclose (f);
		}
	}

	return 0;
}
Beispiel #2
0
static Dictionary
dictionary_six_str(const char * lang,
                   const char * input,
                   const char * dict_name,
                   const char * pp_name, const char * cons_name,
                   const char * affix_name, const char * regex_name)
{
	const char * t;
	Dictionary dict;
	Dict_node *dict_node;

	dict = (Dictionary) xalloc(sizeof(struct Dictionary_s));
	memset(dict, 0, sizeof(struct Dictionary_s));

	/* Language and file-name stuff */
	dict->string_set = string_set_create();
	t = strrchr (lang, '/');
	t = (NULL == t) ? lang : t+1;
	dict->lang = string_set_add(t, dict->string_set);
	lgdebug(D_USER_FILES, "Debug: Language: %s\n", dict->lang);
	dict->name = string_set_add(dict_name, dict->string_set);

	/*
	 * A special setup per dictionary type. The check here assumes the affix
	 * dictionary name contains "affix". FIXME: For not using this
	 * assumption, the dictionary creating stuff needs a rearrangement.
	 */
	if (0 == strstr(dict->name, "affix"))
	{
		/* To disable spell-checking, just set the checker to NULL */
		dict->spell_checker = spellcheck_create(dict->lang);
#if defined HAVE_HUNSPELL || defined HAVE_ASPELL
		/* TODO:
		 * 1. Set the spell option to 0, to signify no spell checking is done.
		 * 2. On verbosity >= 1, add a detailed message on the reason. */
		if (NULL == dict->spell_checker)
			prt_error("Info: Spell checker disabled.");
#endif
		dict->insert_entry = insert_list;

		dict->lookup_list = lookup_list;
		dict->free_lookup = free_llist;
		dict->lookup = boolean_lookup;
	}
	else
	{
		/*
		 * Affix dictionary.
		 */
		size_t i;

		dict->insert_entry = load_affix;
		dict->lookup = return_true;

		/* initialize the class table */
		dict->afdict_class =
		   malloc(sizeof(*dict->afdict_class) * ARRAY_SIZE(afdict_classname));
		for (i = 0; i < ARRAY_SIZE(afdict_classname); i++)
		{
			dict->afdict_class[i].mem_elems = 0;
			dict->afdict_class[i].length = 0;
			dict->afdict_class[i].string = NULL;
		}
	}
	dict->affix_table = NULL;

	/* Read dictionary from the input string. */
	dict->input = input;
	dict->pin = dict->input;
	if (!read_dictionary(dict))
	{
		dict->pin = NULL;
		dict->input = NULL;
		goto failure;
	}
	dict->pin = NULL;
	dict->input = NULL;

	if (NULL == affix_name)
	{
		/*
		 * The affix table is handled alone in this invocation.
		 * Skip the rest of processing!
		 * FIXME: The dictionary creating stuff needs a rearrangement.
		 */
		return dict;
	}

	/* If we don't have a locale per dictionary, the following
	 * will also set the program's locale. */
	dict->locale = linkgrammar_get_dict_locale(dict);
	set_utf8_program_locale();

#ifdef HAVE_LOCALE_T
	/* We have a locale per dictionary. */
	if (NULL != dict->locale)
		dict->locale_t = newlocale_LC_CTYPE(dict->locale);

	/* If we didn't succeed to set the dictionary locale, the program will
	 * SEGFAULT when it tries to use it with the isw*() functions.
	 * So set it to the current program's locale as a last resort. */
	if (NULL == dict->locale)
	{
		dict->locale = setlocale(LC_CTYPE, NULL);
		dict->locale_t = newlocale_LC_CTYPE(setlocale(LC_CTYPE, NULL));
		prt_error("Warning: Couldn't set dictionary locale! "
		          "Using current program locale %s", dict->locale);
	}
	/* If dict->locale is still not set, there is a bug. */
	assert((locale_t)0 != dict->locale_t, "Dictionary locale is not set.");
#else
	/* We don't have a locale per dictionary - but anyway make sure
	 * dict->locale is consistent with the current program's locale,
	 * and especially that it is not NULL. It still indicates the intended
	 * locale of this dictionary and the locale of the compiled regexs. */
	dict->locale = setlocale(LC_CTYPE, NULL);
#endif /* HAVE_LOCALE_T */

	dict->affix_table = dictionary_six(lang, affix_name, NULL, NULL, NULL, NULL);
	if (dict->affix_table == NULL)
	{
		prt_error("Error: Could not open affix file %s", affix_name);
		goto failure;
	}
	if (! afdict_init(dict))
		goto failure;

	/*
	 * Process the regex file.
	 * We have to compile regexs using the dictionary locale,
	 * so make a temporary locale swap.
	 */
	if (read_regex_file(dict, regex_name)) goto failure;

	const char *locale = setlocale(LC_CTYPE, NULL);
	locale = strdupa(locale); /* setlocale() uses static memory. */
	setlocale(LC_CTYPE, dict->locale);
	lgdebug(+D_DICT, "Regexs locale %s\n", setlocale(LC_CTYPE, NULL));

	if (compile_regexs(dict->regex_root, dict))
	{
		locale = setlocale(LC_CTYPE, locale);
		goto failure;
	}
	locale = setlocale(LC_CTYPE, locale);
	assert(NULL != locale, "Cannot restore program locale\n");

#ifdef USE_CORPUS
	dict->corpus = lg_corpus_new();
#endif

	dict->left_wall_defined  = boolean_dictionary_lookup(dict, LEFT_WALL_WORD);
	dict->right_wall_defined = boolean_dictionary_lookup(dict, RIGHT_WALL_WORD);

	dict->empty_word_defined = boolean_dictionary_lookup(dict, EMPTY_WORD_MARK);

	dict->base_knowledge  = pp_knowledge_open(pp_name);
	dict->hpsg_knowledge  = pp_knowledge_open(cons_name);

	dict->unknown_word_defined = boolean_dictionary_lookup(dict, UNKNOWN_WORD);
	dict->use_unknown_word = true;

	dict_node = dictionary_lookup_list(dict, UNLIMITED_CONNECTORS_WORD);
	if (dict_node != NULL)
		dict->unlimited_connector_set = connector_set_create(dict_node->exp);

	free_lookup(dict_node);

	return dict;

failure:
	string_set_delete(dict->string_set);
	if (dict->affix_table) xfree(dict->affix_table, sizeof(struct Dictionary_s));
	xfree(dict, sizeof(struct Dictionary_s));
	return NULL;
}
Beispiel #3
0
/**
 * Read dictionary entries from a wide-character string "input".
 * All other parts are read from files.
 */
static Dictionary
dictionary_six_str(const char * lang,
                   const char * input,
                   const char * dict_name,
                   const char * pp_name, const char * cons_name,
                   const char * affix_name, const char * regex_name)
{
	const char * t;
	Dictionary dict;
	Dict_node *dict_node;

	dict = (Dictionary) xalloc(sizeof(struct Dictionary_s));
	memset(dict, 0, sizeof(struct Dictionary_s));

	dict->num_entries = 0;
	dict->is_special = false;
	dict->already_got_it = '\0';
	dict->line_number = 0;
	dict->root = NULL;
	dict->regex_root = NULL;
	dict->word_file_header = NULL;
	dict->exp_list = NULL;
	dict->affix_table = NULL;
	dict->recursive_error = false;
	dict->version = NULL;
#ifdef HAVE_SQLITE
	dict->db_handle = NULL;
#endif
#ifdef USE_ANYSPLIT
	dict->anysplit = NULL;
#endif

	/* Language and file-name stuff */
	dict->string_set = string_set_create();
	dict->lang = lang;
	t = strrchr (lang, '/');
	if (t) dict->lang = string_set_add(t+1, dict->string_set);
	dict->name = string_set_add(dict_name, dict->string_set);

	/*
	 * A special setup per dictionary type. The check here assumes the affix
	 * dictionary name contains "affix". FIXME: For not using this
	 * assumption, the dictionary creating stuff needs a rearrangement.
	 */
	if (0 == strstr(dict->name, "affix"))
	{
		/* To disable spell-checking, just set the checker to NULL */
		dict->spell_checker = spellcheck_create(dict->lang);
		dict->insert_entry = insert_list;

		dict->lookup_list = lookup_list;
		dict->free_lookup = free_llist;
		dict->lookup = boolean_lookup;
	}
	else
	{
		/*
		 * Affix dictionary.
		 */
		size_t i;

		dict->insert_entry = load_affix;
		dict->lookup = return_true;

		/* initialize the class table */
		dict->afdict_class =
		   malloc(sizeof(*dict->afdict_class) * NUMELEMS(afdict_classname));
		for (i = 0; i < NUMELEMS(afdict_classname); i++)
		{
			dict->afdict_class[i].mem_elems = 0;
			dict->afdict_class[i].length = 0;
			dict->afdict_class[i].string = NULL;
		}
	}
	dict->affix_table = NULL;

	/* Read dictionary from the input string. */
	dict->input = input;
	dict->pin = dict->input;
	if (!read_dictionary(dict))
	{
		dict->pin = NULL;
		dict->input = NULL;
		goto failure;
	}
	dict->pin = NULL;
	dict->input = NULL;

	if (NULL == affix_name)
	{
		/*
		 * The affix table is handled alone in this invocation.
		 * Skip the rest of processing!
		 * FIXME: The dictionary creating stuff needs a rearrangement.
		 */
		return dict;
	}

	dict->affix_table = dictionary_six(lang, affix_name, NULL, NULL, NULL, NULL);
	if (dict->affix_table == NULL)
	{
		prt_error("Error: Could not open affix file %s", affix_name);
		goto failure;
	}
	if (! afdict_init(dict))
		goto failure;

	if (read_regex_file(dict, regex_name)) goto failure;
	if (compile_regexs(dict->regex_root, dict)) goto failure;

#ifdef USE_CORPUS
	dict->corpus = lg_corpus_new();
#endif

	dict->left_wall_defined  = boolean_dictionary_lookup(dict, LEFT_WALL_WORD);
	dict->right_wall_defined = boolean_dictionary_lookup(dict, RIGHT_WALL_WORD);

	dict->empty_word_defined = boolean_dictionary_lookup(dict, EMPTY_WORD_MARK);

	dict->base_knowledge  = pp_knowledge_open(pp_name);
	dict->hpsg_knowledge  = pp_knowledge_open(cons_name);

	dict->unknown_word_defined = boolean_dictionary_lookup(dict, UNKNOWN_WORD);
	dict->use_unknown_word = true;

	dict_node = dictionary_lookup_list(dict, UNLIMITED_CONNECTORS_WORD);
	if (dict_node != NULL) {
		dict->unlimited_connector_set = connector_set_create(dict_node->exp);
	} else {
		dict->unlimited_connector_set = NULL;
	}
	free_lookup(dict_node);

	return dict;

failure:
	string_set_delete(dict->string_set);
	if (dict->affix_table) xfree(dict->affix_table, sizeof(struct Dictionary_s));
	xfree(dict, sizeof(struct Dictionary_s));
	return NULL;
}