Beispiel #1
0
/**
 * Create and initialize a Parse_Options object.
 *
 * Every field assigned below receives its default value. The global
 * variables `verbosity`, `debug` and `test` are reset to the same
 * defaults as the corresponding fields.
 *
 * @return The newly allocated, fully initialized options object, or
 *         NULL if the allocation failed. On failure the globals are
 *         left untouched.
 */
Parse_Options parse_options_create(void)
{
	Parse_Options po;

	init_memusage();
	po = (Parse_Options) malloc(sizeof(struct Parse_Options_s));

	/* malloc() may fail; never write through a null pointer. */
	if (NULL == po) return NULL;

	/* Here's where the values are initialized */

	/* The parse_options_set_(verbosity|debug|test) functions set also the
	 * corresponding global variables. So these globals are initialized
	 * here too. */
	verbosity = po->verbosity = 1;
	debug = po->debug = (char *)"";
	test = po->test = (char *)"";

	/* A cost of 2.7 allows the usual cost-2 connectors, plus the
	 * assorted fractional costs, without going to cost 3.0, which
	 * is used only during panic-parsing.
	 * XXX In the long run, this should be fetched from the dictionary
	 * (and should probably not be a parse option).
	 */
	po->disjunct_cost = 2.7;
	po->min_null_count = 0;
	po->max_null_count = 0;
	po->islands_ok = false;
	po->use_sat_solver = false;
	po->use_viterbi = false;
	po->linkage_limit = 100;

	/* Spell guessing is enabled by default only when a spell-checker
	 * backend was compiled in. */
#if defined HAVE_HUNSPELL || defined HAVE_ASPELL
	po->use_spell_guess = 7;
#else
	po->use_spell_guess = 0;
#endif /* defined HAVE_HUNSPELL || defined HAVE_ASPELL */

#ifdef XXX_USE_CORPUS
	/* Use the corpus cost model, if available.
	 * It really does a better job at parse ranking.
	 * Err .. sometimes ...
	 */
	po->cost_model.compare_fn = &CORP_compare_parse;
	po->cost_model.type = CORPUS;
#else /* USE_CORPUS */
	po->cost_model.compare_fn = &VDAL_compare_parse;
	po->cost_model.type = VDAL;
#endif /* USE_CORPUS */
	po->short_length = 16;
	po->all_short = false;
	po->perform_pp_prune = true;
	po->twopass_length = 30;
	po->repeatable_rand = true;
	po->resources = resources_create();
	po->use_cluster_disjuncts = false;
	po->display_morphology = false;

	return po;
}
Beispiel #2
0
/**
 * Allocate a Parse_Options object and fill in its default settings.
 *
 * @return The freshly allocated options object with every field
 *         assigned below set to its default value.
 */
Parse_Options parse_options_create(void)
{
	Parse_Options opts;

	init_memusage();
	opts = (Parse_Options) xalloc(sizeof(struct Parse_Options_s));

	/* --- Diagnostics ------------------------------------------------ */
	opts->verbosity = 1;
	opts->debug = (char *)"";
	opts->test = (char *)"";
	opts->display_morphology = false;

	/* --- Parser selection ------------------------------------------- */
	opts->use_sat_solver = false;
	opts->use_viterbi = false;
	opts->use_cluster_disjuncts = false;

	/* --- Search limits ---------------------------------------------- */
	opts->linkage_limit = 100;
	opts->short_length = 16;
	opts->all_short = false;
	opts->twopass_length = 30;
	opts->min_null_count = 0;
	opts->max_null_count = 0;
	opts->islands_ok = false;
	opts->use_spell_guess = 7;
	opts->repeatable_rand = true;

	/* 2.7 admits the ordinary cost-2 connectors together with the
	 * various fractional costs, yet stays below 3.0, which is
	 * reserved for panic-parsing. (The Russian dicts need 3.0.)
	 */
	opts->disjunct_cost = 2.7;

	/* --- Linkage ranking -------------------------------------------- */
#ifdef XXX_USE_CORPUS
	/* Rank parses with the corpus cost model when it is available;
	 * it ranks parses better ... sometimes.
	 */
	opts->cost_model.type = CORPUS;
	opts->cost_model.compare_fn = &CORP_compare_parse;
#else /* USE_CORPUS */
	opts->cost_model.type = VDAL;
	opts->cost_model.compare_fn = &VDAL_compare_parse;
#endif /* USE_CORPUS */

	/* --- Resource tracking ------------------------------------------ */
	opts->resources = resources_create();

	return opts;
}
Beispiel #3
0
/**
 * Use "string" as the input dictionary. All of the other parts,
 * including post-processing, affix table, etc, are NULL.
 * This routine is intended for unit-testing ONLY.
 *
 * The language is taken from get_default_locale(); when that yields
 * NULL or an empty string, "en" is used instead.
 *
 * @param input  Dictionary contents, passed through to
 *               dictionary_six_str() unchanged.
 * @return Whatever dictionary_six_str() returns for the chosen language.
 */
Dictionary dictionary_create_from_utf8(const char * input)
{
	Dictionary dictionary = NULL;
	char * lang;

	init_memusage();

	lang = get_default_locale();
	if (lang && *lang) {
		dictionary = dictionary_six_str(lang, input, "string",
		                                NULL, NULL, NULL, NULL);
	} else {
		/* Default to en when locales are broken (e.g. WIN32) */
		dictionary = dictionary_six_str("en", input, "string",
		                                NULL, NULL, NULL, NULL);
	}

	/* Release the locale string on every path: a non-NULL but empty
	 * string takes the fallback branch above and must not be leaked.
	 * free(NULL) is a no-op, so no guard is needed. */
	free(lang);

	return dictionary;
}
Beispiel #4
0
/**
 * Build a dictionary for the given language from its data files.
 *
 * The file names "4.0.dict", "4.0.knowledge",
 * "4.0.constituent-knowledge", "4.0.affix" and "4.0.regex" are each
 * combined with `lang` by join_path() and handed to dictionary_six().
 *
 * @param lang  Language name; must be non-NULL and non-empty.
 * @return The result of dictionary_six(), or NULL (after reporting an
 *         error) when no language was given.
 */
Dictionary dictionary_create_from_file(const char * lang)
{
	Dictionary dict;
	char * dict_path;
	char * knowledge_path;
	char * constituent_path;
	char * affix_path;
	char * regex_path;

	init_memusage();

	/* Guard: nothing can be looked up without a language name. */
	if (NULL == lang || '\0' == *lang)
	{
		prt_error("Error: No language specified!");
		return NULL;
	}

	dict_path = join_path(lang, "4.0.dict");
	knowledge_path = join_path(lang, "4.0.knowledge");
	constituent_path = join_path(lang, "4.0.constituent-knowledge");
	affix_path = join_path(lang, "4.0.affix");
	regex_path = join_path(lang, "4.0.regex");

	dict = dictionary_six(lang, dict_path, knowledge_path,
	                      constituent_path, affix_path, regex_path);

	/* The joined paths are only needed for the call above. */
	free(regex_path);
	free(affix_path);
	free(constituent_path);
	free(knowledge_path);
	free(dict_path);

	return dict;
}