/** * Create and initialize a Parse_Options object */ Parse_Options parse_options_create(void) { Parse_Options po; init_memusage(); po = (Parse_Options) malloc(sizeof(struct Parse_Options_s)); /* Here's where the values are initialized */ /* The parse_options_set_(verbosity|debug|test) functions set also the * corresponding global variables. So these globals are initialized * here too. */ verbosity = po->verbosity = 1; debug = po->debug = (char *)""; test = po->test = (char *)""; /* A cost of 2.7 allows the usual cost-2 connectors, plus the * assorted fractional costs, without going to cost 3.0, which * is used only during panic-parsing. * XXX In the long run, this should be fetched from the dictionary * (and should probably not be a parse option). */ po->disjunct_cost = 2.7; po->min_null_count = 0; po->max_null_count = 0; po->islands_ok = false; po->use_sat_solver = false; po->use_viterbi = false; po->linkage_limit = 100; #if defined HAVE_HUNSPELL || defined HAVE_ASPELL po->use_spell_guess = 7; #else po->use_spell_guess = 0; #endif /* defined HAVE_HUNSPELL || defined HAVE_ASPELL */ #ifdef XXX_USE_CORPUS /* Use the corpus cost model, if available. * It really does a better job at parse ranking. * Err .. sometimes ... */ po->cost_model.compare_fn = &CORP_compare_parse; po->cost_model.type = CORPUS; #else /* USE_CORPUS */ po->cost_model.compare_fn = &VDAL_compare_parse; po->cost_model.type = VDAL; #endif /* USE_CORPUS */ po->short_length = 16; po->all_short = false; po->perform_pp_prune = true; po->twopass_length = 30; po->repeatable_rand = true; po->resources = resources_create(); po->use_cluster_disjuncts = false; po->display_morphology = false; return po; }
/** * Create and initialize a Parse_Options object */ Parse_Options parse_options_create(void) { Parse_Options po; init_memusage(); po = (Parse_Options) xalloc(sizeof(struct Parse_Options_s)); /* Here's where the values are initialized */ po->verbosity = 1; po->debug = (char *)""; po->test = (char *)""; po->linkage_limit = 100; /* A cost of 2.7 allows the usual cost-2 connectors, plus the * assorted fractional costs, without going to cost 3.0, which * is used only during panic-parsing. */ po->disjunct_cost = 2.7; /* 3.0 is needed for Russian dicts */ po->min_null_count = 0; po->max_null_count = 0; po->islands_ok = false; po->use_spell_guess = 7; po->use_sat_solver = false; po->use_viterbi = false; #ifdef XXX_USE_CORPUS /* Use the corpus cost model, if available. * It really does a better job at parse ranking. * Err .. sometimes ... */ po->cost_model.compare_fn = &CORP_compare_parse; po->cost_model.type = CORPUS; #else /* USE_CORPUS */ po->cost_model.compare_fn = &VDAL_compare_parse; po->cost_model.type = VDAL; #endif /* USE_CORPUS */ po->short_length = 16; po->all_short = false; po->twopass_length = 30; po->repeatable_rand = true; po->resources = resources_create(); po->use_cluster_disjuncts = false; po->display_morphology = false; return po; }
/**
 * Use "string" as the input dictionary. All of the other parts,
 * including post-processing, affix table, etc, are NULL.
 * This routine is intended for unit-testing ONLY.
 *
 * The language is taken from get_default_locale(); when that yields
 * NULL or an empty string, "en" is used instead.
 *
 * @param input  Dictionary contents, passed through unchanged to
 *               dictionary_six_str().
 * @return Whatever dictionary_six_str() returns for that input.
 */
Dictionary dictionary_create_from_utf8(const char * input)
{
	Dictionary dictionary;
	char * lang;

	init_memusage();

	lang = get_default_locale();
	if (lang && *lang)
	{
		dictionary = dictionary_six_str(lang, input, "string", NULL, NULL, NULL, NULL);
	}
	else
	{
		/* Default to en when locales are broken (e.g. WIN32) */
		dictionary = dictionary_six_str("en", input, "string", NULL, NULL, NULL, NULL);
	}

	/* Release the locale string on every path: it may be non-NULL but
	 * empty, which previously leaked. free(NULL) is a harmless no-op. */
	free(lang);

	return dictionary;
}
Dictionary dictionary_create_from_file(const char * lang) { Dictionary dictionary; init_memusage(); if (lang && *lang) { char * dict_name; char * pp_name; char * cons_name; char * affix_name; char * regex_name; dict_name = join_path(lang, "4.0.dict"); pp_name = join_path(lang, "4.0.knowledge"); cons_name = join_path(lang, "4.0.constituent-knowledge"); affix_name = join_path(lang, "4.0.affix"); regex_name = join_path(lang, "4.0.regex"); dictionary = dictionary_six(lang, dict_name, pp_name, cons_name, affix_name, regex_name); free(regex_name); free(affix_name); free(cons_name); free(pp_name); free(dict_name); } else { prt_error("Error: No language specified!"); dictionary = NULL; } return dictionary; }