static Dictionary dictionary_six_str(const char * lang, const char * input, const char * dict_name, const char * pp_name, const char * cons_name, const char * affix_name, const char * regex_name) { const char * t; Dictionary dict; Dict_node *dict_node; dict = (Dictionary) xalloc(sizeof(struct Dictionary_s)); memset(dict, 0, sizeof(struct Dictionary_s)); /* Language and file-name stuff */ dict->string_set = string_set_create(); t = strrchr (lang, '/'); t = (NULL == t) ? lang : t+1; dict->lang = string_set_add(t, dict->string_set); lgdebug(D_USER_FILES, "Debug: Language: %s\n", dict->lang); dict->name = string_set_add(dict_name, dict->string_set); /* * A special setup per dictionary type. The check here assumes the affix * dictionary name contains "affix". FIXME: For not using this * assumption, the dictionary creating stuff needs a rearrangement. */ if (0 == strstr(dict->name, "affix")) { /* To disable spell-checking, just set the checker to NULL */ dict->spell_checker = spellcheck_create(dict->lang); #if defined HAVE_HUNSPELL || defined HAVE_ASPELL /* TODO: * 1. Set the spell option to 0, to signify no spell checking is done. * 2. On verbosity >= 1, add a detailed message on the reason. */ if (NULL == dict->spell_checker) prt_error("Info: Spell checker disabled."); #endif dict->insert_entry = insert_list; dict->lookup_list = lookup_list; dict->free_lookup = free_llist; dict->lookup = boolean_lookup; } else { /* * Affix dictionary. */ size_t i; dict->insert_entry = load_affix; dict->lookup = return_true; /* initialize the class table */ dict->afdict_class = malloc(sizeof(*dict->afdict_class) * ARRAY_SIZE(afdict_classname)); for (i = 0; i < ARRAY_SIZE(afdict_classname); i++) { dict->afdict_class[i].mem_elems = 0; dict->afdict_class[i].length = 0; dict->afdict_class[i].string = NULL; } } dict->affix_table = NULL; /* Read dictionary from the input string. */ dict->input = input; dict->pin = dict->input; if (!read_dictionary(dict)) { dict->pin = NULL; dict->input = NULL; goto failure; } dict->pin = NULL; dict->input = NULL; if (NULL == affix_name) { /* * The affix table is handled alone in this invocation. * Skip the rest of processing! * FIXME: The dictionary creating stuff needs a rearrangement. */ return dict; } /* If we don't have a locale per dictionary, the following * will also set the program's locale. */ dict->locale = linkgrammar_get_dict_locale(dict); set_utf8_program_locale(); #ifdef HAVE_LOCALE_T /* We have a locale per dictionary. */ if (NULL != dict->locale) dict->locale_t = newlocale_LC_CTYPE(dict->locale); /* If we didn't succeed to set the dictionary locale, the program will * SEGFAULT when it tries to use it with the isw*() functions. * So set it to the current program's locale as a last resort. */ if (NULL == dict->locale) { dict->locale = setlocale(LC_CTYPE, NULL); dict->locale_t = newlocale_LC_CTYPE(setlocale(LC_CTYPE, NULL)); prt_error("Warning: Couldn't set dictionary locale! " "Using current program locale %s", dict->locale); } /* If dict->locale is still not set, there is a bug. */ assert((locale_t)0 != dict->locale_t, "Dictionary locale is not set."); #else /* We don't have a locale per dictionary - but anyway make sure * dict->locale is consistent with the current program's locale, * and especially that it is not NULL. It still indicates the intended * locale of this dictionary and the locale of the compiled regexs. */ dict->locale = setlocale(LC_CTYPE, NULL); #endif /* HAVE_LOCALE_T */ dict->affix_table = dictionary_six(lang, affix_name, NULL, NULL, NULL, NULL); if (dict->affix_table == NULL) { prt_error("Error: Could not open affix file %s", affix_name); goto failure; } if (! afdict_init(dict)) goto failure; /* * Process the regex file. * We have to compile regexs using the dictionary locale, * so make a temporary locale swap. */ if (read_regex_file(dict, regex_name)) goto failure; const char *locale = setlocale(LC_CTYPE, NULL); locale = strdupa(locale); /* setlocale() uses static memory. */ setlocale(LC_CTYPE, dict->locale); lgdebug(+D_DICT, "Regexs locale %s\n", setlocale(LC_CTYPE, NULL)); if (compile_regexs(dict->regex_root, dict)) { locale = setlocale(LC_CTYPE, locale); goto failure; } locale = setlocale(LC_CTYPE, locale); assert(NULL != locale, "Cannot restore program locale\n"); #ifdef USE_CORPUS dict->corpus = lg_corpus_new(); #endif dict->left_wall_defined = boolean_dictionary_lookup(dict, LEFT_WALL_WORD); dict->right_wall_defined = boolean_dictionary_lookup(dict, RIGHT_WALL_WORD); dict->empty_word_defined = boolean_dictionary_lookup(dict, EMPTY_WORD_MARK); dict->base_knowledge = pp_knowledge_open(pp_name); dict->hpsg_knowledge = pp_knowledge_open(cons_name); dict->unknown_word_defined = boolean_dictionary_lookup(dict, UNKNOWN_WORD); dict->use_unknown_word = true; dict_node = dictionary_lookup_list(dict, UNLIMITED_CONNECTORS_WORD); if (dict_node != NULL) dict->unlimited_connector_set = connector_set_create(dict_node->exp); free_lookup(dict_node); return dict; failure: string_set_delete(dict->string_set); if (dict->affix_table) xfree(dict->affix_table, sizeof(struct Dictionary_s)); xfree(dict, sizeof(struct Dictionary_s)); return NULL; }
/** * Read dictionary entries from a wide-character string "input". * All other parts are read from files. */ static Dictionary dictionary_six_str(const char * lang, const char * input, const char * dict_name, const char * pp_name, const char * cons_name, const char * affix_name, const char * regex_name) { const char * t; Dictionary dict; Dict_node *dict_node; dict = (Dictionary) xalloc(sizeof(struct Dictionary_s)); memset(dict, 0, sizeof(struct Dictionary_s)); dict->num_entries = 0; dict->is_special = false; dict->already_got_it = '\0'; dict->line_number = 0; dict->root = NULL; dict->regex_root = NULL; dict->word_file_header = NULL; dict->exp_list = NULL; dict->affix_table = NULL; dict->recursive_error = false; dict->version = NULL; #ifdef HAVE_SQLITE dict->db_handle = NULL; #endif #ifdef USE_ANYSPLIT dict->anysplit = NULL; #endif /* Language and file-name stuff */ dict->string_set = string_set_create(); dict->lang = lang; t = strrchr (lang, '/'); if (t) dict->lang = string_set_add(t+1, dict->string_set); dict->name = string_set_add(dict_name, dict->string_set); /* * A special setup per dictionary type. The check here assumes the affix * dictionary name contains "affix". FIXME: For not using this * assumption, the dictionary creating stuff needs a rearrangement. */ if (0 == strstr(dict->name, "affix")) { /* To disable spell-checking, just set the checker to NULL */ dict->spell_checker = spellcheck_create(dict->lang); dict->insert_entry = insert_list; dict->lookup_list = lookup_list; dict->free_lookup = free_llist; dict->lookup = boolean_lookup; } else { /* * Affix dictionary. */ size_t i; dict->insert_entry = load_affix; dict->lookup = return_true; /* initialize the class table */ dict->afdict_class = malloc(sizeof(*dict->afdict_class) * NUMELEMS(afdict_classname)); for (i = 0; i < NUMELEMS(afdict_classname); i++) { dict->afdict_class[i].mem_elems = 0; dict->afdict_class[i].length = 0; dict->afdict_class[i].string = NULL; } } dict->affix_table = NULL; /* Read dictionary from the input string. */ dict->input = input; dict->pin = dict->input; if (!read_dictionary(dict)) { dict->pin = NULL; dict->input = NULL; goto failure; } dict->pin = NULL; dict->input = NULL; if (NULL == affix_name) { /* * The affix table is handled alone in this invocation. * Skip the rest of processing! * FIXME: The dictionary creating stuff needs a rearrangement. */ return dict; } dict->affix_table = dictionary_six(lang, affix_name, NULL, NULL, NULL, NULL); if (dict->affix_table == NULL) { prt_error("Error: Could not open affix file %s", affix_name); goto failure; } if (! afdict_init(dict)) goto failure; if (read_regex_file(dict, regex_name)) goto failure; if (compile_regexs(dict->regex_root, dict)) goto failure; #ifdef USE_CORPUS dict->corpus = lg_corpus_new(); #endif dict->left_wall_defined = boolean_dictionary_lookup(dict, LEFT_WALL_WORD); dict->right_wall_defined = boolean_dictionary_lookup(dict, RIGHT_WALL_WORD); dict->empty_word_defined = boolean_dictionary_lookup(dict, EMPTY_WORD_MARK); dict->base_knowledge = pp_knowledge_open(pp_name); dict->hpsg_knowledge = pp_knowledge_open(cons_name); dict->unknown_word_defined = boolean_dictionary_lookup(dict, UNKNOWN_WORD); dict->use_unknown_word = true; dict_node = dictionary_lookup_list(dict, UNLIMITED_CONNECTORS_WORD); if (dict_node != NULL) { dict->unlimited_connector_set = connector_set_create(dict_node->exp); } else { dict->unlimited_connector_set = NULL; } free_lookup(dict_node); return dict; failure: string_set_delete(dict->string_set); if (dict->affix_table) xfree(dict->affix_table, sizeof(struct Dictionary_s)); xfree(dict, sizeof(struct Dictionary_s)); return NULL; }