/**
 * connector() -- make a node for a connector or dictionary word.
 *
 * Assumes the current token (dict->token) is a connector or a
 * dictionary word.  A token whose last character is '+' or '-' is
 * taken to be a connector; anything else (including an empty token)
 * is looked up as a previously defined word.
 *
 * On success the tokenizer is advanced by one token and the new
 * expression node is returned.  NULL is returned if the word is not
 * found, if check_connector() rejects the connector, or if
 * link_advance() fails.
 */
static Exp * connector(Dictionary dict)
{
	Exp * n;
	size_t len = strlen(dict->token);
	/* Guard against an empty token: never index token[-1]. */
	char last = (len > 0) ? dict->token[len - 1] : '\0';

	if ((last != '+') && (last != '-'))
	{
		/* If we are here, token is a word */
		Dict_node *dn_head = abridged_lookup_list(dict, dict->token);
		Dict_node *dn = dn_head;

		/* Only an exact string match counts as the referenced word. */
		while ((dn != NULL) && (strcmp(dn->string, dict->token) != 0))
		{
			dn = dn->right;
		}
		if (dn == NULL)
		{
			free_lookup_list(dn_head);
			dict_error(dict, "\nPerhaps missing + or - in a connector.\n"
			                 "Or perhaps you forgot the suffix on a word.\n"
			                 "Or perhaps a word is used before it is defined.\n");
			return NULL;
		}
		n = make_unary_node(dict, dn->exp);
		free_lookup_list(dn_head);
	}
	else
	{
		/* If we are here, token is a connector */
		if (!check_connector(dict, dict->token))
		{
			return NULL;
		}
		n = Exp_create(dict);
		n->dir = dict->token[len - 1];
		dict->token[len - 1] = '\0';   /* get rid of the + or - */

		/* A leading '@' marks a multi-connector; it is not part of the name. */
		if (dict->token[0] == '@')
		{
			n->u.string = string_set_add(dict->token+1, dict->string_set);
			n->multi = TRUE;
		}
		else
		{
			n->u.string = string_set_add(dict->token, dict->string_set);
			n->multi = FALSE;
		}
		n->type = CONNECTOR_type;
		n->cost = 0.0f;
	}

	if (!link_advance(dict))
	{
		exp_free(n);
		return NULL;
	}
	return n;
}
/**
 * linkgrammar_get_dict_version() -- return the dictionary version string.
 *
 * The newer dictionaries should contain a macro of the form:
 *    <dictionary-version-number>: V4v6v6+;
 * which would indicate dictionary version 4.6.6.
 * Older dictionaries contain no version info; "[unknown]" is returned
 * for them, and also if the version string cannot be allocated.
 *
 * The computed string is cached in a function-local static, so once a
 * version has been found, that same string is returned on every later
 * call, whichever dictionary is passed in.
 */
const char * linkgrammar_get_dict_version(Dictionary dict)
{
	static char * ver = NULL;
	char * p;
	Dict_node *dn;
	Exp *e;

	if (ver) return ver;

	dn = dictionary_lookup_list(dict, "<dictionary-version-number>");
	if (NULL == dn) return "[unknown]";

	/* Skip the leading letter of the connector name (the 'V'). */
	e = dn->exp;
	ver = strdup(&e->u.string[1]);
	free_lookup_list(dn);

	/* strdup() can fail; do not hand a NULL pointer to strchr(). */
	if (NULL == ver) return "[unknown]";

	/* Turn "4v6v6" into "4.6.6". */
	p = strchr(ver, 'v');
	while (p)
	{
		*p = '.';
		p = strchr(p+1, 'v');
	}

	return ver;
}
/**
 * boolean_dictionary_lookup() -- test whether a word is in the dictionary.
 *
 * Returns 1 if dictionary_lookup_list() yields at least one entry for
 * s, and 0 otherwise.  The lookup list is released before returning.
 */
int boolean_dictionary_lookup(Dictionary dict, const char *s)
{
	Dict_node *matches = dictionary_lookup_list(dict, s);
	int found = (NULL != matches) ? 1 : 0;

	free_lookup_list(matches);
	return found;
}
/**
 * dict_display_word_info() - display the information about the given word.
 *
 * Prints, for every dictionary entry matching s, the entry's string,
 * the number of disjuncts built for it and, when the entry has a file
 * record, that file's name.  A short notice is printed instead when
 * nothing matches.
 */
void dict_display_word_info(Dictionary dict, const char * s)
{
	Dict_node *head = dictionary_lookup_list(dict, s);
	Dict_node *node;

	if (NULL == head)
	{
		printf(" \"%s\" matches nothing in the dictionary.\n", s);
		return;
	}

	printf("Matches:\n");
	node = head;
	while (node != NULL)
	{
		/* The disjuncts are built only to be counted, then discarded. */
		Disjunct *built = build_disjuncts_for_dict_node(node);
		Disjunct *cursor = built;
		int count = 0;

		while (cursor != NULL)
		{
			count++;
			cursor = cursor->next;
		}
		free_disjuncts(built);

		printf(" ");
		left_print_string(stdout, node->string, " ");
		printf(" %5d disjuncts ", count);
		if (node->file != NULL)
		{
			printf("<%s>", node->file->file);
		}
		printf("\n");

		node = node->right;
	}

	free_lookup_list(head);
}
Dictionary dictionary_create_from_db(const char *lang) { char *dbname; const char * t; Dictionary dict; Dict_node *dict_node; dict = (Dictionary) xalloc(sizeof(struct Dictionary_s)); memset(dict, 0, sizeof(struct Dictionary_s)); dict->version = NULL; dict->num_entries = 0; dict->affix_table = NULL; dict->regex_root = NULL; /* Language and file-name stuff */ dict->string_set = string_set_create(); dict->lang = lang; t = strrchr (lang, '/'); if (t) dict->lang = string_set_add(t+1, dict->string_set); /* To disable spell-checking, just set the checker to NULL */ dict->spell_checker = spellcheck_create(dict->lang); dict->base_knowledge = NULL; dict->hpsg_knowledge = NULL; dbname = join_path (lang, "dict.db"); dict->name = string_set_add(dbname, dict->string_set); free(dbname); /* Set up the database */ dict->db_handle = object_open(dict->name, db_open, NULL); dict->lookup_list = db_lookup_list; dict->free_lookup = db_free_llist; dict->lookup = db_lookup; dict->close = db_close; /* Misc remaining common (generic) dict setup work */ dict->left_wall_defined = boolean_dictionary_lookup(dict, LEFT_WALL_WORD); dict->right_wall_defined = boolean_dictionary_lookup(dict, RIGHT_WALL_WORD); dict->empty_word_defined = boolean_dictionary_lookup(dict, EMPTY_WORD_MARK); dict->unknown_word_defined = boolean_dictionary_lookup(dict, UNKNOWN_WORD); dict->use_unknown_word = true; dict_node = dictionary_lookup_list(dict, UNLIMITED_CONNECTORS_WORD); if (dict_node != NULL) { dict->unlimited_connector_set = connector_set_create(dict_node->exp); } else { dict->unlimited_connector_set = NULL; } free_lookup_list(dict, dict_node); return dict; }
/**
 * insert_list() -
 * p points to a list of dict_nodes connected by their left pointers.
 * l is the length of this list (the last ptr may not be NULL).
 * It inserts the list into the dictionary.
 * It does the middle one first, then the left half, then the right.
 *
 * Each node is handled in one of four ways:
 *  - strings containing an underbar are passed to insert_idiom();
 *  - strings that look like idiom words are reported and discarded;
 *  - strings that already match something in the dictionary are
 *    reported, together with the matches, and discarded;
 *  - everything else is inserted into the dictionary tree.
 *
 * Note: the middle-left-right order seems to have begun as a way to
 * keep the resulting binary tree from being badly unbalanced.  The
 * DSW rebalancing algorithm now takes care of that, which would seem
 * to make the bisected insertion obsolete; but, oddly enough, it
 * seems to make the DSW balancing go really fast -- faster than a
 * simple insertion.  This probably has to do with the dictionaries
 * being in alphabetical order: the subdivision randomizes a bit.
 */
static void insert_list(Dictionary dict, Dict_node * p, int l)
{
	Dict_node *mid, *rest, *clash;
	int half;   /* length of first half */
	int step;

	if (l == 0) return;

	half = (l-1)/2;
	mid = p;
	for (step = 0; step < half; step++)
	{
		mid = mid->left;
	}

	/* mid is the middle element; detach it from the rest of the list. */
	rest = mid->left;
	mid->left = mid->right = NULL;

	if (contains_underbar(mid->string))
	{
		insert_idiom(dict, mid);
	}
	else if (is_idiom_word(mid->string))
	{
		err_ctxt ec;
		ec.sent = NULL;
		err_msg(&ec, Warn, "Warning: Word \"%s\" found near line %d.\n"
		        "\tWords ending \".Ix\" (x a number) are reserved for idioms.\n"
		        "\tThis word will be ignored.\n",
		        mid->string, dict->line_number);
		free_dict_node(mid);
	}
	else
	{
		clash = abridged_lookup_list(dict, mid->string);
		if (clash != NULL)
		{
			Dict_node *hit;
			err_ctxt ec;
			ec.sent = NULL;
			err_msg(&ec, Warn, "Warning: The word \"%s\" "
			        "found near line %d of %s matches the following words:\n",
			        mid->string, dict->line_number, dict->name);
			hit = clash;
			while (hit != NULL)
			{
				fprintf(stderr, "\t%s", hit->string);
				hit = hit->right;
			}
			fprintf(stderr, "\n\tThis word will be ignored.\n");
			free_lookup_list(clash);
			free_dict_node(mid);
		}
		else
		{
			dict->root = insert_dict(dict, dict->root, mid);
			dict->num_entries++;
		}
	}

	/* The first half is bounded by its length, not by a NULL link. */
	insert_list(dict, p, half);
	insert_list(dict, rest, l-half-1);
}
Dictionary dictionary_create_from_db(const char *lang) { char *dbname; const char * t; Dictionary dict; Dict_node *dict_node; dict = (Dictionary) xalloc(sizeof(struct Dictionary_s)); memset(dict, 0, sizeof(struct Dictionary_s)); /* Language and file-name stuff */ dict->string_set = string_set_create(); t = strrchr (lang, '/'); t = (NULL == t) ? lang : t+1; dict->lang = string_set_add(t, dict->string_set); lgdebug(D_USER_FILES, "Debug: Language: %s\n", dict->lang); /* To disable spell-checking, just set the checker to NULL */ dict->spell_checker = spellcheck_create(dict->lang); #if defined HAVE_HUNSPELL || defined HAVE_ASPELL if (NULL == dict->spell_checker) prt_error("Info: Spell checker disabled."); #endif dict->base_knowledge = NULL; dict->hpsg_knowledge = NULL; dbname = join_path (lang, "dict.db"); dict->name = string_set_add(dbname, dict->string_set); free(dbname); /* Set up the database */ dict->db_handle = object_open(dict->name, db_open, NULL); dict->lookup_list = db_lookup_list; dict->free_lookup = db_free_llist; dict->lookup = db_lookup; dict->close = db_close; /* Misc remaining common (generic) dict setup work */ dict->left_wall_defined = boolean_dictionary_lookup(dict, LEFT_WALL_WORD); dict->right_wall_defined = boolean_dictionary_lookup(dict, RIGHT_WALL_WORD); dict->empty_word_defined = boolean_dictionary_lookup(dict, EMPTY_WORD_MARK); dict->unknown_word_defined = boolean_dictionary_lookup(dict, UNKNOWN_WORD); dict->use_unknown_word = true; dict_node = dictionary_lookup_list(dict, UNLIMITED_CONNECTORS_WORD); if (dict_node != NULL) dict->unlimited_connector_set = connector_set_create(dict_node->exp); free_lookup_list(dict, dict_node); return dict; }
/**
 * sentence_create() -- build a Sentence from an input string.
 *
 * Allocates the sentence, splits input_string into words with
 * separate_sentence(), sets up the conjunction tables, and builds the
 * expressions for each word.
 *
 * Returns NULL if the string cannot be separated, if expression
 * building fails, or if the dictionary is not using an unknown-word
 * entry and sentence_in_dictionary() reports failure.
 */
Sentence sentence_create(char *input_string, Dictionary dict)
{
	Sentence sent;
	int w;
	int unknown_ok;

	free_lookup_list();

	sent = (Sentence) xalloc(sizeof(struct Sentence_s));
	sent->dict = dict;

	/* Counters start at zero ... */
	sent->length = 0;
	sent->num_linkages_found = 0;
	sent->num_linkages_alloced = 0;
	sent->num_linkages_post_processed = 0;
	sent->num_valid_linkages = 0;
	sent->null_count = 0;

	/* ... and the lazily built tables start out empty. */
	sent->link_info = NULL;
	sent->deletable = NULL;
	sent->effective_dist = NULL;
	sent->parse_info = NULL;

	sent->string_set = string_set_create();

	if (!separate_sentence(input_string, sent))
	{
		/* Nothing beyond the string set has been set up yet. */
		string_set_delete(sent->string_set);
		xfree(sent, sizeof(struct Sentence_s));
		return NULL;
	}

	sent->q_pruned_rules = FALSE; /* for post processing */
	sent->is_conjunction = (char *) xalloc(sizeof(char)*sent->length);
	set_is_conjunction(sent);
	initialize_conjunction_tables(sent);

	/* in case we free these before they are set to anything else */
	for (w = 0; w < sent->length; w++)
	{
		sent->word[w].x = NULL;
		sent->word[w].d = NULL;
	}

	/* The in-dictionary check is made only when the unknown-word entry
	 * is not in use; expressions are built only if that check passes. */
	unknown_ok = (dict->unknown_word_defined && dict->use_unknown_word);
	if ((!unknown_ok && !sentence_in_dictionary(sent)) ||
	    !build_sentence_expressions(sent))
	{
		sentence_delete(sent);
		return NULL;
	}

	return sent;
}
/**
 * sentence_delete() -- release a sentence and everything hanging off it.
 *
 * Passing NULL is allowed and does nothing, so the result of a failed
 * sentence creation can be handed straight to this function.
 *
 * The teardown calls are kept in their established order.
 * NOTE(review): whether any of them depend on that order is not
 * visible from here -- confirm before reordering.
 */
void sentence_delete(Sentence sent)
{
	if (sent == NULL) return;

	/*free_andlists(sent); */
	free_sentence_disjuncts(sent);
	free_sentence_expressions(sent);
	string_set_delete(sent->string_set);
	free_parse_set(sent);
	free_post_processing(sent);
	post_process_close_sentence(sent->dict->postprocessor);
	free_lookup_list();
	free_deletable(sent);
	free_effective_dist(sent);

	/* The sizes passed to xfree() mirror those used at allocation. */
	xfree(sent->is_conjunction, sizeof(char)*sent->length);
	xfree((char *) sent, sizeof(struct Sentence_s));
}
/**
 * build_idiom_word_name() -- return idiomized name of given string.
 *
 * The idiomized name is s, cut at its first '.' (if any), with a
 * postfix of ".Ix" appended, where x is one more than the value
 * max_postfix_found() reports for the dictionary entries matching s.
 *
 * The name is assembled in a fixed-size local buffer; an over-long s
 * is truncated so that the postfix always fits.  The returned pointer
 * is the one string_set_add() gives back for the dictionary's string
 * set.
 */
static const char * build_idiom_word_name(Dictionary dict, const char * s)
{
	/* Room kept free for ".I", the digits (and sign) of an int, and
	 * the terminating NUL. */
	enum { IDIOM_POSTFIX_ROOM = 16 };

	char buff[2*MAX_WORD];
	size_t used = 0;
	int count;

	Dict_node *dn = dictionary_lookup_list(dict, s);
	count = max_postfix_found(dn)+1;
	free_lookup_list(dn);

	/* Copy the stem, never writing past the space reserved above. */
	while ((*s != '\0') && (*s != '.') &&
	       (used + IDIOM_POSTFIX_ROOM < sizeof(buff)))
	{
		buff[used] = *s;
		used++;
		s++;
	}

	snprintf(buff + used, sizeof(buff) - used, ".I%d", count);

	return string_set_add(buff, dict->string_set);
}
/**
 * build_word_expressions() -- build list of expressions for a word
 *
 * Looks up the word s in the dictionary.  Returns NULL if it's not there.
 * If there, it builds the list of expressions for the word, and returns
 * a pointer to it.
 *
 * Each X_node gets its own copy of the entry's expression, and shares
 * the entry's string pointer.  Nodes are pushed onto the front of the
 * result, so the result is in the reverse of the lookup order.
 */
X_node * build_word_expressions(Sentence sent, const char * s)
{
	Dict_node *matches = dictionary_lookup_list(sent->dict, s);
	Dict_node *entry;
	X_node *head = NULL;

	for (entry = matches; entry != NULL; entry = entry->right)
	{
		X_node *fresh = (X_node *) xalloc(sizeof(X_node));

		fresh->next = head;
		fresh->exp = copy_Exp(entry->exp);
		fresh->string = entry->string;
		head = fresh;
	}

	free_lookup_list (matches);
	return head;
}
/**
 * dict_display_word_expr() - display the connector info for a given word.
 *
 * Prints, for every dictionary entry matching s, the entry's string
 * followed by its expression.  A short notice is printed instead when
 * nothing matches.
 */
void dict_display_word_expr(Dictionary dict, const char * s)
{
	Dict_node *head = dictionary_lookup_list(dict, s);
	Dict_node *node;

	if (NULL == head)
	{
		printf(" \"%s\" matches nothing in the dictionary.\n", s);
		return;
	}

	printf("\nExpressions:\n");
	node = head;
	while (node != NULL)
	{
		printf(" ");
		left_print_string(stdout, node->string, " ");
		print_expression(node->exp);
		printf("\n\n");
		node = node->right;
	}

	free_lookup_list(head);
}
/* The following function is dictionary_create with an extra paramater called "path". If this is non-null, then the path used to find the file is taken from that path. Otherwise the path is taken from the dict_name. This is only needed because an affix_file is opened by a recursive call to this function. */ static Dictionary internal_dictionary_create(char * dict_name, char * pp_name, char * cons_name, char * affix_name, char * path) { Dictionary dict; static int rand_table_inited=FALSE; Dict_node *dict_node; char * dictionary_path_name; dict = (Dictionary) xalloc(sizeof(struct Dictionary_s)); if (!rand_table_inited) { init_randtable(); rand_table_inited=TRUE; } dict->string_set = string_set_create(); dict->name = string_set_add(dict_name, dict->string_set); dict->num_entries = 0; dict->is_special = FALSE; dict->already_got_it = '\0'; dict->line_number = 1; dict->root = NULL; dict->word_file_header = NULL; dict->exp_list = NULL; dict->affix_table = NULL; /* *DS* remove this if (pp_name != NULL) { dict->post_process_filename = string_set_add(pp_name, dict->string_set); } else { dict->post_process_filename = NULL; } */ if (path != NULL) dictionary_path_name = path; else dictionary_path_name = dict_name; if (!open_dictionary(dictionary_path_name, dict)) { lperror(NODICT, dict_name); string_set_delete(dict->string_set); xfree(dict, sizeof(struct Dictionary_s)); return NULL; } if (!read_dictionary(dict)) { string_set_delete(dict->string_set); xfree(dict, sizeof(struct Dictionary_s)); return NULL; } dict->left_wall_defined = boolean_dictionary_lookup(dict, LEFT_WALL_WORD); dict->right_wall_defined = boolean_dictionary_lookup(dict, RIGHT_WALL_WORD); dict->postprocessor = post_process_open(dict->name, pp_name); dict->constituent_pp = post_process_open(dict->name, cons_name); dict->affix_table = NULL; if (affix_name != NULL) { dict->affix_table = internal_dictionary_create(affix_name, NULL, NULL, NULL, dict_name); if (dict->affix_table == NULL) { fprintf(stderr, "%s\n", 
lperrmsg); exit(-1); } } dict->unknown_word_defined = boolean_dictionary_lookup(dict, UNKNOWN_WORD); dict->use_unknown_word = TRUE; dict->capitalized_word_defined = boolean_dictionary_lookup(dict, PROPER_WORD); dict->pl_capitalized_word_defined = boolean_dictionary_lookup(dict, PL_PROPER_WORD); dict->hyphenated_word_defined = boolean_dictionary_lookup(dict, HYPHENATED_WORD); dict->number_word_defined = boolean_dictionary_lookup(dict, NUMBER_WORD); dict->ing_word_defined = boolean_dictionary_lookup(dict, ING_WORD); dict->s_word_defined = boolean_dictionary_lookup(dict, S_WORD); dict->ed_word_defined = boolean_dictionary_lookup(dict, ED_WORD); dict->ly_word_defined = boolean_dictionary_lookup(dict, LY_WORD); dict->max_cost = 1000; if ((dict_node = dictionary_lookup(dict, ANDABLE_CONNECTORS_WORD)) != NULL) { dict->andable_connector_set = connector_set_create(dict_node->exp); } else { dict->andable_connector_set = NULL; } if ((dict_node = dictionary_lookup(dict, UNLIMITED_CONNECTORS_WORD)) != NULL) { dict->unlimited_connector_set = connector_set_create(dict_node->exp); } else { dict->unlimited_connector_set = NULL; } free_lookup_list(); return dict; }
/**
 * read_entry() -- read one dictionary entry
 * Starting with the current token, parse one dictionary entry.
 * A single dictionary entry must have one and only one colon in it,
 * and is terminated by a semi-colon.
 * Add these words to the dictionary.
 *
 * Returns 1 on success and 0 on error.  On a syntax error, the word
 * nodes collected so far for this entry are released.
 */
static int read_entry(Dictionary dict)
{
	Exp *n;
	int i;

	Dict_node *dn_new, *dnx, *dn = NULL;

	/* Reset multi-byte shift state every line. */
	memset(&dict->mbss, 0, sizeof(dict->mbss));

	while (!is_equal(dict, ':'))
	{
		if (dict->is_special)
		{
			dict_error(dict, "I expected a word but didn\'t get it.");
			/* Release any words already collected for this entry. */
			goto syntax_error;
		}

		/* If it's a word-file name */
		/* However, be careful to reject "/.v" which is the division symbol
		 * used in equations (.v means verb-like) */
		if ((dict->token[0] == '/') && (dict->token[1] != '.'))
		{
			dn = read_word_file(dict, dn, dict->token);
			if (dn == NULL)
			{
				/* NOTE(review): the previous list head is overwritten by
				 * the NULL result; whether read_word_file() releases that
				 * list on failure is not visible here -- confirm. */
				err_ctxt ec;
				ec.sent = NULL;
				err_msg(&ec, Error, "Error opening word file %s\n", dict->token);
				return 0;
			}
		}
		else
		{
			/* Push a new node onto the front of the left-linked list. */
			dn_new = dict_node_new();
			dn_new->left = dn;
			dn = dn_new;
			dn->file = NULL;
			dn->string = string_set_add(dict->token, dict->string_set);
		}

		/* Advance to next entry, unless error */
		if (0 == link_advance(dict)) goto syntax_error;
	}

	/* pass the : */
	if (!link_advance(dict))
	{
		goto syntax_error;
	}

	n = expression(dict);
	if (n == NULL)
	{
		goto syntax_error;
	}

	if (!is_equal(dict, ';'))
	{
		dict_error(dict, "Expecting \";\" at the end of an entry.");
		goto syntax_error;
	}

	/* pass the ; */
	if (!link_advance(dict))
	{
		goto syntax_error;
	}

	/* At this point, dn points to a list of Dict_nodes connected by
	 * their left pointers.  These are to be inserted into the dictionary */
	i = 0;
	for (dnx = dn; dnx != NULL; dnx = dnx->left)
	{
		dnx->exp = n;
		i++;
	}
	insert_list(dict, dn, i);
	return 1;

syntax_error:
	/* The pending nodes are chained through their left pointers, and
	 * their right pointers were never set in this function.  Lookup
	 * lists are chained through right pointers instead, so each node
	 * is detached and released as a one-element lookup list. */
	while (dn != NULL)
	{
		dnx = dn->left;
		dn->left = NULL;
		dn->right = NULL;
		free_lookup_list(dn);
		dn = dnx;
	}
	return 0;
}