/**
 * insert_list() -
 * Insert a chain of Dict_nodes into the dictionary.
 *
 * p is the head of a chain of nodes linked through their `left`
 * pointers, and l is the number of nodes in that chain. The `left`
 * pointer of the final node is never followed, so it need not be NULL.
 *
 * The middle node of the chain is handled first, then the nodes in
 * front of it, then the nodes behind it (each half recursively, in
 * the same manner).
 *
 * Handling a node means exactly one of the following:
 *  - its string contains an underbar: it is handed to insert_idiom();
 *  - is_idiom_word() accepts its string: a warning is issued and the
 *    node is freed;
 *  - abridged_lookup_list() already finds its string: a warning listing
 *    the matching entries is issued and the node is freed;
 *  - otherwise it is added to the tree with insert_dict() and
 *    dict->num_entries is incremented.
 *
 * Historical note: the middle/left/right ordering seems to have begun
 * as an attempt to work around the badly unbalanced binary tree that
 * plain insertion produced. The tree is now rebalanced with the DSW
 * algorithm, which would seem to make the bisected insertion obsolete;
 * yet, oddly enough, it makes the DSW balancing run faster than a
 * simple insertion does. This probably has to do with the dictionaries
 * being in alphabetical order: the subdivision randomizes things a bit.
 */
static void insert_list(Dictionary dict, Dict_node * p, int l)
{
	Dict_node *mid;      /* the node handled by this call */
	Dict_node *rest;     /* head of the chain that follows mid */
	Dict_node *matches;  /* existing entries that clash with mid */
	int before;          /* number of nodes in front of mid */
	int steps;

	if (0 == l) return;

	/* Walk to the middle node. */
	before = (l - 1) / 2;
	mid = p;
	steps = before;
	while (steps > 0)
	{
		mid = mid->left;
		steps--;
	}

	/* Remember where the second half starts, then detach the node. */
	rest = mid->left;
	mid->right = NULL;
	mid->left = NULL;

	if (contains_underbar(mid->string))
	{
		insert_idiom(dict, mid);
	}
	else if (is_idiom_word(mid->string))
	{
		err_ctxt ec;
		ec.sent = NULL;
		err_msg(&ec, Warn, "Warning: Word \"%s\" found near line %d.\n"
		        "\tWords ending \".Ix\" (x a number) are reserved for idioms.\n"
		        "\tThis word will be ignored.\n",
		        mid->string, dict->line_number);
		free_dict_node(mid);
	}
	else
	{
		matches = abridged_lookup_list(dict, mid->string);
		if (NULL == matches)
		{
			/* Not in the dictionary yet: a genuinely new entry. */
			dict->root = insert_dict(dict, dict->root, mid);
			dict->num_entries++;
		}
		else
		{
			Dict_node *hit;
			err_ctxt ec;
			ec.sent = NULL;
			err_msg(&ec, Warn, "Warning: The word \"%s\" "
			        "found near line %d of %s matches the following words:\n",
			        mid->string, dict->line_number, dict->name);

			/* The lookup result is chained through `right`. */
			hit = matches;
			while (NULL != hit)
			{
				fprintf(stderr, "\t%s", hit->string);
				hit = hit->right;
			}
			fprintf(stderr, "\n\tThis word will be ignored.\n");

			free_lookup_list(matches);
			free_dict_node(mid);
		}
	}

	/* Now the nodes in front of the middle one, then those behind it. */
	insert_list(dict, p, before);
	insert_list(dict, rest, l - before - 1);
}
/**
 * load_affix() -
 * Add every word of a node chain to the affix dictionary, freeing the
 * chain's nodes along the way.
 *
 * afdict is the affix dictionary being filled in.
 * dn is the head of a NULL-terminated chain of nodes linked through
 * their `left` pointers.
 * l is not used by this function.
 *
 * For each node, word_only_connector() supplies the connector name,
 * afdict_find() maps that name to an affix class, and the word's string
 * is appended to that class with affix_list_add(). The node itself is
 * then released with xfree().
 *
 * If word_only_connector() returns NULL for some node, a warning is
 * issued and processing of the chain stops there. That node and all
 * nodes after it are released without being added.
 */
static void load_affix(Dictionary afdict, Dict_node *dn, int l)
{
	Dict_node * dnx = NULL;
	for (; NULL != dn; dn = dnx)
	{
		char *string;
		const char *con = word_only_connector(dn);
		if (NULL == con)
		{
			/* ??? should we support here more than one class? */
			prt_error("Warning: Word \"%s\" found near line %d of %s.\n"
			        "\tWord has more than one connector.\n"
			        "\tThis word will be ignored.",
			        dn->string, afdict->line_number, afdict->name);

			/* Every node that gets processed is freed at the bottom of
			 * this loop; bailing out must not leave the unprocessed
			 * remainder of the chain (this node included) allocated. */
			while (NULL != dn)
			{
				dnx = dn->left;
				xfree((char *)dn, sizeof(Dict_node));
				dn = dnx;
			}
			return;
		}

		/* The affix files serve a dual purpose: they indicate both
		 * what a unit is, connector-wise, and what is strippable, as
		 * a string. When the unit is an 'idiom' (i.e. two words,
		 * e.g. base_pair or degrees_C) then only the first word can
		 * be stripped away from a run-on expression (e.g. "86degrees C")
		 */
		if (contains_underbar(dn->string))
		{
			char *p;
			string = strdup(dn->string);

			/* Cut the copy at the first underbar found at index 1 or
			 * later; the first character is never taken as the
			 * separator. */
			p = string+1;
			while (*p != '_' && *p != '\0') p++;
			*p = '\0';
		}
		else
		{
			string = deinflect(dn->string);
		}

		/* The temporary string is freed right after the call, so
		 * affix_list_add() is assumed not to keep this pointer --
		 * TODO confirm. */
		affix_list_add(afdict, afdict_find(afdict, con,
		               /*notify_err*/true), string);
		free(string);

		/* Fetch the successor before the node goes away. */
		dnx = dn->left;
		xfree((char *)dn, sizeof(Dict_node));
	}
}