/**
 * Move the words of a dictionary-node list into the affix class lists.
 *
 * The list is walked through the nodes' `left` links. For each node, the
 * connector name returned by word_only_connector() selects the affix class
 * (looked up with afdict_find()) to which the word's string is added.
 *
 * Every node is released with xfree() once it has been handled, whether or
 * not its word was usable, so the caller must not touch the list afterwards.
 *
 * A word for which word_only_connector() returns NULL is reported with a
 * warning and skipped; the rest of the list is still processed and freed.
 *
 * @param afdict  Affix dictionary that receives the strings; its name and
 *                line_number are used in the warning message.
 * @param dn      Head of the node list (linked through `left`); consumed.
 * @param l       Not used by this function.
 */
static void load_affix(Dictionary afdict, Dict_node *dn, int l)
{
	while (NULL != dn)
	{
		/* Fetch the successor first: the node is freed at the end of
		 * every iteration, on the error path too. */
		Dict_node *next = dn->left;
		const char *con = word_only_connector(dn);

		if (NULL == con)
		{
			/* ??? should we support here more than one class? */
			prt_error("Warning: Word \"%s\" found near line %d of %s.\n"
			          "\tWord has more than one connector.\n"
			          "\tThis word will be ignored.",
			          dn->string, afdict->line_number, afdict->name);
		}
		else
		{
			char *string;

			/* The affix files serve a dual purpose: they indicate both
			 * what a unit is, connector-wise, and what is strippable, as
			 * a string. When the unit is an 'idiom' (i.e. two words,
			 * e.g. base_pair or degrees_C) then only the first word can
			 * be stripped away from a run-on expression
			 * (e.g. "86degrees C"). */
			if (contains_underbar(dn->string))
			{
				char *sep;

				string = strdup(dn->string);
				/* Cut at the first underbar, ignoring one in the very
				 * first position (the search starts at the second
				 * character). */
				sep = strchr(string + 1, '_');
				if (NULL != sep) *sep = '\0';
			}
			else
			{
				string = deinflect(dn->string);
			}

			affix_list_add(afdict,
			               afdict_find(afdict, con, /*notify_err*/true),
			               string);
			free(string);
		}

		xfree((char *)dn, sizeof(Dict_node));
		dn = next;
	}
}
/**
 * Compare a portion of the tokenized string, starting at word_start with
 * length of numchar, to the dictionary or affix class word that is defined
 * in the capture group whose info is pointed to by cgnump.
 *
 * If the capture group has a lookup mark, it is prepended
 * (lookup_mark_pos 'p') or appended (lookup_mark_pos 'a') to the word
 * before the lookup. Any other position value is reported (E3) and the
 * bare word is looked up.
 *
 * When cgnump->afclass is NULL the word is looked up in cgnump->dict with
 * boolean_dictionary_lookup(); otherwise it is searched for in the affix
 * class named by afclass.
 *
 * @param word_start  Start of the characters to look up.
 * @param numchar     Number of characters to take from word_start.
 * @param cgnump      Capture group info: dictionary, affix class and
 *                    optional lookup mark.
 * @return true if the word was found; false if it was not found or if an
 *         internal error (E1/E2 below) prevented the lookup.
 *
 * FIXME: Return int instead of bool, so that -1 can signify an internal
 * error; see the comments at E1 and E2 below.
 */
static bool is_word(const char *word_start, int numchar, cgnum_t *cgnump)
{
	Dictionary const dict = cgnump->dict;
	const char * const afclass = cgnump->afclass;
	const int lookup_mark_len =
		(NULL != cgnump->lookup_mark) ? strlen(cgnump->lookup_mark) : 0;
	/* Room for the word, the optional mark and the terminating NUL. */
	char * const word = alloca(numchar+lookup_mark_len+1);
#ifdef AFFIX_DICTIONARY_TREE
	const Dict_node *dn;
#else
	const Afdict_class *ac;
	size_t i;
#endif

	/* Append/prepend stem/infix marks. */
	if (NULL == cgnump->lookup_mark)
	{
		strncpy(word, word_start, numchar);
		word[numchar] = '\0';
	}
	else
	{
		switch (cgnump->lookup_mark_pos)
		{
		case 'p': /* prepend a mark */
			strcpy(word, cgnump->lookup_mark);
			strncat(word, word_start, numchar);
			word[numchar+lookup_mark_len] = '\0';
			break;
		case 'a': /* append a mark */
			strncpy(word, word_start, numchar);
			strcpy(word+numchar, cgnump->lookup_mark);
			break;
		default:
			/* Unknown mark position: report it and look up the bare word. */
			printf("is_word:E3('%x' %s)",
			       cgnump->lookup_mark_pos, cgnump->lookup_mark);
			strncpy(word, word_start, numchar);
			word[numchar] = '\0';
			break;
		}
	}

	lgdebug(7, "LOOKUP '%s' in %s: ", word, dict->name);

	/* No affix class: a plain dictionary lookup decides. */
	if (NULL == afclass) return boolean_dictionary_lookup(dict, word);

	/* We don't have for now a tree representation of the affix file,
	 * only lists. */
#ifdef AFFIX_DICTIONARY_TREE
	dn = lookup_list(dict, word);
	printf("WORD %s afclass %s dn %p\n", word, afclass, (const void *)dn);
	if (NULL == dn) return false;

	for (; NULL != dn; dn = dn->left)
	{
		const char *con = word_only_connector(dn);
		if (NULL == con)
		{
			/* Internal error - nothing else to do for now unless we don't
			 * return bool, but return an int so -1 signifies an error.
			 * Skip this node: a NULL connector name must not reach
			 * printf("%s") or strcmp(). */
			printf("is_word(%s):E1 ", word);
			continue;
		}
		printf("CON '%s'\n", con);
		if (0 == strcmp(afclass, con)) return true;
	}
#else
	/* Make it the hard way. */
	ac = afdict_find(dict, afclass, /*notify_err*/false);
	if (NULL == ac)
	{
		/* Internal error - nothing else to do for now unless we don't
		 * return bool, but return an int so -1 signifies an error.
		 * Without the class there is nothing to search, so report
		 * "not found" instead of dereferencing a NULL pointer. */
		printf("is_word(%s):E2 ", word);
		return false;
	}

	for (i = 0; i < ac->length; i++)
	{
		if (0 == strcmp(ac->string[i], word)) return true;
	}
#endif

	return false;
}