int main(int argc, char** argv) { int i; int al, wl; FILE * wrdlst; FILE * afflst; char *wf, *af; char * ap; char ts[MAX_LN_LEN]; (void)argc; /* first parse the command line options */ /* arg1 - munched wordlist, arg2 - affix file */ if (argv[1]) { wf = mystrdup(argv[1]); } else { fprintf(stderr,"correct syntax is:\n"); fprintf(stderr,"unmunch dic_file affix_file\n"); exit(1); } if (argv[2]) { af = mystrdup(argv[2]); } else { fprintf(stderr,"correct syntax is:\n"); fprintf(stderr,"unmunch dic_file affix_file\n"); exit(1); } /* open the affix file */ afflst = fopen(af,"r"); if (!afflst) { fprintf(stderr,"Error - could not open affix description file\n"); exit(1); } /* step one is to parse the affix file building up the internal affix data structures */ numpfx = 0; numsfx = 0; fullstrip = 0; if (parse_aff_file(afflst)) { fprintf(stderr,"Error - in affix file loading\n"); exit(1); } fclose(afflst); fprintf(stderr,"parsed in %d prefixes and %d suffixes\n",numpfx,numsfx); /* affix file is now parsed so create hash table of wordlist on the fly */ /* open the wordlist */ wrdlst = fopen(wf,"r"); if (!wrdlst) { fprintf(stderr,"Error - could not open word list file\n"); exit(1); } /* skip over the hash table size */ if (! fgets(ts, MAX_LN_LEN-1,wrdlst)) { fclose(wrdlst); return 2; } mychomp(ts); while (fgets(ts,MAX_LN_LEN-1,wrdlst)) { mychomp(ts); /* split each line into word and affix char strings */ ap = strchr(ts,'/'); if (ap) { *ap = '\0'; ap++; al = strlen(ap); } else { al = 0; ap = NULL; } wl = strlen(ts); numwords = 0; wlist[numwords].word = mystrdup(ts); wlist[numwords].pallow = 0; numwords++; if (al) expand_rootword(ts,wl,ap); for (i=0; i < numwords; i++) { fprintf(stdout,"%s\n",wlist[i].word); free(wlist[i].word); wlist[i].word = NULL; wlist[i].pallow = 0; } } fclose(wrdlst); return 0; }
int main(int argc, char** argv) { int i, j, k, n; int rl, p , nwl; int al; FILE * wrdlst; FILE * afflst; char *nword, *wf, *af; char as[(MAX_PREFIXES + MAX_SUFFIXES)]; char * ap; struct hentry * ep; struct hentry * ep1; struct affent * pfxp; struct affent * sfxp; /* first parse the command line options */ /* arg1 - wordlist, arg2 - affix file */ if (argv[1]) { wf = mystrdup(argv[1]); } else { fprintf(stderr,"correct syntax is:\n"); fprintf(stderr,"munch word_list_file affix_file\n"); exit(1); } if (argv[2]) { af = mystrdup(argv[2]); } else { fprintf(stderr,"correct syntax is:\n"); fprintf(stderr,"munch word_list_file affix_file\n"); exit(1); } /* open the affix file */ afflst = fopen(af,"r"); if (!afflst) { fprintf(stderr,"Error - could not open affix description file\n"); exit(1); } /* step one is to parse the affix file building up the internal affix data structures */ numpfx = 0; numsfx = 0; parse_aff_file(afflst); fclose(afflst); fprintf(stderr,"parsed in %d prefixes and %d suffixes\n",numpfx,numsfx); /* affix file is now parsed so create hash table of wordlist on the fly */ /* open the wordlist */ wrdlst = fopen(wf,"r"); if (!wrdlst) { fprintf(stderr,"Error - could not open word list file\n"); exit(1); } if (load_tables(wrdlst)) { fprintf(stderr,"Error building hash tables\n"); exit(1); } fclose(wrdlst); for (i=0; i< tablesize; i++) { ep = &tableptr[i]; if (ep->word == NULL) continue; for ( ; ep != NULL; ep = ep->next) { numroots = 0; aff_chk(ep->word,strlen(ep->word)); if (numroots) { /* now there might be a number of combinations */ /* of prefixes and suffixes that might match this */ /* word. So how to choose? As a first shot look */ /* for the shortest remaining root word to */ /* to maximize the combinatorial power */ /* but be careful, do not REQUIRE a specific combination */ /* of a prefix and a suffix to generate the word since */ /* that violates the rule that the root word with just */ /* the prefix or just the suffix must also exist in the */ /* wordlist as well */ /* in fact because of the cross product issue, this not a */ /* simple choice since some combinations of previous */ /* prefixes and new suffixes may not be valid. */ /* The only way to know is to simply try them all */ rl = 1000; p = -1; for (j = 0; j < numroots; j++){ /* first collect the root word info and build up */ /* the potential new affix string */ nword = (roots[j].hashent)->word; nwl = strlen(nword); *as = '\0'; al = 0; ap = as; if (roots[j].prefix) *ap++ = (roots[j].prefix)->achar; if (roots[j].suffix) *ap++ = (roots[j].suffix)->achar; if ((roots[j].hashent)->affstr) { strcpy(ap,(roots[j].hashent)->affstr); } else { *ap = '\0'; } al =strlen(as); /* now expand the potential affix string to generate */ /* all legal words and make sure they all exist in the */ /* word list */ numwords = 0; wlist[numwords].word = mystrdup(nword); wlist[numwords].pallow = 0; numwords++; n = 0; if (al) expand_rootword(nword,nwl,as,al); for (k=0; k<numwords; k++) { if (lookup(wlist[k].word)) n++; free(wlist[k].word); wlist[k].word = NULL; wlist[k].pallow = 0; } /* if all exist in word list then okay */ if (n == numwords) { if (nwl < rl) { rl = nwl; p = j; } } } if (p != -1) { ep1 = roots[p].hashent; pfxp = roots[p].prefix; sfxp = roots[p].suffix; ep1->keep = 1; if (pfxp != NULL) add_affix_char(ep1,pfxp->achar); if (sfxp != NULL) add_affix_char(ep1,sfxp->achar); } else { ep->keep = 1; } } else { ep->keep = 1; } } } /* now output only the words to keep along with affixes info */ /* first count how many words that is */ k = 0; for (i=0; i< tablesize; i++) { ep = &tableptr[i]; if (ep->word == NULL) continue; for ( ; ep != NULL; ep = ep->next) { if (ep->keep > 0) k++; } } fprintf(stdout,"%d\n",k); for (i=0; i< tablesize; i++) { ep = &tableptr[i]; if (ep->word == NULL) continue; for ( ; ep != NULL; ep = ep->next) { if (ep->keep > 0) { if (ep->affstr != NULL) { fprintf(stdout,"%s/%s\n",ep->word,ep->affstr); } else { fprintf(stdout,"%s\n",ep->word); } } } } return 0; }