// // this function reads words in the word file and try analyse them // void analyse_word_list(Dictionary* d, U_FILE* words, U_FILE* result, U_FILE* debug, U_FILE* new_unknown_words, const Alphabet* alph, const bool* prefix,const bool* suffix, struct utags UTAG, vector_ptr* rules, vector_ptr* entries) { u_printf("Analysing russian unknown words...\n"); int n=0; int words_done = 0; Ustring* s=new_Ustring(MAX_WORD_LENGTH); while (EOF!=readline(s,words)) { if (!analyse_word(s->str,d,debug,result,prefix,suffix,alph,UTAG,rules,entries)) { // if the analysis has failed, we store the word in the new unknown word file u_fprintf(new_unknown_words,"%S\n",s->str); } else { n++; } if ( (++words_done % 10000) == 0) u_printf("%d words done", words_done); } free_Ustring(s); u_printf("%d words decomposed as compound words\n",n); }
// // this function reads words in the word file and try analyse them // void analyse_word_list(const unsigned char* tableau_bin, const struct INF_codes* inf, U_FILE* words, U_FILE* result, U_FILE* debug, U_FILE* new_unknown_words, const Alphabet* alph, const bool* prefix,const bool* suffix, struct utags UTAG, vector_ptr* rules, vector_ptr* entries) { unichar s[MAX_WORD_LENGTH]; u_printf("Analysing russian unknown words...\n"); int n=0; int words_done = 0; while (EOF!=u_fgets_limit2(s,MAX_WORD_LENGTH,words)) { if (!analyse_word(s,tableau_bin,debug,result,inf,prefix,suffix,alph,UTAG,rules,entries)) { // if the analysis has failed, we store the word in the new unknown word file u_fprintf(new_unknown_words,"%S\n",s); } else { n++; } if ( (++words_done % 10000) == 0) u_printf("%d words done", words_done); } u_printf("%d words decomposed as compound words\n",n); }