Exemplo n.º 1
0
//
// Reads the word file line by line and tries to analyse each word with
// analyse_word().
//
// - A word for which analyse_word() returns zero/false is written, one per
//   line, to 'new_unknown_words'.
// - Every other word is counted; the total is printed once the loop ends.
// - A progress line is printed every 10000 words read.
//
// 'd', 'debug', 'result', 'prefix', 'suffix', 'alph', 'UTAG', 'rules' and
// 'entries' are not used here directly: they are forwarded unchanged to
// analyse_word().
//
void analyse_word_list(Dictionary* d,
			       U_FILE* words,
			       U_FILE* result,
			       U_FILE* debug,
			       U_FILE* new_unknown_words,
			       const Alphabet* alph,
			       const bool* prefix,const bool* suffix,
			       struct utags UTAG,
			       vector_ptr* rules,
			       vector_ptr* entries)
{
  u_printf("Analysing russian unknown words...\n");
  int n=0;           // number of words successfully analysed
  int words_done=0;  // number of words read so far, analysed or not
  Ustring* s=new_Ustring(MAX_WORD_LENGTH);
  // loop until readline() reports EOF on the word file
  while (EOF!=readline(s,words)) {
    if (!analyse_word(s->str,d,debug,result,prefix,suffix,alph,UTAG,rules,entries)) {
      // if the analysis has failed, we store the word in the new unknown word file
      u_fprintf(new_unknown_words,"%S\n",s->str);
    } else {
      n++;
    }
    if ((++words_done % 10000) == 0) {
      // each progress report goes on its own line; without the trailing
      // newline successive reports (and the final summary) run together
      u_printf("%d words done\n", words_done);
    }
  }
  free_Ustring(s);
  u_printf("%d words decomposed as compound words\n",n);
}
Exemplo n.º 2
0
//
// Reads the word file line by line and tries to analyse each word with
// analyse_word().
//
// - A word for which analyse_word() returns zero/false is written, one per
//   line, to 'new_unknown_words'.
// - Every other word is counted; the total is printed once the loop ends.
// - A progress line is printed every 10000 words read.
//
// 'tableau_bin', 'inf', 'debug', 'result', 'prefix', 'suffix', 'alph',
// 'UTAG', 'rules' and 'entries' are not used here directly: they are
// forwarded unchanged to analyse_word().
//
void analyse_word_list(const unsigned char* tableau_bin,
			       const struct INF_codes* inf,
			       U_FILE* words,
			       U_FILE* result,
			       U_FILE* debug,
			       U_FILE* new_unknown_words,
			       const Alphabet* alph,
			       const bool* prefix,const bool* suffix,
			       struct utags UTAG,
			       vector_ptr* rules,
			       vector_ptr* entries)
{
  unichar s[MAX_WORD_LENGTH];  // buffer receiving the current line
  u_printf("Analysing russian unknown words...\n");
  int n=0;           // number of words successfully analysed
  int words_done=0;  // number of words read so far, analysed or not
  // loop until u_fgets_limit2() reports EOF on the word file
  while (EOF!=u_fgets_limit2(s,MAX_WORD_LENGTH,words)) {
    if (!analyse_word(s,tableau_bin,debug,result,inf,prefix,suffix,alph,UTAG,rules,entries)) {
      // if the analysis has failed, we store the word in the new unknown word file
      u_fprintf(new_unknown_words,"%S\n",s);
    } else {
      n++;
    }
    if ((++words_done % 10000) == 0) {
      // each progress report goes on its own line; without the trailing
      // newline successive reports (and the final summary) run together
      u_printf("%d words done\n", words_done);
    }
  }
  u_printf("%d words decomposed as compound words\n",n);
}