/**
 * Returns 1 if the given .fst2 corresponds to a valid sentence automaton; 0
 * otherwise. The following conditions must all hold:
 * 
 * 1) there must be only one graph
 * 2) it must be acyclic
 * 3) there must not be any <E> transition with an output
 * 4) <E> must be the only tag without an output
 * 5) all other tags must have an output of the form w x y z f g, with
 *    w and y being integers >=0, and x, z, f and g being integers >=-1 
 */
int valid_sentence_automaton_write_error(const VersatileEncodingConfig* vec,const char* name,U_FILE*) {
struct FST2_free_info fst2_free;
Fst2* fst2=load_abstract_fst2(vec,name,0,&fst2_free);
if (fst2==NULL) return 0;
/* Condition 1 */
if (fst2->number_of_graphs!=1) {
   free_abstract_Fst2(fst2,&fst2_free);
   return 0;
}
/* Condition 2 */
if (!is_acyclic(fst2,1)) {
   free_abstract_Fst2(fst2,&fst2_free);
   return 0;
}
/* Conditions 3, 4 & 5 */
if (!valid_outputs(fst2)) {
   free_abstract_Fst2(fst2,&fst2_free);
   return 0;
}
/* Victory! */
free_abstract_Fst2(fst2,&fst2_free);
return 1;
}
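/*
 * A minimal usage sketch (not part of the original sources): it shows how the
 * check above might be called before trusting a sentence automaton. It only
 * uses names that already appear in the surrounding examples (VEC_DEFAULT,
 * error()) and assumes the same headers.
 */
static int check_sentence_automaton_example(const char* fst2_name) {
VersatileEncodingConfig vec=VEC_DEFAULT;
if (!valid_sentence_automaton_write_error(&vec,fst2_name,NULL)) {
   error("%s is not a valid sentence automaton\n",fst2_name);
   return 0;
}
return 1;
}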
/**
 * Constructs and returns a token tree from a normalization grammar.
 * Tokens are represented by strings.
 */
struct normalization_tree* load_normalization_transducer_string(const VersatileEncodingConfig* vec,const char* name) {
struct FST2_free_info fst2_free;
Fst2* automate=load_abstract_fst2(vec,name,0,&fst2_free);
if (automate==NULL) {
   /* If the normalization transducer could not be loaded, we return NULL */
   return NULL;
}
struct normalization_tree* root=new_normalization_tree();
/* We start the exploration with an empty string */
unichar a[1];
a[0]='\0';
explorer_automate_normalization_string(automate,automate->initial_states[1],root,a);
free_abstract_Fst2(automate,&fst2_free);
return root;
}
int main_fst2txt(struct fst2txt_parameters* p) {
    p->f_input=u_fopen_existing_versatile_encoding(p->mask_encoding_compatibility_input,p->text_file,U_READ);
    if (p->f_input==NULL) {
        error("Cannot open file %s\n",p->text_file);
        return 1;
    }

    p->text_buffer=new_buffer_for_file(UNICHAR_BUFFER,p->f_input,CAPACITY_LIMIT);
    p->buffer=p->text_buffer->unichar_buffer;

    p->f_output=u_fopen_creating_versatile_encoding(p->encoding_output,p->bom_output,p->temp_file,U_WRITE);
    if (p->f_output==NULL) {
        error("Cannot open temporary file %s\n",p->temp_file);
        u_fclose(p->f_input);
        return 1;
    }

    p->fst2=load_abstract_fst2(p->fst_file,1,NULL);
    if (p->fst2==NULL) {
        error("Cannot load grammar %s\n",p->fst_file);
        u_fclose(p->f_input);
        u_fclose(p->f_output);
        return 1;
    }

    if (p->alphabet_file!=NULL && p->alphabet_file[0]!='\0') {
       p->alphabet=load_alphabet(p->alphabet_file);
       if (p->alphabet==NULL) {
          error("Cannot load alphabet file %s\n",p->alphabet_file);
          u_fclose(p->f_input);
          u_fclose(p->f_output);
          free_abstract_Fst2(p->fst2,NULL);
          return 1;
       }
    }

    u_printf("Applying %s in %s mode...\n",p->fst_file,(p->output_policy==MERGE_OUTPUTS)?"merge":"replace");
    build_state_token_trees(p);
    parse_text(p);
    u_fclose(p->f_input);
    u_fclose(p->f_output);
    af_remove(p->text_file);
    af_rename(p->temp_file,p->text_file);
    u_printf("Done.\n");
    return 0;
}
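/*
 * A hedged usage sketch (not part of the original sources): it fills only the
 * fst2txt_parameters fields that main_fst2txt() reads above. The constructor
 * new_fst2txt_parameters() is assumed to exist and to zero/default-initialize
 * the structure (including the encoding fields); the file names are purely
 * illustrative. free_fst2txt_parameters(), shown further below, calls free()
 * on the name fields, so heap copies are passed here.
 */
static int run_fst2txt_example(void) {
struct fst2txt_parameters* p=new_fst2txt_parameters();   /* assumed constructor */
if (p==NULL) return 1;
p->text_file=strdup("corpus.snt");        /* text to normalize (illustrative) */
p->temp_file=strdup("corpus.snt.tmp");    /* temporary output, renamed on success */
p->fst_file=strdup("Norm.fst2");          /* grammar to apply (illustrative) */
p->alphabet_file=strdup("Alphabet.txt");  /* optional alphabet (illustrative) */
p->output_policy=MERGE_OUTPUTS;
int ret=main_fst2txt(p);
free_fst2txt_parameters(p);
return ret;
}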
/**
 * This function constructs and returns a token tree from a normalization grammar.
 * Tokens are represented by integers.
 */
struct normalization_tree* load_normalization_fst2(const VersatileEncodingConfig* vec,const char* grammar,
		const Alphabet* alph,struct text_tokens* tok) {
struct FST2_free_info fst2_free;
Fst2* fst2=load_abstract_fst2(vec,grammar,0,&fst2_free);
if (fst2==NULL) {
   return NULL;
}
struct string_hash* hash=new_string_hash(DONT_USE_VALUES);
/* We build a hash of the text tokens to speed up lookups */
for (int i=0;i<tok->N;i++) {
   get_value_index(tok->token[i],hash);
}
struct normalization_tree* root=new_normalization_tree();
explore_normalization_fst2(fst2,fst2->initial_states[1],root,hash,U_EMPTY,alph,NULL);
free_abstract_Fst2(fst2,&fst2_free);
free_string_hash(hash);
return root;
}
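/*
 * A hedged usage sketch (not part of the original sources): it wires the
 * loader above to its inputs. load_text_tokens(), free_text_tokens() and
 * free_normalization_tree() are assumed Unitex helpers; the file names are
 * purely illustrative.
 */
static struct normalization_tree* build_normalization_tree_example(const VersatileEncodingConfig* vec,
                                                                   const Alphabet* alph) {
struct text_tokens* tok=load_text_tokens(vec,"tokens.txt");   /* assumed helper */
if (tok==NULL) return NULL;
struct normalization_tree* root=load_normalization_fst2(vec,"Norm.fst2",alph,tok);
free_text_tokens(tok);                                        /* assumed helper */
/* The caller is expected to release the result with free_normalization_tree() (assumed) */
return root;
}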
/**
 * Frees the given structure
 */
void free_fst2txt_parameters(struct fst2txt_parameters* p) {
if (p==NULL) return;
free(p->text_file);
free(p->temp_file);
free(p->fst_file);
free(p->alphabet_file);
for (int i=0;i<p->n_token_trees;i++) {
   free_fst2txt_token_tree(p->token_tree[i]);
}
if (p->token_tree!=NULL) {
   free(p->token_tree);
}
free_Variables(p->variables);
free_buffer(p->text_buffer);
free_abstract_Fst2(p->fst2,NULL);
free_alphabet(p->alphabet);
free_stack_unichar(p->stack);
free(p);
}
int display_fst2_file_stat(const char* name,U_FILE*ferr)
{
struct FST2_free_info fst2_free;
Fst2* fst2=load_abstract_fst2(name,1,&fst2_free);
char name_without_path[FILENAME_MAX];
remove_path(name,name_without_path);
if (fst2==NULL) {
	error("Cannot load graph %s\n",name);
    if (ferr != NULL)
        u_fprintf(ferr,"Cannot load graph %s\n",name);
    return 0;
}
u_printf("Statistics of graph %s: ",name_without_path);
if (ferr != NULL)
  u_fprintf(ferr,"Statistics of graph %s: ",name_without_path);
display_fst2_stat(fst2,ferr);

free_abstract_Fst2(fst2,&fst2_free);
return 1;
}
int main_Flatten(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

int RTN=1;
int depth=10;
VersatileEncodingConfig vec=VEC_DEFAULT;
int val,index=-1;
char foo;
bool only_verify_arguments = false;
UnitexGetOpt options;

while (EOF!=(val=options.parse_long(argc,argv,optstring_Flatten,lopts_Flatten,&index))) {
   switch(val) {
   case 'f': RTN=0; break;
   case 'r': RTN=1; break;
   case 'd': if (1!=sscanf(options.vars()->optarg,"%d%c",&depth,&foo) || depth<=0) {
                /* foo is used to check that the depth is not like "45gjh" */
                error("Invalid depth argument: %s\n",options.vars()->optarg);
                return USAGE_ERROR_CODE;
             }
             break;
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case 'h': usage(); return SUCCESS_RETURN_CODE;
   case 'V': only_verify_arguments = true;
             break;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_Flatten[index].name);
             return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
   }
   index=-1;
}

if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  return SUCCESS_RETURN_CODE;
}

u_printf("Loading %s...\n",argv[options.vars()->optind]);
struct FST2_free_info fst2_free;
Fst2* origin=load_abstract_fst2(&vec,argv[options.vars()->optind],1,&fst2_free);
if (origin==NULL) {
   error("Cannot load %s\n",argv[options.vars()->optind]);
   return DEFAULT_ERROR_CODE;
}

char temp[FILENAME_MAX];
strcpy(temp,argv[options.vars()->optind]);
strcat(temp,".tmp.fst2");

switch (flatten_fst2(origin,depth,temp,&vec,RTN)) {
   case EQUIVALENT_FST:
      u_printf("The resulting grammar is an equivalent finite-state transducer.\n");
      break;
   case APPROXIMATIVE_FST:
      u_printf("The resulting grammar is a finite-state approximation.\n");
      break;
   case EQUIVALENT_RTN:
      u_printf("The resulting grammar is an equivalent FST2 (RTN).\n");
      break;
   default: 
      error("Internal state error in Flatten's main\n");
      free_abstract_Fst2(origin,&fst2_free);
      return DEFAULT_ERROR_CODE;
}

free_abstract_Fst2(origin,&fst2_free);
af_remove(argv[options.vars()->optind]);
af_rename(temp,argv[options.vars()->optind]);

return SUCCESS_RETURN_CODE;
}
int main_MultiFlex(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

char output[FILENAME_MAX]="";
char config_dir[FILENAME_MAX]="";
char alphabet[FILENAME_MAX]="";
char pkgdir[FILENAME_MAX]="";
char* named=NULL;
int is_korean=0;
// default policy is to compile only out of date graphs
GraphRecompilationPolicy graph_recompilation_policy = ONLY_OUT_OF_DATE;
//Current language's alphabet
int error_check_status=SIMPLE_AND_COMPOUND_WORDS;
VersatileEncodingConfig vec=VEC_DEFAULT;
int val,index=-1;
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_MultiFlex,lopts_MultiFlex,&index))) {
   switch(val) {
   case 'o': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty DELAF file name\n");
                free(named);
                return USAGE_ERROR_CODE;
             }
             strcpy(output,options.vars()->optarg);
             break;
   case 'a': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty alphabet file name\n");
                free(named);
                return USAGE_ERROR_CODE;
             }
             strcpy(alphabet,options.vars()->optarg);
             break;
   case 'd': strcpy(config_dir,options.vars()->optarg); break;
   case 'K': is_korean=1;
             break;
   case 's': error_check_status=ONLY_SIMPLE_WORDS; break;
   case 'c': error_check_status=ONLY_COMPOUND_WORDS; break;
   case 'f': graph_recompilation_policy = ALWAYS_RECOMPILE; break;
   case 'n': graph_recompilation_policy = NEVER_RECOMPILE;  break;
   case 't': graph_recompilation_policy = ONLY_OUT_OF_DATE; break;
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                free(named);
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                free(named);
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case 'p': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty package directory name\n");
                free(named);
                return USAGE_ERROR_CODE;
             }
             strcpy(pkgdir,options.vars()->optarg);
             break;
   case 'r': if (named==NULL) {
                  named=strdup(options.vars()->optarg);
                  if (named==NULL) {
                     alloc_error("main_Grf2Fst2");
                     return ALLOC_ERROR_CODE;
                  }
             } else {
                  char* more_names=(char*)realloc((void*)named,strlen(named)+strlen(options.vars()->optarg)+2);
                  if (more_names) {
                     named=more_names;
                  } else {
                     alloc_error("main_MultiFlex");
                     free(named);
                     return ALLOC_ERROR_CODE;
                  }
                  strcat(named,";");
                  strcat(named,options.vars()->optarg);
             }
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage();
             free(named);
             return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_MultiFlex[index].name);
             free(named);
             return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             free(named);
             return USAGE_ERROR_CODE;
   }
   index=-1;
}

if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   free(named);
   return USAGE_ERROR_CODE;
}

if (output[0]=='\0') {
   error("You must specify the output DELAF name\n");
   free(named);
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  free(named);
  return SUCCESS_RETURN_CODE;
}

//Load morphology description
char morphology[FILENAME_MAX];
new_file(config_dir,"Morphology.txt",morphology);
//int config_files_status=CONFIG_FILES_OK;
Alphabet* alph=NULL;
if (alphabet[0]!='\0') {
   //Load alphabet
   alph=load_alphabet(&vec,alphabet,1);  //To be done once at the beginning of the inflection
   if (alph==NULL) {
      error("Cannot open alphabet file %s\n",alphabet);
      free(named);
      return DEFAULT_ERROR_CODE;
   }
}
//Init equivalence files
char equivalences[FILENAME_MAX];
new_file(config_dir,"Equivalences.txt",equivalences);

/* Korean */
Korean* korean=NULL;
if (is_korean) {
   if (alph==NULL) {
      error("Cannot initialize Korean data with a NULL alphabet\n");
      free(named);
      return DEFAULT_ERROR_CODE;
   }
    korean=new Korean(alph);
}
MultiFlex_ctx* p_multiFlex_ctx=new_MultiFlex_ctx(config_dir,
                                                 morphology,
                                                 equivalences,
                                                 &vec,
                                                 korean,
                                                 pkgdir,
                                                 named,
                                                 graph_recompilation_policy);

//DELAC inflection
int return_value = inflect(argv[options.vars()->optind],output,p_multiFlex_ctx,alph,error_check_status);

free(named);

for (int count_free_fst2=0;count_free_fst2<p_multiFlex_ctx->n_fst2;count_free_fst2++) {
    free_abstract_Fst2(p_multiFlex_ctx->fst2[count_free_fst2],&(p_multiFlex_ctx->fst2_free[count_free_fst2]));
    p_multiFlex_ctx->fst2[count_free_fst2] = NULL;
}

free_alphabet(alph);

free_MultiFlex_ctx(p_multiFlex_ctx);

if (korean!=NULL) {
    delete korean;
}

u_printf("Done.\n");
return return_value;
}
int locate_pattern(const char* text_cod,const char* tokens,const char* fst2_name,const char* dlf,const char* dlc,const char* err,
                   const char* alphabet,MatchPolicy match_policy,OutputPolicy output_policy,
                   Encoding encoding_output,int bom_output,int mask_encoding_compatibility_input,
                   const char* dynamicDir,TokenizationPolicy tokenization_policy,
                   SpacePolicy space_policy,int search_limit,const char* morpho_dic_list,
                   AmbiguousOutputPolicy ambiguous_output_policy,
                   VariableErrorPolicy variable_error_policy,int protect_dic_chars,
                   int is_korean,int max_count_call,int max_count_call_warning,
                   char* arabic_rules,int tilde_negation_operator,int useLocateCache,int allow_trace) {

    U_FILE* out;
    U_FILE* info;
    struct locate_parameters* p=new_locate_parameters();
    p->text_cod=af_open_mapfile(text_cod,MAPFILE_OPTION_READ,0);
    p->buffer=(int*)af_get_mapfile_pointer(p->text_cod);
    long text_size=(long)af_get_mapfile_size(p->text_cod)/sizeof(int);
    p->buffer_size=(int)text_size;
    p->tilde_negation_operator=tilde_negation_operator;
    p->useLocateCache=useLocateCache;
    if (max_count_call == -1) {
        max_count_call = (int)text_size;
    }
    if (max_count_call_warning == -1) {
        max_count_call_warning = (int)text_size;
    }
    p->match_policy=match_policy;
    p->tokenization_policy=tokenization_policy;
    p->space_policy=space_policy;
    p->output_policy=output_policy;
    p->search_limit=search_limit;
    p->ambiguous_output_policy=ambiguous_output_policy;
    p->variable_error_policy=variable_error_policy;
    p->protect_dic_chars=protect_dic_chars;
    p->mask_encoding_compatibility_input = mask_encoding_compatibility_input;
    p->max_count_call = max_count_call;
    p->max_count_call_warning = max_count_call_warning;
    p->token_filename = tokens;
    char concord[FILENAME_MAX];
    char concord_info[FILENAME_MAX];

    strcpy(concord,dynamicDir);
    strcat(concord,"concord.ind");

    strcpy(concord_info,dynamicDir);
    strcat(concord_info,"concord.n");

    char morpho_bin[FILENAME_MAX];
    strcpy(morpho_bin,dynamicDir);
    strcat(morpho_bin,"morpho.bin");
    if (arabic_rules!=NULL && arabic_rules[0]!='\0') {
        load_arabic_typo_rules(arabic_rules,&(p->arabic));
    }
    out=u_fopen_versatile_encoding(encoding_output,bom_output,mask_encoding_compatibility_input,concord,U_WRITE);
    if (out==NULL) {
        error("Cannot write %s\n",concord);
        af_release_mapfile_pointer(p->text_cod,p->buffer);
        af_close_mapfile(p->text_cod);
        free_stack_unichar(p->stack);
        free_locate_parameters(p);
        return 0;
    }
    info=u_fopen_versatile_encoding(encoding_output,bom_output,mask_encoding_compatibility_input,concord_info,U_WRITE);
    if (info==NULL) {
        error("Cannot write %s\n",concord_info);
    }
    switch(output_policy) {
    case IGNORE_OUTPUTS:
        u_fprintf(out,"#I\n");
        break;
    case MERGE_OUTPUTS:
        u_fprintf(out,"#M\n");
        break;
    case REPLACE_OUTPUTS:
        u_fprintf(out,"#R\n");
        break;
    }
    if (alphabet!=NULL && alphabet[0]!='\0') {
        u_printf("Loading alphabet...\n");
        p->alphabet=load_alphabet(alphabet,is_korean);
        if (p->alphabet==NULL) {
            error("Cannot load alphabet file %s\n",alphabet);
            af_release_mapfile_pointer(p->text_cod,p->buffer);
            af_close_mapfile(p->text_cod);
            free_stack_unichar(p->stack);
            free_locate_parameters(p);
            if (info!=NULL) u_fclose(info);
            u_fclose(out);
            return 0;
        }
    }
    struct string_hash* semantic_codes=new_string_hash();
    extract_semantic_codes(dlf,semantic_codes);
    extract_semantic_codes(dlc,semantic_codes);

    if (is_cancelling_requested() != 0) {
        error("user cancel request.\n");
        free_alphabet(p->alphabet);
        free_string_hash(semantic_codes);
        af_release_mapfile_pointer(p->text_cod,p->buffer);
        af_close_mapfile(p->text_cod);
        free_stack_unichar(p->stack);
        free_locate_parameters(p);
        if (info!=NULL) u_fclose(info);
        u_fclose(out);
        return 0;
    }

    u_printf("Loading fst2...\n");
    struct FST2_free_info fst2load_free;
    Fst2* fst2load=load_abstract_fst2(fst2_name,1,&fst2load_free);
    if (fst2load==NULL) {
        error("Cannot load grammar %s\n",fst2_name);
        free_alphabet(p->alphabet);
        free_string_hash(semantic_codes);
        af_release_mapfile_pointer(p->text_cod,p->buffer);
        af_close_mapfile(p->text_cod);
        free_stack_unichar(p->stack);
        free_locate_parameters(p);
        if (info!=NULL) u_fclose(info);
        u_fclose(out);
        return 0;
    }

    Abstract_allocator locate_abstract_allocator=create_abstract_allocator("locate_pattern",AllocatorCreationFlagAutoFreePrefered);


    p->fst2=new_Fst2_clone(fst2load,locate_abstract_allocator);
    free_abstract_Fst2(fst2load,&fst2load_free);

    if (is_cancelling_requested() != 0) {
        error("User cancel request..\n");
        free_alphabet(p->alphabet);
        free_string_hash(semantic_codes);
        free_Fst2(p->fst2,locate_abstract_allocator);
        close_abstract_allocator(locate_abstract_allocator);
        af_release_mapfile_pointer(p->text_cod,p->buffer);
        af_close_mapfile(p->text_cod);
        free_stack_unichar(p->stack);
        free_locate_parameters(p);
        if (info!=NULL) u_fclose(info);
        u_fclose(out);
        return 0;
    }

    p->tags=p->fst2->tags;
#ifdef TRE_WCHAR
    p->filters=new_FilterSet(p->fst2,p->alphabet);
    if (p->filters==NULL) {
        error("Cannot compile filter(s)\n");
        free_alphabet(p->alphabet);
        free_string_hash(semantic_codes);
        free_Fst2(p->fst2,locate_abstract_allocator);
        close_abstract_allocator(locate_abstract_allocator);
        free_stack_unichar(p->stack);
        free_locate_parameters(p);
        af_release_mapfile_pointer(p->text_cod,p->buffer);
        af_close_mapfile(p->text_cod);
        if (info!=NULL) u_fclose(info);
        u_fclose(out);
        return 0;
    }
#endif
    u_printf("Loading token list...\n");
    int n_text_tokens=0;

    p->tokens=load_text_tokens_hash(tokens,mask_encoding_compatibility_input,&(p->SENTENCE),&(p->STOP),&n_text_tokens);
    if (p->tokens==NULL) {
        error("Cannot load token list %s\n",tokens);
        free_alphabet(p->alphabet);
        free_string_hash(semantic_codes);
        free_Fst2(p->fst2,locate_abstract_allocator);
        close_abstract_allocator(locate_abstract_allocator);
        free_locate_parameters(p);
        af_release_mapfile_pointer(p->text_cod,p->buffer);
        af_close_mapfile(p->text_cod);
        if (info!=NULL) u_fclose(info);
        u_fclose(out);
        return 0;
    }
    Abstract_allocator locate_work_abstract_allocator = locate_abstract_allocator;

    p->match_cache=(LocateCache*)malloc_cb(p->tokens->size * sizeof(LocateCache),locate_work_abstract_allocator);
    if (p->match_cache==NULL) {
        fatal_alloc_error("locate_pattern");
    }
    memset(p->match_cache,0,p->tokens->size * sizeof(LocateCache));

#ifdef TRE_WCHAR
    p->filter_match_index=new_FilterMatchIndex(p->filters,p->tokens);
    if (p->filter_match_index==NULL) {
        error("Cannot optimize filter(s)\n");
        free_alphabet(p->alphabet);
        free_string_hash(semantic_codes);
        free_string_hash(p->tokens);
        close_abstract_allocator(locate_abstract_allocator);
        free_locate_parameters(p);
        af_release_mapfile_pointer(p->text_cod,p->buffer);
        af_close_mapfile(p->text_cod);
        if (info!=NULL) u_fclose(info);
        u_fclose(out);
        return 0;
    }
#endif

    if (allow_trace!=0) {
        open_locate_trace(p,&p->fnc_locate_trace_step,&p->private_param_locate_trace);
    }
    extract_semantic_codes_from_tokens(p->tokens,semantic_codes,locate_abstract_allocator);
    u_printf("Loading morphological dictionaries...\n");
    load_morphological_dictionaries(morpho_dic_list,p,morpho_bin);
    extract_semantic_codes_from_morpho_dics(p->morpho_dic_inf,p->n_morpho_dics,semantic_codes,locate_abstract_allocator);
    p->token_control=(unsigned char*)malloc(n_text_tokens*sizeof(unsigned char));
    if (p->token_control==NULL) {
        fatal_alloc_error("locate_pattern");
    }
    p->matching_patterns=(struct bit_array**)malloc(n_text_tokens*sizeof(struct bit_array*));
    if (p->matching_patterns==NULL) {
        fatal_alloc_error("locate_pattern");
    }
    for (int i=0; i<n_text_tokens; i++) {
        p->token_control[i]=0;
        p->matching_patterns[i]=NULL;
    }
    compute_token_controls(p->alphabet,err,p);
    int number_of_patterns,is_DIC,is_CDIC,is_SDIC;
    p->pattern_tree_root=new_pattern_node(locate_abstract_allocator);
    u_printf("Computing fst2 tags...\n");
    process_tags(&number_of_patterns,semantic_codes,&is_DIC,&is_CDIC,&is_SDIC,p,locate_abstract_allocator);
    p->current_compound_pattern=number_of_patterns;
    p->DLC_tree=new_DLC_tree(p->tokens->size);
    struct lemma_node* root=new_lemma_node();
    u_printf("Loading dlf...\n");
    load_dic_for_locate(dlf,mask_encoding_compatibility_input,p->alphabet,number_of_patterns,is_DIC,is_CDIC,root,p);
    u_printf("Loading dlc...\n");
    load_dic_for_locate(dlc,mask_encoding_compatibility_input,p->alphabet,number_of_patterns,is_DIC,is_CDIC,root,p);
    /* We check whether tag tokens like "{today,.ADV}" match some patterns */
    check_patterns_for_tag_tokens(p->alphabet,number_of_patterns,root,p,locate_abstract_allocator);
    u_printf("Optimizing fst2 pattern tags...\n");
    optimize_pattern_tags(p->alphabet,root,p,locate_abstract_allocator);
    u_printf("Optimizing compound word dictionary...\n");
    optimize_DLC(p->DLC_tree);
    free_string_hash(semantic_codes);
    int nb_input_variable=0;
    p->input_variables=new_Variables(p->fst2->input_variables,&nb_input_variable);
    p->output_variables=new_OutputVariables(p->fst2->output_variables,&p->nb_output_variables);


    Abstract_allocator locate_recycle_abstract_allocator=NULL;
    locate_recycle_abstract_allocator=create_abstract_allocator("locate_pattern_recycle",
                                      AllocatorFreeOnlyAtAllocatorDelete|AllocatorTipOftenRecycledObject,
                                      get_prefered_allocator_item_size_for_nb_variable(nb_input_variable));

    u_printf("Optimizing fst2...\n");
    p->optimized_states=build_optimized_fst2_states(p->input_variables,p->output_variables,p->fst2,locate_abstract_allocator);
    if (is_korean) {
        p->korean=new Korean(p->alphabet);
        p->jamo_tags=create_jamo_tags(p->korean,p->tokens);
    }
    p->failfast=new_bit_array(n_text_tokens,ONE_BIT);

    u_printf("Working...\n");
    p->prv_alloc=locate_work_abstract_allocator;
    p->prv_alloc_recycle=locate_recycle_abstract_allocator;
    launch_locate(out,text_size,info,p);
    if (allow_trace!=0) {
        close_locate_trace(p,p->fnc_locate_trace_step,p->private_param_locate_trace);
    }
    free_bit_array(p->failfast);
    free_Variables(p->input_variables);
    free_OutputVariables(p->output_variables);
    af_release_mapfile_pointer(p->text_cod,p->buffer);
    af_close_mapfile(p->text_cod);
    if (info!=NULL) u_fclose(info);
    u_fclose(out);

    if (p->match_cache!=NULL) {
        for (int i=0; i<p->tokens->size; i++) {
            free_LocateCache(p->match_cache[i],locate_work_abstract_allocator);
        }
        free_cb(p->match_cache,locate_work_abstract_allocator);
    }
    int free_abstract_allocator_item=(get_allocator_cb_flag(locate_abstract_allocator) & AllocatorGetFlagAutoFreePresent) ? 0 : 1;

    if (free_abstract_allocator_item) {
        free_optimized_states(p->optimized_states,p->fst2->number_of_states,locate_abstract_allocator);
    }
    free_stack_unichar(p->stack);
    /* Freeing the DLC tree is too slow when it is big, so we skip it here:
     * free_DLC_tree(p->DLC_tree);
     * (it is only freed below in library builds or when releasing memory at exit) */
    if (free_abstract_allocator_item) {
        free_pattern_node(p->pattern_tree_root,locate_abstract_allocator);
        free_Fst2(p->fst2,locate_abstract_allocator);
        free_list_int(p->tag_token_list,locate_abstract_allocator);
    }
    close_abstract_allocator(locate_abstract_allocator);
    close_abstract_allocator(locate_recycle_abstract_allocator);
    locate_recycle_abstract_allocator=locate_abstract_allocator=NULL;

    /* We don't free 'parameters->tags' because it was just a link on 'parameters->fst2->tags' */
    free_alphabet(p->alphabet);
    if (p->korean!=NULL) {
        delete p->korean;
    }
    if (p->jamo_tags!=NULL) {
        /* jamo tags must be freed before tokens, because we need to know how
         * many jamo tags there are, and this number is the number of tokens */
        for (int i=0; i<p->tokens->size; i++) {
            free(p->jamo_tags[i]);
        }
        free(p->jamo_tags);
    }
    free_string_hash(p->tokens);
    free_lemma_node(root);
    free(p->token_control);
    for (int i=0; i<n_text_tokens; i++) {
        free_bit_array(p->matching_patterns[i]);
    }
    free(p->matching_patterns);
#ifdef TRE_WCHAR
    free_FilterSet(p->filters);
    free_FilterMatchIndex(p->filter_match_index);
#endif
    for (int i=0; i<p->n_morpho_dics; i++) {
        free_abstract_INF(p->morpho_dic_inf[i],&(p->morpho_dic_inf_free[i]));
        free_abstract_BIN(p->morpho_dic_bin[i],&(p->morpho_dic_bin_free[i]));
    }
    free(p->morpho_dic_inf);
    free(p->morpho_dic_inf_free);
    free(p->morpho_dic_bin);
    free(p->morpho_dic_bin_free);
#if (defined(UNITEX_LIBRARY) || defined(UNITEX_RELEASE_MEMORY_AT_EXIT))
    free_DLC_tree(p->DLC_tree);
#endif
    free_locate_parameters(p);
    u_printf("Done.\n");
    return 1;
}
int main_RebuildTfst(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

VersatileEncodingConfig vec=VEC_DEFAULT;
int val, index=-1;
bool only_verify_arguments = false;
UnitexGetOpt options;
int save_statistics=1;
while (EOF!=(val=options.parse_long(argc,argv,optstring_RebuildTfst,lopts_RebuildTfst,&index))) {
   switch (val) {
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case 'S': save_statistics = 0;
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h':
      usage();
      return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n", options.vars()->optopt) :
                         error("Missing argument for option --%s\n", lopts_RebuildTfst[index].name);
     return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n", options.vars()->optopt) :
                         error("Invalid option --%s\n", options.vars()->optarg);
     return USAGE_ERROR_CODE;
   }
   index=-1;
}

if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  return SUCCESS_RETURN_CODE;
}

char input_tfst[FILENAME_MAX];
char input_tind[FILENAME_MAX];
strcpy(input_tfst,argv[options.vars()->optind]);
remove_extension(input_tfst,input_tind);
strcat(input_tind,".tind");

u_printf("Loading %s...\n",input_tfst);

Tfst* tfst = open_text_automaton(&vec,input_tfst);
if (tfst==NULL) {
   error("Unable to load %s automaton\n",input_tfst);
   return DEFAULT_ERROR_CODE;
}

char basedir[FILENAME_MAX];
get_path(input_tfst,basedir);
char output_tfst[FILENAME_MAX];
sprintf(output_tfst, "%s.new.tfst",input_tfst);
char output_tind[FILENAME_MAX];
sprintf(output_tind, "%s.new.tind",input_tfst);

U_FILE* f_tfst;
if ((f_tfst = u_fopen(&vec,output_tfst,U_WRITE)) == NULL) {
   error("Unable to open %s for writing\n", output_tfst);
   close_text_automaton(tfst);
   return DEFAULT_ERROR_CODE;
}

U_FILE* f_tind;
if ((f_tind = u_fopen(BINARY,output_tind,U_WRITE)) == NULL) {
   u_fclose(f_tfst);
   close_text_automaton(tfst);
   error("Unable to open %s for writing\n", output_tind);
   return DEFAULT_ERROR_CODE;
}
/* We use this hash table to rebuild files tfst_tags_by_freq/alph.txt */
struct hash_table* form_frequencies=new_hash_table((HASH_FUNCTION)hash_unichar,(EQUAL_FUNCTION)u_equal,
        (FREE_FUNCTION)free,NULL,(KEYCOPY_FUNCTION)keycopy);

u_fprintf(f_tfst,"%010d\n",tfst->N);
for (int i = 1; i <= tfst->N; i++) {
   if ((i % 100) == 0) {
      u_printf("%d/%d sentences rebuilt...\n", i, tfst->N);
   }
   load_sentence(tfst,i);

   char grfname[FILENAME_MAX];
   sprintf(grfname, "%ssentence%d.grf", basedir, i);
   unichar** tags=NULL;
   int n_tags=-1;
   if (fexists(grfname)) {
      /* If there is a .grf for the current sentence, then we must
       * take it into account */
      if (0==pseudo_main_Grf2Fst2(&vec,grfname,0,NULL,1,1,NULL,NULL,0)) {
         /* We proceed only if the graph compilation was a success */
         char fst2name[FILENAME_MAX];
         sprintf(fst2name, "%ssentence%d.fst2", basedir, i);
         struct FST2_free_info fst2_free;
         Fst2* fst2=load_abstract_fst2(&vec,fst2name,0,&fst2_free);
         af_remove(fst2name);
         free_SingleGraph(tfst->automaton,NULL);
         tfst->automaton=create_copy_of_fst2_subgraph(fst2,1);
         tags=create_tfst_tags(fst2,&n_tags);
         free_abstract_Fst2(fst2,&fst2_free);
      } else {
         error("Error: %s is not a valid sentence automaton\n",grfname);
      }
   }
   save_current_sentence(tfst,f_tfst,f_tind,tags,n_tags,form_frequencies);
   if (tags!=NULL) {
      /* If necessary, we free the tags we created */
      for (int count_tags=0;count_tags<n_tags;count_tags++) {
         free(tags[count_tags]);
      }
      free(tags);
   }
}

u_printf("Text automaton rebuilt.\n");

u_fclose(f_tind);
u_fclose(f_tfst);
close_text_automaton(tfst);

/* Finally, we save statistics */
if (save_statistics) {
    char tfst_tags_by_freq[FILENAME_MAX];
    char tfst_tags_by_alph[FILENAME_MAX];
    strcpy(tfst_tags_by_freq, basedir);
    strcat(tfst_tags_by_freq, "tfst_tags_by_freq.txt");
    strcpy(tfst_tags_by_alph, basedir);
    strcat(tfst_tags_by_alph, "tfst_tags_by_alph.txt");
    U_FILE* f_tfst_tags_by_freq = u_fopen(&vec, tfst_tags_by_freq, U_WRITE);
    if (f_tfst_tags_by_freq == NULL) {
        error("Cannot open %s\n", tfst_tags_by_freq);
    }
    U_FILE* f_tfst_tags_by_alph = u_fopen(&vec, tfst_tags_by_alph, U_WRITE);
    if (f_tfst_tags_by_alph == NULL) {
        error("Cannot open %s\n", tfst_tags_by_alph);
    }
    sort_and_save_tfst_stats(form_frequencies, f_tfst_tags_by_freq, f_tfst_tags_by_alph);
    u_fclose(f_tfst_tags_by_freq);
    u_fclose(f_tfst_tags_by_alph);
}
free_hash_table(form_frequencies);

/* make a backup and replace old automaton with new */
char backup_tfst[FILENAME_MAX];
char backup_tind[FILENAME_MAX];
sprintf(backup_tfst,"%s.bck",input_tfst);
sprintf(backup_tind,"%s.bck",input_tind);
/* We remove the existing backup files, if any */
af_remove(backup_tfst);
af_remove(backup_tind);
af_rename(input_tfst,backup_tfst);
af_rename(input_tind,backup_tind);
af_rename(output_tfst,input_tfst);
af_rename(output_tind,input_tind);
u_printf("\nYou can find a backup of the original files in:\n    %s\nand %s\n",
         backup_tfst,backup_tind);

return SUCCESS_RETURN_CODE;
}
/**
 * The same as main, but without the call to setBufferMode.
 */
int main_BuildKrMwuDic(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return 0;
}


int val,index=-1;
char output[FILENAME_MAX]="";
char inflection_dir[FILENAME_MAX]="";
char alphabet[FILENAME_MAX]="";
char dic_bin[FILENAME_MAX]="";
char dic_inf[FILENAME_MAX]="";
Encoding encoding_output = DEFAULT_ENCODING_OUTPUT;
int bom_output = DEFAULT_BOM_OUTPUT;
int mask_encoding_compatibility_input = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT;
struct OptVars* vars=new_OptVars();
while (EOF!=(val=getopt_long_TS(argc,argv,optstring_BuildKrMwuDic,lopts_BuildKrMwuDic,&index,vars))) {
   switch(val) {
   case 'o': if (vars->optarg[0]=='\0') {
                fatal_error("You must specify a non empty output file name\n");
             }
             strcpy(output,vars->optarg);
             break;
   case 'd': if (vars->optarg[0]=='\0') {
                fatal_error("Empty inflection directory\n");
             }
             strcpy(inflection_dir,vars->optarg);
             break;
   case 'a': if (vars->optarg[0]=='\0') {
                fatal_error("You must specify a non empty alphabet file name\n");
             }
             strcpy(alphabet,vars->optarg);
             break;
   case 'b': if (vars->optarg[0]=='\0') {
                fatal_error("You must specify a non empty binary dictionary name\n");
             }
             strcpy(dic_bin,vars->optarg);
             remove_extension(dic_bin,dic_inf);
             strcat(dic_inf,".inf");
             break;
   case 'h': usage(); return 0;
   case ':': if (index==-1) fatal_error("Missing argument for option -%c\n",vars->optopt);
             else fatal_error("Missing argument for option --%s\n",lopts_BuildKrMwuDic[index].name);
   case '?': if (index==-1) fatal_error("Invalid option -%c\n",vars->optopt);
             else fatal_error("Invalid option --%s\n",vars->optarg);
             break;
   case 'k': if (vars->optarg[0]=='\0') {
                fatal_error("Empty input_encoding argument\n");
             }
             decode_reading_encoding_parameter(&mask_encoding_compatibility_input,vars->optarg);
             break;
   case 'q': if (vars->optarg[0]=='\0') {
                fatal_error("Empty output_encoding argument\n");
             }
             decode_writing_encoding_parameter(&encoding_output,&bom_output,vars->optarg);
             break;
   }
   index=-1;
}
if (vars->optind!=argc-1) {
   fatal_error("Invalid arguments: rerun with --help\n");
}
if (output[0]=='\0') {
   fatal_error("Output file must be specified\n");
}
if (inflection_dir[0]=='\0') {
   fatal_error("Inflection directory must be specified\n");
}
if (alphabet[0]=='\0') {
   fatal_error("Alphabet file must be specified\n");
}
if (dic_bin[0]=='\0') {
   fatal_error("Binary dictionary must be specified\n");
}

U_FILE* delas=u_fopen_existing_versatile_encoding(mask_encoding_compatibility_input,argv[vars->optind],U_READ);
if (delas==NULL) {
   fatal_error("Cannot open %s\n",argv[vars->optind]);
}
U_FILE* grf=u_fopen_existing_versatile_encoding(mask_encoding_compatibility_input,output,U_WRITE);
if (grf==NULL) {
   fatal_error("Cannot open %s\n",output);
}
Alphabet* alph=load_alphabet(alphabet,1);
if (alph==NULL) {
   fatal_error("Cannot open alphabet file %s\n",alphabet);
}
Korean* korean=new Korean(alph);
MultiFlex_ctx* multiFlex_ctx = (MultiFlex_ctx*)malloc(sizeof(MultiFlex_ctx));
if (multiFlex_ctx==NULL) {
   fatal_alloc_error("main_BuildKrMwuDic");
}
strcpy(multiFlex_ctx->inflection_directory,inflection_dir);
if (init_transducer_tree(multiFlex_ctx)) {
   fatal_error("init_transducer_tree error\n");
}
struct l_morpho_t* pL_MORPHO=init_langage_morph();
if (pL_MORPHO == NULL) {
   fatal_error("init_langage_morph error\n");
}

unsigned char* bin=load_BIN_file(dic_bin);
struct INF_codes* inf=load_INF_file(dic_inf);

create_mwu_dictionary(delas,grf,multiFlex_ctx,korean,pL_MORPHO,encoding_output,
       bom_output,mask_encoding_compatibility_input,bin,inf);

free(bin);
free_INF_codes(inf);
u_fclose(delas);
u_fclose(grf);
free_alphabet(alph);
delete korean;
free_transducer_tree(multiFlex_ctx);
for (int count_free_fst2=0;count_free_fst2<multiFlex_ctx->n_fst2;count_free_fst2++) {
    free_abstract_Fst2(multiFlex_ctx->fst2[count_free_fst2],&(multiFlex_ctx->fst2_free[count_free_fst2]));
    multiFlex_ctx->fst2[count_free_fst2]=NULL;
}
free_language_morpho(pL_MORPHO);
free(multiFlex_ctx);
free_OptVars(vars);
u_printf("Done.\n");
return 0;
}
/**
 * The same as main, but without the call to setBufferMode.
 */
int main_BuildKrMwuDic(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

int val,index=-1;
char output[FILENAME_MAX]="";
char inflection_dir[FILENAME_MAX]="";
char alphabet[FILENAME_MAX]="";
char dic_bin[FILENAME_MAX]="";
char dic_inf[FILENAME_MAX]="";

// default policy is to compile only out of date graphs
GraphRecompilationPolicy graph_recompilation_policy = ONLY_OUT_OF_DATE;

VersatileEncodingConfig vec=VEC_DEFAULT;

bool only_verify_arguments = false;

UnitexGetOpt options;

while (EOF!=(val=options.parse_long(argc,argv,optstring_BuildKrMwuDic,lopts_BuildKrMwuDic,&index))) {
   switch(val) {
   case 'o': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty output file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(output,options.vars()->optarg);
             break;
   case 'd': if (options.vars()->optarg[0]=='\0') {
                error("Empty inflection directory\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(inflection_dir,options.vars()->optarg);
             break;
   case 'a': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty alphabet file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(alphabet,options.vars()->optarg);
             break;
   case 'b': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty binary dictionary name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(dic_bin,options.vars()->optarg);
             remove_extension(dic_bin,dic_inf);
             strcat(dic_inf,".inf");
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage(); 
             return SUCCESS_RETURN_CODE;
   case 'f': graph_recompilation_policy = ALWAYS_RECOMPILE; break;
   case 'n': graph_recompilation_policy = NEVER_RECOMPILE;  break;
   case 't': graph_recompilation_policy = ONLY_OUT_OF_DATE; break;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_BuildKrMwuDic[index].name);
             return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   }
   index=-1;
}
if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}
if (output[0]=='\0') {
   error("Output file must be specified\n");
   return USAGE_ERROR_CODE;
}
if (inflection_dir[0]=='\0') {
   error("Inflection directory must be specified\n");
   return USAGE_ERROR_CODE;
}
if (alphabet[0]=='\0') {
   error("Alphabet file must be specified\n");
   return USAGE_ERROR_CODE;
}
if (dic_bin[0]=='\0') {
   error("Binary dictionary must be specified\n");
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory 
  return SUCCESS_RETURN_CODE;
}

U_FILE* delas=u_fopen(&vec,argv[options.vars()->optind],U_READ);
if (delas==NULL) {
   error("Cannot open %s\n",argv[options.vars()->optind]);
   return DEFAULT_ERROR_CODE;
}

U_FILE* grf=u_fopen(&vec,output,U_WRITE);
if (grf==NULL) {
   error("Cannot open %s\n",output);
   u_fclose(delas);  
   return DEFAULT_ERROR_CODE;
}

Alphabet* alph=load_alphabet(&vec,alphabet,1);
if (alph==NULL) {
   u_fclose(grf);
   u_fclose(delas);
   error("Cannot open alphabet file %s\n",alphabet);
   return DEFAULT_ERROR_CODE;
}
Korean* korean=new Korean(alph);

MultiFlex_ctx* multiFlex_ctx=new_MultiFlex_ctx(inflection_dir,
                                               NULL,
                                               NULL,
                                               &vec,
                                               korean,
                                               NULL,
                                               NULL,
                                               graph_recompilation_policy);

Dictionary* d=new_Dictionary(&vec,dic_bin,dic_inf);

create_mwu_dictionary(delas,grf,multiFlex_ctx,d);

free_Dictionary(d);
u_fclose(delas);
u_fclose(grf);
free_alphabet(alph);
delete korean;
for (int count_free_fst2=0;count_free_fst2<multiFlex_ctx->n_fst2;count_free_fst2++) {
    free_abstract_Fst2(multiFlex_ctx->fst2[count_free_fst2],&(multiFlex_ctx->fst2_free[count_free_fst2]));
    multiFlex_ctx->fst2[count_free_fst2]=NULL;
}
free_MultiFlex_ctx(multiFlex_ctx);
u_printf("Done.\n");
return SUCCESS_RETURN_CODE;
}
/**
 * Returns 1 if the given .fst2 is OK to be used by the Locate program; 0 otherwise.
 * Conditions are:
 *
 * 1) no left recursion
 * 2) no loop that can recognize the empty word (<E> with an output or subgraph
 *    that can match the empty word).
 */
int OK_for_Locate_write_error(const VersatileEncodingConfig* vec,const char* name,char no_empty_graph_warning,U_FILE* ferr) {
int RESULT=1;
struct FST2_free_info fst2_free;
Fst2* fst2=load_abstract_fst2(vec,name,1,&fst2_free);
if (fst2==NULL) {
	fatal_error("Cannot load graph %s\n",name);
}
u_printf("Creating condition sets...\n");
GrfCheckInfo* chk=new_GrfCheckInfo(fst2);
/* Now, we look for a fix point in the condition graphs */
struct list_int* list=NULL;
/* To do that, we start by creating a list of all the graphs we are sure about */
int unknown=0;
for (int i=1;i<fst2->number_of_graphs+1;i++) {
	if (chk->graphs_matching_E[i]!=CHK_DONT_KNOW) {
		list=new_list_int(i,list);
	} else {
		unknown++;
	}
}
/* While there is something to do for E matching */
u_printf("Checking empty word matching...\n");
while (resolve_all_conditions(chk,&list,&unknown)) {}
if (chk->graphs_matching_E[1]==CHK_MATCHES_E) {
	if (!no_empty_graph_warning) {
       error("ERROR: the main graph %S recognizes <E>\n",fst2->graph_names[1]);
       if (ferr!=NULL) {
    	   u_fprintf(ferr,"ERROR: the main graph %S recognizes <E>\n",fst2->graph_names[1]);
	   }
	}
	RESULT=0;
	goto evil_goto;
}
if (!no_empty_graph_warning) {
	for (int i=2;i<fst2->number_of_graphs+1;i++) {
		if (chk->graphs_matching_E[i]==CHK_MATCHES_E) {
			error("WARNING: the graph %S recognizes <E>\n",fst2->graph_names[i]);
			if (ferr!=NULL) {
				u_fprintf(ferr,"WARNING: the graph %S recognizes <E>\n",fst2->graph_names[i]);
			}
		}
	}
}
/* Now, we look for E loops and left recursions. And to do that, we need a new version
 * of the condition graphs, because a graph that does not match E would have been emptied.
 * And obviously, we can not deduce anything from an empty graph. */
rebuild_condition_graphs(chk);
u_printf("Checking E loops...\n");
if (is_any_E_loop(chk)) {
	/* Error messages have already been printed */
	RESULT=0;
	goto evil_goto;
}
u_printf("Checking left recursions...\n");
if (is_any_left_recursion(chk)) {
	/* Error messages have already been printed */
	RESULT=0;
	goto evil_goto;
}
evil_goto:
/* There may be something unused in the list that we need to free */
free_list_int(list);
free_GrfCheckInfo(chk);
free_abstract_Fst2(fst2,&fst2_free);
return RESULT;
}
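/*
 * A minimal usage sketch (not part of the original sources): it checks a
 * grammar before handing it to Locate, passing 0 as no_empty_graph_warning so
 * that <E> warnings are printed. Only names already used in the surrounding
 * examples (VEC_DEFAULT, error(), U_FILE) appear here.
 */
static int check_grammar_for_locate_example(const char* fst2_name,U_FILE* ferr) {
VersatileEncodingConfig vec=VEC_DEFAULT;
if (!OK_for_Locate_write_error(&vec,fst2_name,0,ferr)) {
   error("%s cannot be used by the Locate program\n",fst2_name);
   return 0;
}
return 1;
}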
/**
 * Returns 1 if the given .fst2 is OK to be used by the Locate program; 0 otherwise. 
 * Conditions are:
 * 
 * 1) no left recursion
 * 2) no loop that can recognize the empty word (<E> with an output or subgraph
 *    that can match the empty word).
 */
int OK_for_Locate_write_error(const char* name,char no_empty_graph_warning,U_FILE* ferr) {
ConditionList* conditions;
ConditionList* conditions_for_state;
int i,j;
int ERROR=0;
struct FST2_free_info fst2_free;
Fst2* fst2=load_abstract_fst2(name,1,&fst2_free);
if (fst2==NULL) {
	fatal_error("Cannot load graph %s\n",name);
}
u_printf("Recursion detection started\n");
int* graphs_matching_E=(int*)malloc(sizeof(int)*(fst2->number_of_graphs+1));
conditions=(ConditionList*)malloc(sizeof(ConditionList)*(fst2->number_of_graphs+1));
if (graphs_matching_E==NULL || conditions==NULL) {
   fatal_alloc_error("OK_for_Locate");
}
for (i=0;i<fst2->number_of_graphs+1;i++) {
   graphs_matching_E[i]=0;
   conditions[i]=NULL;
}
/* First, we look for tags that match the empty word <E> */
for (i=0;i<fst2->number_of_tags;i++) {
   check_epsilon_tag(fst2->tags[i]);
}
/* Then, we look for graphs that match <E> with or without conditions */
for (i=1;i<=fst2->number_of_graphs;i++) {
   conditions_for_state=(ConditionList*)malloc(sizeof(ConditionList)*fst2->number_of_states_per_graphs[i]);
   if (conditions_for_state==NULL) {
      fatal_alloc_error("OK_for_Locate");
   }
   for (j=0;j<fst2->number_of_states_per_graphs[i];j++) {
      conditions_for_state[j]=NULL;
   }
   graphs_matching_E[i]=graph_matches_E(fst2->initial_states[i],fst2->initial_states[i],
  								      fst2->states,fst2->tags,i,fst2->graph_names,
  								      conditions_for_state,&conditions[i]);
   /* If any, we remove the temp conditions */                        
   if (conditions[i]!=NULL) free_ConditionList(conditions[i]);
   /* And we say that the conditions for the current graph are those of its
    * initial state. */
   conditions[i]=conditions_for_state[0];
   /* Then we perform cleaning */
   conditions_for_state[0]=NULL;
   for (j=1;j<fst2->number_of_states_per_graphs[i];j++) {
      free_ConditionList(conditions_for_state[j]);
   }
   free(conditions_for_state); 
}
/* Then, we use all our condition lists to determine which graphs match <E>.
 * We iterate until we find a fixed point. If some conditions remain non null
 * after this loop, it means that there are <E> dependencies between graphs
 * and this case will be dealt with later. */
u_printf("Resolving <E> conditions\n");
while (resolve_conditions(conditions,fst2->number_of_graphs,
							fst2->states,fst2->initial_states,ferr)) {}
if (is_bit_mask_set(fst2->states[fst2->initial_states[1]]->control,UNCONDITIONAL_E_MATCH)) {
   /* If the main graph matches <E> */
   if (!no_empty_graph_warning) {
       error("ERROR: the main graph %S recognizes <E>\n",fst2->graph_names[1]);
       if (ferr != NULL)
         u_fprintf(ferr,"ERROR: the main graph %S recognizes <E>\n",fst2->graph_names[1]);
   }
   ERROR=1;
}
if (!ERROR) {
   for (i=1;i<fst2->number_of_graphs+1;i++) {
      if (is_bit_mask_set(fst2->states[fst2->initial_states[i]]->control,UNCONDITIONAL_E_MATCH)) {
         /* If the graph matches <E> */
         if (!no_empty_graph_warning) {
             error("WARNING: the graph %S recognizes <E>\n",fst2->graph_names[i]);
             if (ferr != NULL)
                u_fprintf(ferr,"WARNING: the graph %S recognizes <E>\n",fst2->graph_names[i]);
         }
      }
   }
}
clean_controls(fst2,graphs_matching_E);
if (!ERROR) {
   u_printf("Looking for <E> loops\n");
   for (i=1;!ERROR && i<fst2->number_of_graphs+1;i++) {
      ERROR=look_for_E_loops(i,fst2,graphs_matching_E,ferr);
   }
}
clean_controls(fst2,NULL);
if (!ERROR) {
   u_printf("Looking for infinite recursions\n");
   for (i=1;!ERROR && i<fst2->number_of_graphs+1;i++) {
      ERROR=look_for_recursion(i,NULL,fst2,graphs_matching_E,ferr);
   }
}
for (i=1;i<fst2->number_of_graphs+1;i++) {
   free_ConditionList(conditions[i]);
}
free_abstract_Fst2(fst2,&fst2_free);
u_printf("Recursion detection completed\n");
free(conditions);
free(graphs_matching_E);
if (ERROR) return LEFT_RECURSION;
return NO_LEFT_RECURSION;
}