/** * Returns 1 if the given .fst2 corresponds to a valid sentence automaton; 0 * otherwise. Following conditions must be true: * * 1) there must be only one graph * 2) it must be acyclic * 3) there must not be any <E> transition with an ouput * 4) <E> must the only tag without output * 5) all other tags must have an ouput of the form w x y z f g, with * w and y being integers >=0, and x, z, f and g being integers >=-1 */ int valid_sentence_automaton_write_error(const VersatileEncodingConfig* vec,const char* name,U_FILE*) { struct FST2_free_info fst2_free; Fst2* fst2=load_abstract_fst2(vec,name,0,&fst2_free); if (fst2==NULL) return 0; /* Condition 1 */ if (fst2->number_of_graphs!=1) { free_abstract_Fst2(fst2,&fst2_free); return 0; } /* Condition 2 */ if (!is_acyclic(fst2,1)) { free_abstract_Fst2(fst2,&fst2_free); return 0; } /* Conditions 3, 4 & 5 */ if (!valid_outputs(fst2)) { free_abstract_Fst2(fst2,&fst2_free); return 0; } /* Victory! */ return 1; }
// // this function constructs a token tree from a normalization grammar // tokens are represented by strings // struct normalization_tree* load_normalization_transducer_string(const VersatileEncodingConfig* vec,const char* name) { struct FST2_free_info fst2_free; Fst2* automate=load_abstract_fst2(vec,name,0,&fst2_free); if (automate==NULL) { // if the loading of the normalization transducer has failed, we return return NULL; } struct normalization_tree* root=new_normalization_tree(); unichar a[1]; a[0]='\0'; explorer_automate_normalization_string(automate,automate->initial_states[1],root,a); free_abstract_Fst2(automate,&fst2_free); return root; }
/**
 * Applies the grammar p->fst_file to the text p->text_file, writing the
 * transformed text to p->temp_file and then replacing the original text
 * file with it. The mode (merge/replace) comes from p->output_policy.
 * Returns 0 on success, 1 on any open/load failure.
 */
int main_fst2txt(struct fst2txt_parameters* p) {
p->f_input=u_fopen_existing_versatile_encoding(p->mask_encoding_compatibility_input,p->text_file,U_READ);
if (p->f_input==NULL) {
   error("Cannot open file %s\n",p->text_file);
   return 1;
}
/* The text is read through a unichar buffer of bounded capacity */
p->text_buffer=new_buffer_for_file(UNICHAR_BUFFER,p->f_input,CAPACITY_LIMIT);
p->buffer=p->text_buffer->unichar_buffer;
p->f_output=u_fopen_creating_versatile_encoding(p->encoding_output,p->bom_output,p->temp_file,U_WRITE);
if (p->f_output==NULL) {
   error("Cannot open temporary file %s\n",p->temp_file);
   u_fclose(p->f_input);
   return 1;
}
p->fst2=load_abstract_fst2(p->fst_file,1,NULL);
if (p->fst2==NULL) {
   error("Cannot load grammar %s\n",p->fst_file);
   u_fclose(p->f_input);
   u_fclose(p->f_output);
   return 1;
}
/* The alphabet is optional: an empty name means "no alphabet" */
if (p->alphabet_file!=NULL && p->alphabet_file[0]!='\0') {
   p->alphabet=load_alphabet(p->alphabet_file);
   if (p->alphabet==NULL) {
      error("Cannot load alphabet file %s\n",p->alphabet_file);
      u_fclose(p->f_input);
      u_fclose(p->f_output);
      free_abstract_Fst2(p->fst2,NULL);
      return 1;
   }
}
u_printf("Applying %s in %s mode...\n",p->fst_file,(p->output_policy==MERGE_OUTPUTS)?"merge":"replace");
build_state_token_trees(p);
parse_text(p);
u_fclose(p->f_input);
u_fclose(p->f_output);
/* Replace the input text with the transformed temporary file */
af_remove(p->text_file);
af_rename(p->temp_file,p->text_file);
u_printf("Done.\n");
return 0;
}
/** * This function constructs and returns a token tree from a normalization grammar. * Tokens are represented by integers. */ struct normalization_tree* load_normalization_fst2(const VersatileEncodingConfig* vec,const char* grammar, const Alphabet* alph,struct text_tokens* tok) { struct FST2_free_info fst2_free; Fst2* fst2=load_abstract_fst2(vec,grammar,0,&fst2_free); if (fst2==NULL) { return NULL; } struct string_hash* hash=new_string_hash(DONT_USE_VALUES); /* We create the token tree to speed up the consultation */ for (int i=0;i<tok->N;i++) { get_value_index(tok->token[i],hash); } struct normalization_tree* root=new_normalization_tree(); explore_normalization_fst2(fst2,fst2->initial_states[1],root,hash,U_EMPTY,alph,NULL); free_abstract_Fst2(fst2,&fst2_free); free_string_hash(hash); return root; }
/** * Frees the given structure */ void free_fst2txt_parameters(struct fst2txt_parameters* p) { if (p==NULL) return; free(p->text_file); free(p->temp_file); free(p->fst_file); free(p->alphabet_file); for (int i=0;i<p->n_token_trees;i++) { free_fst2txt_token_tree(p->token_tree[i]); } if (p->token_tree!=NULL) { free(p->token_tree); } free_Variables(p->variables); free_buffer(p->text_buffer); free_abstract_Fst2(p->fst2,NULL); free_alphabet(p->alphabet); free_stack_unichar(p->stack); free(p); }
int display_fst2_file_stat(const char* name,U_FILE*ferr) { struct FST2_free_info fst2_free; Fst2* fst2=load_abstract_fst2(name,1,&fst2_free); char name_without_path[FILENAME_MAX]; remove_path(name,name_without_path); if (fst2==NULL) { error("Cannot load graph %s\n",name); if (ferr != NULL) u_fprintf(ferr,"Cannot load graph %s\n",name); return 0; } u_printf("Statistics of graph %s: ",name_without_path); if (ferr != NULL) u_fprintf(ferr,"Statistics of graph %s: ",name_without_path); display_fst2_stat(fst2,ferr); free_abstract_Fst2(fst2,&fst2_free); return 1; }
/**
 * Entry point of the Flatten program: loads an .fst2 grammar, flattens
 * its subgraph calls up to a given depth and overwrites the input file
 * with the result. Returns SUCCESS_RETURN_CODE, USAGE_ERROR_CODE or
 * DEFAULT_ERROR_CODE.
 */
int main_Flatten(int argc,char* const argv[]) {
if (argc==1) {
   /* No argument at all: show the usage message */
   usage();
   return SUCCESS_RETURN_CODE;
}
int RTN=1;                       /* -r keeps an RTN when needed, -f forces an FST */
int depth=10;                    /* default maximum flattening depth */
VersatileEncodingConfig vec=VEC_DEFAULT;
int val,index=-1;
char foo;                        /* sscanf sink used to reject trailing garbage */
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_Flatten,lopts_Flatten,&index))) {
   switch(val) {
   case 'f': RTN=0; break;
   case 'r': RTN=1; break;
   case 'd': if (1!=sscanf(options.vars()->optarg,"%d%c",&depth,&foo) || depth<=0) {
                /* foo is used to check that the depth is not like "45gjh" */
                error("Invalid depth argument: %s\n",options.vars()->optarg);
                return USAGE_ERROR_CODE;
             }
             break;
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case 'h': usage(); return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_Flatten[index].name);
             return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
             break;    /* unreachable after the return above */
   }
   index=-1;
}
/* Exactly one positional argument (the grammar) must remain */
if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}
if (only_verify_arguments) {
   // freeing all allocated memory
   return SUCCESS_RETURN_CODE;
}
u_printf("Loading %s...\n",argv[options.vars()->optind]);
struct FST2_free_info fst2_free;
Fst2* origin=load_abstract_fst2(&vec,argv[options.vars()->optind],1,&fst2_free);
if (origin==NULL) {
   error("Cannot load %s\n",argv[options.vars()->optind]);
   return DEFAULT_ERROR_CODE;
}
/* The flattened grammar is first written to a temporary file next to the input */
char temp[FILENAME_MAX];
strcpy(temp,argv[options.vars()->optind]);
strcat(temp,".tmp.fst2");
switch (flatten_fst2(origin,depth,temp,&vec,RTN)) {
case EQUIVALENT_FST: u_printf("The resulting grammar is an equivalent finite-state transducer.\n");
                     break;
case APPROXIMATIVE_FST: u_printf("The resulting grammar is a finite-state approximation.\n");
                        break;
case EQUIVALENT_RTN: u_printf("The resulting grammar is an equivalent FST2 (RTN).\n");
                     break;
default: error("Internal state error in Flatten's main\n");
         free_abstract_Fst2(origin,&fst2_free);
         return DEFAULT_ERROR_CODE;
}
free_abstract_Fst2(origin,&fst2_free);
/* Replace the original grammar with the flattened one */
af_remove(argv[options.vars()->optind]);
af_rename(temp,argv[options.vars()->optind]);
return SUCCESS_RETURN_CODE;
}
/**
 * Entry point of the MultiFlex program: inflects the DELAC/DELAS entries
 * of the input file into the DELAF named by -o, using the inflection
 * graphs found via the configuration directory (-d) and, optionally, a
 * Korean-specific mode (-K). Returns SUCCESS_RETURN_CODE on success, or
 * one of the USAGE/ALLOC/DEFAULT error codes.
 */
int main_MultiFlex(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}
char output[FILENAME_MAX]="";
char config_dir[FILENAME_MAX]="";
char alphabet[FILENAME_MAX]="";
char pkgdir[FILENAME_MAX]="";
char* named=NULL;                 /* ';'-separated list built from repeated -r options */
int is_korean=0;
// default policy is to compile only out of date graphs
GraphRecompilationPolicy graph_recompilation_policy = ONLY_OUT_OF_DATE;
//Current language's alphabet
int error_check_status=SIMPLE_AND_COMPOUND_WORDS;
VersatileEncodingConfig vec=VEC_DEFAULT;
int val,index=-1;
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_MultiFlex,lopts_MultiFlex,&index))) {
   switch(val) {
   case 'o': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty DELAF file name\n");
                free(named);
                return USAGE_ERROR_CODE;
             }
             strcpy(output,options.vars()->optarg);
             break;
   case 'a': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty alphabet file name\n");
                free(named);
                return USAGE_ERROR_CODE;
             }
             strcpy(alphabet,options.vars()->optarg);
             break;
   case 'd': strcpy(config_dir,options.vars()->optarg);
             break;
   case 'K': is_korean=1;
             break;
   case 's': error_check_status=ONLY_SIMPLE_WORDS;
             break;
   case 'c': error_check_status=ONLY_COMPOUND_WORDS;
             break;
   case 'f': graph_recompilation_policy = ALWAYS_RECOMPILE;
             break;
   case 'n': graph_recompilation_policy = NEVER_RECOMPILE;
             break;
   case 't': graph_recompilation_policy = ONLY_OUT_OF_DATE;
             break;
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                free(named);
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                free(named);
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case 'p': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty package directory name\n");
                free(named);
                return USAGE_ERROR_CODE;
             }
             strcpy(pkgdir,options.vars()->optarg);
             break;
   case 'r': if (named==NULL) {
                named=strdup(options.vars()->optarg);
                if (named==NULL) {
                   /* NOTE(review): message says "main_Grf2Fst2" — looks like a
                    * copy/paste slip; the other branch says "main_MultiFlex" */
                   alloc_error("main_Grf2Fst2");
                   return ALLOC_ERROR_CODE;
                }
             } else {
                /* Append ";<optarg>" to the accumulated list, growing the buffer */
                char* more_names = (char*)realloc((void*)named,strlen(named)+strlen(options.vars()->optarg)+2);
                if (more_names) {
                   named = more_names;
                } else {
                   alloc_error("main_MultiFlex");
                   free(named);
                   return ALLOC_ERROR_CODE;
                }
                strcat(named,";");
                strcat(named,options.vars()->optarg);
             }
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage();
             free(named);
             return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_MultiFlex[index].name);
             free(named);
             return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             free(named);
             return USAGE_ERROR_CODE;
   }
   index=-1;
}
/* Exactly one positional argument (the DELAC/DELAS file) must remain */
if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   free(named);
   return USAGE_ERROR_CODE;
}
if (output[0]=='\0') {
   error("You must specify the output DELAF name\n");
   free(named);
   return USAGE_ERROR_CODE;
}
if (only_verify_arguments) {
   // freeing all allocated memory
   free(named);
   return SUCCESS_RETURN_CODE;
}
//Load morphology description
char morphology[FILENAME_MAX];
new_file(config_dir,"Morphology.txt",morphology);
//int config_files_status=CONFIG_FILES_OK;
Alphabet* alph=NULL;
if (alphabet[0]!='\0') {
   //Load alphabet
   alph=load_alphabet(&vec,alphabet,1); //To be done once at the beginning of the inflection
   if (alph==NULL) {
      error("Cannot open alphabet file %s\n",alphabet);
      free(named);
      return DEFAULT_ERROR_CODE;
   }
}
//Init equivalence files
char equivalences[FILENAME_MAX];
new_file(config_dir,"Equivalences.txt",equivalences);
/* Korean */
Korean* korean=NULL;
if (is_korean) {
   if (alph==NULL) {
      /* Korean mode requires an alphabet (-a) */
      error("Cannot initialize Korean data with a NULL alphabet\n");
      free(named);
      return DEFAULT_ERROR_CODE;
   }
   korean=new Korean(alph);
}
MultiFlex_ctx* p_multiFlex_ctx=new_MultiFlex_ctx(config_dir,
                                                 morphology,
                                                 equivalences,
                                                 &vec,
                                                 korean,
                                                 pkgdir,
                                                 named,
                                                 graph_recompilation_policy);
//DELAC inflection
int return_value = inflect(argv[options.vars()->optind],output,p_multiFlex_ctx,alph,error_check_status);
free(named);
/* Release every .fst2 cached by the context before freeing the context itself */
for (int count_free_fst2=0;count_free_fst2<p_multiFlex_ctx->n_fst2;count_free_fst2++) {
   free_abstract_Fst2(p_multiFlex_ctx->fst2[count_free_fst2],&(p_multiFlex_ctx->fst2_free[count_free_fst2]));
   p_multiFlex_ctx->fst2[count_free_fst2] = NULL;
}
free_alphabet(alph);
free_MultiFlex_ctx(p_multiFlex_ctx);
if (korean!=NULL) {
   delete korean;
}
u_printf("Done.\n");
return return_value;
}
int locate_pattern(const char* text_cod,const char* tokens,const char* fst2_name,const char* dlf,const char* dlc,const char* err, const char* alphabet,MatchPolicy match_policy,OutputPolicy output_policy, Encoding encoding_output,int bom_output,int mask_encoding_compatibility_input, const char* dynamicDir,TokenizationPolicy tokenization_policy, SpacePolicy space_policy,int search_limit,const char* morpho_dic_list, AmbiguousOutputPolicy ambiguous_output_policy, VariableErrorPolicy variable_error_policy,int protect_dic_chars, int is_korean,int max_count_call,int max_count_call_warning, char* arabic_rules,int tilde_negation_operator,int useLocateCache,int allow_trace) { U_FILE* out; U_FILE* info; struct locate_parameters* p=new_locate_parameters(); p->text_cod=af_open_mapfile(text_cod,MAPFILE_OPTION_READ,0); p->buffer=(int*)af_get_mapfile_pointer(p->text_cod); long text_size=(long)af_get_mapfile_size(p->text_cod)/sizeof(int); p->buffer_size=(int)text_size; p->tilde_negation_operator=tilde_negation_operator; p->useLocateCache=useLocateCache; if (max_count_call == -1) { max_count_call = (int)text_size; } if (max_count_call_warning == -1) { max_count_call_warning = (int)text_size; } p->match_policy=match_policy; p->tokenization_policy=tokenization_policy; p->space_policy=space_policy; p->output_policy=output_policy; p->search_limit=search_limit; p->ambiguous_output_policy=ambiguous_output_policy; p->variable_error_policy=variable_error_policy; p->protect_dic_chars=protect_dic_chars; p->mask_encoding_compatibility_input = mask_encoding_compatibility_input; p->max_count_call = max_count_call; p->max_count_call_warning = max_count_call_warning; p->token_filename = tokens; char concord[FILENAME_MAX]; char concord_info[FILENAME_MAX]; strcpy(concord,dynamicDir); strcat(concord,"concord.ind"); strcpy(concord_info,dynamicDir); strcat(concord_info,"concord.n"); char morpho_bin[FILENAME_MAX]; strcpy(morpho_bin,dynamicDir); strcat(morpho_bin,"morpho.bin"); if (arabic_rules!=NULL && 
arabic_rules[0]!='\0') { load_arabic_typo_rules(arabic_rules,&(p->arabic)); } out=u_fopen_versatile_encoding(encoding_output,bom_output,mask_encoding_compatibility_input,concord,U_WRITE); if (out==NULL) { error("Cannot write %s\n",concord); af_release_mapfile_pointer(p->text_cod,p->buffer); af_close_mapfile(p->text_cod); free_stack_unichar(p->stack); free_locate_parameters(p); u_fclose(out); return 0; } info=u_fopen_versatile_encoding(encoding_output,bom_output,mask_encoding_compatibility_input,concord_info,U_WRITE); if (info==NULL) { error("Cannot write %s\n",concord_info); } switch(output_policy) { case IGNORE_OUTPUTS: u_fprintf(out,"#I\n"); break; case MERGE_OUTPUTS: u_fprintf(out,"#M\n"); break; case REPLACE_OUTPUTS: u_fprintf(out,"#R\n"); break; } if (alphabet!=NULL && alphabet[0]!='\0') { u_printf("Loading alphabet...\n"); p->alphabet=load_alphabet(alphabet,is_korean); if (p->alphabet==NULL) { error("Cannot load alphabet file %s\n",alphabet); af_release_mapfile_pointer(p->text_cod,p->buffer); af_close_mapfile(p->text_cod); free_stack_unichar(p->stack); free_locate_parameters(p); if (info!=NULL) u_fclose(info); u_fclose(out); return 0; } } struct string_hash* semantic_codes=new_string_hash(); extract_semantic_codes(dlf,semantic_codes); extract_semantic_codes(dlc,semantic_codes); if (is_cancelling_requested() != 0) { error("user cancel request.\n"); free_alphabet(p->alphabet); free_string_hash(semantic_codes); af_release_mapfile_pointer(p->text_cod,p->buffer); af_close_mapfile(p->text_cod); free_stack_unichar(p->stack); free_locate_parameters(p); if (info!=NULL) u_fclose(info); u_fclose(out); return 0; } u_printf("Loading fst2...\n"); struct FST2_free_info fst2load_free; Fst2* fst2load=load_abstract_fst2(fst2_name,1,&fst2load_free); if (fst2load==NULL) { error("Cannot load grammar %s\n",fst2_name); free_alphabet(p->alphabet); free_string_hash(semantic_codes); af_release_mapfile_pointer(p->text_cod,p->buffer); af_close_mapfile(p->text_cod); 
free_stack_unichar(p->stack); free_locate_parameters(p); if (info!=NULL) u_fclose(info); u_fclose(out); return 0; } Abstract_allocator locate_abstract_allocator=create_abstract_allocator("locate_pattern",AllocatorCreationFlagAutoFreePrefered); p->fst2=new_Fst2_clone(fst2load,locate_abstract_allocator); free_abstract_Fst2(fst2load,&fst2load_free); if (is_cancelling_requested() != 0) { error("User cancel request..\n"); free_alphabet(p->alphabet); free_string_hash(semantic_codes); free_Fst2(p->fst2,locate_abstract_allocator); close_abstract_allocator(locate_abstract_allocator); af_release_mapfile_pointer(p->text_cod,p->buffer); af_close_mapfile(p->text_cod); free_stack_unichar(p->stack); free_locate_parameters(p); if (info!=NULL) u_fclose(info); u_fclose(out); return 0; } p->tags=p->fst2->tags; #ifdef TRE_WCHAR p->filters=new_FilterSet(p->fst2,p->alphabet); if (p->filters==NULL) { error("Cannot compile filter(s)\n"); free_alphabet(p->alphabet); free_string_hash(semantic_codes); free_Fst2(p->fst2,locate_abstract_allocator); close_abstract_allocator(locate_abstract_allocator); free_stack_unichar(p->stack); free_locate_parameters(p); af_release_mapfile_pointer(p->text_cod,p->buffer); af_close_mapfile(p->text_cod); if (info!=NULL) u_fclose(info); u_fclose(out); return 0; } #endif u_printf("Loading token list...\n"); int n_text_tokens=0; p->tokens=load_text_tokens_hash(tokens,mask_encoding_compatibility_input,&(p->SENTENCE),&(p->STOP),&n_text_tokens); if (p->tokens==NULL) { error("Cannot load token list %s\n",tokens); free_alphabet(p->alphabet); free_string_hash(semantic_codes); free_Fst2(p->fst2,locate_abstract_allocator); close_abstract_allocator(locate_abstract_allocator); free_locate_parameters(p); af_release_mapfile_pointer(p->text_cod,p->buffer); af_close_mapfile(p->text_cod); if (info!=NULL) u_fclose(info); u_fclose(out); return 0; } Abstract_allocator locate_work_abstract_allocator = locate_abstract_allocator; p->match_cache=(LocateCache*)malloc_cb(p->tokens->size 
* sizeof(LocateCache),locate_work_abstract_allocator); memset(p->match_cache,0,p->tokens->size * sizeof(LocateCache)); if (p->match_cache==NULL) { fatal_alloc_error("locate_pattern"); } #ifdef TRE_WCHAR p->filter_match_index=new_FilterMatchIndex(p->filters,p->tokens); if (p->filter_match_index==NULL) { error("Cannot optimize filter(s)\n"); free_alphabet(p->alphabet); free_string_hash(semantic_codes); free_string_hash(p->tokens); close_abstract_allocator(locate_abstract_allocator); free_locate_parameters(p); af_release_mapfile_pointer(p->text_cod,p->buffer); af_close_mapfile(p->text_cod); if (info!=NULL) u_fclose(info); u_fclose(out); return 0; } #endif if (allow_trace!=0) { open_locate_trace(p,&p->fnc_locate_trace_step,&p->private_param_locate_trace); } extract_semantic_codes_from_tokens(p->tokens,semantic_codes,locate_abstract_allocator); u_printf("Loading morphological dictionaries...\n"); load_morphological_dictionaries(morpho_dic_list,p,morpho_bin); extract_semantic_codes_from_morpho_dics(p->morpho_dic_inf,p->n_morpho_dics,semantic_codes,locate_abstract_allocator); p->token_control=(unsigned char*)malloc(n_text_tokens*sizeof(unsigned char)); if (p->token_control==NULL) { fatal_alloc_error("locate_pattern"); } p->matching_patterns=(struct bit_array**)malloc(n_text_tokens*sizeof(struct bit_array*)); if (p->matching_patterns==NULL) { fatal_alloc_error("locate_pattern"); } for (int i=0; i<n_text_tokens; i++) { p->token_control[i]=0; p->matching_patterns[i]=NULL; } compute_token_controls(p->alphabet,err,p); int number_of_patterns,is_DIC,is_CDIC,is_SDIC; p->pattern_tree_root=new_pattern_node(locate_abstract_allocator); u_printf("Computing fst2 tags...\n"); process_tags(&number_of_patterns,semantic_codes,&is_DIC,&is_CDIC,&is_SDIC,p,locate_abstract_allocator); p->current_compound_pattern=number_of_patterns; p->DLC_tree=new_DLC_tree(p->tokens->size); struct lemma_node* root=new_lemma_node(); u_printf("Loading dlf...\n"); 
load_dic_for_locate(dlf,mask_encoding_compatibility_input,p->alphabet,number_of_patterns,is_DIC,is_CDIC,root,p); u_printf("Loading dlc...\n"); load_dic_for_locate(dlc,mask_encoding_compatibility_input,p->alphabet,number_of_patterns,is_DIC,is_CDIC,root,p); /* We look if tag tokens like "{today,.ADV}" verify some patterns */ check_patterns_for_tag_tokens(p->alphabet,number_of_patterns,root,p,locate_abstract_allocator); u_printf("Optimizing fst2 pattern tags...\n"); optimize_pattern_tags(p->alphabet,root,p,locate_abstract_allocator); u_printf("Optimizing compound word dictionary...\n"); optimize_DLC(p->DLC_tree); free_string_hash(semantic_codes); int nb_input_variable=0; p->input_variables=new_Variables(p->fst2->input_variables,&nb_input_variable); p->output_variables=new_OutputVariables(p->fst2->output_variables,&p->nb_output_variables); Abstract_allocator locate_recycle_abstract_allocator=NULL; locate_recycle_abstract_allocator=create_abstract_allocator("locate_pattern_recycle", AllocatorFreeOnlyAtAllocatorDelete|AllocatorTipOftenRecycledObject, get_prefered_allocator_item_size_for_nb_variable(nb_input_variable)); u_printf("Optimizing fst2...\n"); p->optimized_states=build_optimized_fst2_states(p->input_variables,p->output_variables,p->fst2,locate_abstract_allocator); if (is_korean) { p->korean=new Korean(p->alphabet); p->jamo_tags=create_jamo_tags(p->korean,p->tokens); } p->failfast=new_bit_array(n_text_tokens,ONE_BIT); u_printf("Working...\n"); p->prv_alloc=locate_work_abstract_allocator; p->prv_alloc_recycle=locate_recycle_abstract_allocator; launch_locate(out,text_size,info,p); if (allow_trace!=0) { close_locate_trace(p,p->fnc_locate_trace_step,p->private_param_locate_trace); } free_bit_array(p->failfast); free_Variables(p->input_variables); free_OutputVariables(p->output_variables); af_release_mapfile_pointer(p->text_cod,p->buffer); af_close_mapfile(p->text_cod); if (info!=NULL) u_fclose(info); u_fclose(out); if (p->match_cache!=NULL) { for (int i=0; 
i<p->tokens->size; i++) { free_LocateCache(p->match_cache[i],locate_work_abstract_allocator); } free_cb(p->match_cache,locate_work_abstract_allocator); } int free_abstract_allocator_item=(get_allocator_cb_flag(locate_abstract_allocator) & AllocatorGetFlagAutoFreePresent) ? 0 : 1; if (free_abstract_allocator_item) { free_optimized_states(p->optimized_states,p->fst2->number_of_states,locate_abstract_allocator); } free_stack_unichar(p->stack); /** Too long to free the DLC tree if it is big * free_DLC_tree(p->DLC_tree); */ if (free_abstract_allocator_item) { free_pattern_node(p->pattern_tree_root,locate_abstract_allocator); free_Fst2(p->fst2,locate_abstract_allocator); free_list_int(p->tag_token_list,locate_abstract_allocator); } close_abstract_allocator(locate_abstract_allocator); close_abstract_allocator(locate_recycle_abstract_allocator); locate_recycle_abstract_allocator=locate_abstract_allocator=NULL; /* We don't free 'parameters->tags' because it was just a link on 'parameters->fst2->tags' */ free_alphabet(p->alphabet); if (p->korean!=NULL) { delete p->korean; } if (p->jamo_tags!=NULL) { /* jamo tags must be freed before tokens, because we need to know how * many jamo tags there are, and this number is the number of tokens */ for (int i=0; i<p->tokens->size; i++) { free(p->jamo_tags[i]); } free(p->jamo_tags); } free_string_hash(p->tokens); free_lemma_node(root); free(p->token_control); for (int i=0; i<n_text_tokens; i++) { free_bit_array(p->matching_patterns[i]); } free(p->matching_patterns); #ifdef TRE_WCHAR free_FilterSet(p->filters); free_FilterMatchIndex(p->filter_match_index); #endif for (int i=0; i<p->n_morpho_dics; i++) { free_abstract_INF(p->morpho_dic_inf[i],&(p->morpho_dic_inf_free[i])); free_abstract_BIN(p->morpho_dic_bin[i],&(p->morpho_dic_bin_free[i])); } free(p->morpho_dic_inf); free(p->morpho_dic_inf_free); free(p->morpho_dic_bin); free(p->morpho_dic_bin_free); #if (defined(UNITEX_LIBRARY) || defined(UNITEX_RELEASE_MEMORY_AT_EXIT)) 
free_DLC_tree(p->DLC_tree); #endif free_locate_parameters(p); u_printf("Done.\n"); return 1; }
/**
 * Entry point of the RebuildTfst program: rebuilds a text automaton
 * (.tfst/.tind pair), replacing each sentence automaton for which a
 * manually edited sentenceN.grf exists. The original files are kept as
 * .bck backups. Returns SUCCESS_RETURN_CODE, USAGE_ERROR_CODE or
 * DEFAULT_ERROR_CODE.
 */
int main_RebuildTfst(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}
VersatileEncodingConfig vec=VEC_DEFAULT;
int val, index=-1;
bool only_verify_arguments = false;
UnitexGetOpt options;
int save_statistics=1;               /* -S disables the tag-frequency statistics files */
while (EOF!=(val=options.parse_long(argc,argv,optstring_RebuildTfst,lopts_RebuildTfst,&index))) {
   switch (val) {
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case 'S': save_statistics = 0;
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage();
             return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n", options.vars()->optopt) :
                         error("Missing argument for option --%s\n", lopts_RebuildTfst[index].name);
             return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n", options.vars()->optopt) :
                         error("Invalid option --%s\n", options.vars()->optarg);
             return USAGE_ERROR_CODE;
   }
   index=-1;
}
if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}
if (only_verify_arguments) {
   // freeing all allocated memory
   return SUCCESS_RETURN_CODE;
}
/* The .tind index file shares the .tfst base name */
char input_tfst[FILENAME_MAX];
char input_tind[FILENAME_MAX];
strcpy(input_tfst,argv[options.vars()->optind]);
remove_extension(input_tfst,input_tind);
strcat(input_tind,".tind");
u_printf("Loading %s...\n",input_tfst);
Tfst* tfst = open_text_automaton(&vec,input_tfst);
if (tfst==NULL) {
   error("Unable to load %s automaton\n",input_tfst);
   return DEFAULT_ERROR_CODE;
}
char basedir[FILENAME_MAX];
get_path(input_tfst,basedir);
/* The rebuilt automaton is written to .new files, swapped in at the end */
char output_tfst[FILENAME_MAX];
sprintf(output_tfst, "%s.new.tfst",input_tfst);
char output_tind[FILENAME_MAX];
sprintf(output_tind, "%s.new.tind",input_tfst);
U_FILE* f_tfst;
if ((f_tfst = u_fopen(&vec,output_tfst,U_WRITE)) == NULL) {
   error("Unable to open %s for writing\n", output_tfst);
   close_text_automaton(tfst);
   return DEFAULT_ERROR_CODE;
}
U_FILE* f_tind;
if ((f_tind = u_fopen(BINARY,output_tind,U_WRITE)) == NULL) {
   u_fclose(f_tfst);
   close_text_automaton(tfst);
   error("Unable to open %s for writing\n", output_tind);
   return DEFAULT_ERROR_CODE;
}
/* We use this hash table to rebuild files tfst_tags_by_freq/alph.txt */
struct hash_table* form_frequencies=new_hash_table((HASH_FUNCTION)hash_unichar,(EQUAL_FUNCTION)u_equal,
                                                   (FREE_FUNCTION)free,NULL,(KEYCOPY_FUNCTION)keycopy);
u_fprintf(f_tfst,"%010d\n",tfst->N);
for (int i = 1; i <= tfst->N; i++) {
   if ((i % 100) == 0) {
      u_printf("%d/%d sentences rebuilt...\n", i, tfst->N);
   }
   load_sentence(tfst,i);
   char grfname[FILENAME_MAX];
   sprintf(grfname, "%ssentence%d.grf", basedir, i);
   unichar** tags=NULL;
   int n_tags=-1;
   if (fexists(grfname)) {
      /* If there is a .grf for the current sentence, then we must
       * take it into account */
      if (0==pseudo_main_Grf2Fst2(&vec,grfname,0,NULL,1,1,NULL,NULL,0)) {
         /* We proceed only if the graph compilation was a success */
         char fst2name[FILENAME_MAX];
         sprintf(fst2name, "%ssentence%d.fst2", basedir, i);
         struct FST2_free_info fst2_free;
         Fst2* fst2=load_abstract_fst2(&vec,fst2name,0,&fst2_free);
         af_remove(fst2name);
         /* The compiled graph replaces the sentence's automaton */
         free_SingleGraph(tfst->automaton,NULL);
         tfst->automaton=create_copy_of_fst2_subgraph(fst2,1);
         tags=create_tfst_tags(fst2,&n_tags);
         free_abstract_Fst2(fst2,&fst2_free);
      } else {
         error("Error: %s is not a valid sentence automaton\n",grfname);
      }
   }
   save_current_sentence(tfst,f_tfst,f_tind,tags,n_tags,form_frequencies);
   if (tags!=NULL) {
      /* If necessary, we free the tags we created */
      for (int count_tags=0;count_tags<n_tags;count_tags++) {
         free(tags[count_tags]);
      }
      free(tags);
   }
}
u_printf("Text automaton rebuilt.\n");
u_fclose(f_tind);
u_fclose(f_tfst);
close_text_automaton(tfst);
/* Finally, we save statistics */
if (save_statistics) {
   char tfst_tags_by_freq[FILENAME_MAX];
   char tfst_tags_by_alph[FILENAME_MAX];
   strcpy(tfst_tags_by_freq, basedir);
   strcat(tfst_tags_by_freq, "tfst_tags_by_freq.txt");
   strcpy(tfst_tags_by_alph, basedir);
   strcat(tfst_tags_by_alph, "tfst_tags_by_alph.txt");
   U_FILE* f_tfst_tags_by_freq = u_fopen(&vec, tfst_tags_by_freq, U_WRITE);
   if (f_tfst_tags_by_freq == NULL) {
      error("Cannot open %s\n", tfst_tags_by_freq);
   }
   U_FILE* f_tfst_tags_by_alph = u_fopen(&vec, tfst_tags_by_alph, U_WRITE);
   if (f_tfst_tags_by_alph == NULL) {
      error("Cannot open %s\n", tfst_tags_by_alph);
   }
   sort_and_save_tfst_stats(form_frequencies, f_tfst_tags_by_freq, f_tfst_tags_by_alph);
   u_fclose(f_tfst_tags_by_freq);
   u_fclose(f_tfst_tags_by_alph);
}
free_hash_table(form_frequencies);
/* make a backup and replace old automaton with new */
char backup_tfst[FILENAME_MAX];
char backup_tind[FILENAME_MAX];
sprintf(backup_tfst,"%s.bck",input_tfst);
sprintf(backup_tind,"%s.bck",input_tind);
/* We remove the existing backup files, if any */
af_remove(backup_tfst);
af_remove(backup_tind);
af_rename(input_tfst,backup_tfst);
af_rename(input_tind,backup_tind);
af_rename(output_tfst,input_tfst);
af_rename(output_tind,input_tind);
u_printf("\nYou can find a backup of the original files in:\n %s\nand %s\n",
         backup_tfst,backup_tind);
return SUCCESS_RETURN_CODE;
}
/**
 * The same than main, but no call to setBufferMode.
 *
 * Entry point of the (legacy) BuildKrMwuDic program: reads a DELAS-like
 * input file and produces a Korean MWU dictionary graph via
 * create_mwu_dictionary(). All argument errors go through fatal_error(),
 * which does not return.
 */
int main_BuildKrMwuDic(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return 0;
}
int val,index=-1;
char output[FILENAME_MAX]="";
char inflection_dir[FILENAME_MAX]="";
char alphabet[FILENAME_MAX]="";
char dic_bin[FILENAME_MAX]="";
char dic_inf[FILENAME_MAX]="";
Encoding encoding_output = DEFAULT_ENCODING_OUTPUT;
int bom_output = DEFAULT_BOM_OUTPUT;
int mask_encoding_compatibility_input = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT;
struct OptVars* vars=new_OptVars();
while (EOF!=(val=getopt_long_TS(argc,argv,optstring_BuildKrMwuDic,lopts_BuildKrMwuDic,&index,vars))) {
   switch(val) {
   case 'o': if (vars->optarg[0]=='\0') {
                fatal_error("You must specify a non empty output file name\n");
             }
             strcpy(output,vars->optarg);
             break;
   case 'd': if (vars->optarg[0]=='\0') {
                fatal_error("Empty inflection directory\n");
             }
             strcpy(inflection_dir,vars->optarg);
             break;
   case 'a': if (vars->optarg[0]=='\0') {
                fatal_error("You must specify a non empty alphabet file name\n");
             }
             strcpy(alphabet,vars->optarg);
             break;
   case 'b': if (vars->optarg[0]=='\0') {
                fatal_error("You must specify a non empty binary dictionary name\n");
             }
             /* The .inf file shares the .bin base name */
             strcpy(dic_bin,vars->optarg);
             remove_extension(dic_bin,dic_inf);
             strcat(dic_inf,".inf");
             break;
   case 'h': usage();
             return 0;
   case ':': if (index==-1) fatal_error("Missing argument for option -%c\n",vars->optopt);
             else fatal_error("Missing argument for option --%s\n",lopts_BuildKrMwuDic[index].name);
             /* NOTE(review): no break here — falls into case '?'. Presumably
              * harmless because fatal_error never returns; confirm. */
   case '?': if (index==-1) fatal_error("Invalid option -%c\n",vars->optopt);
             else fatal_error("Invalid option --%s\n",vars->optarg);
             break;
   case 'k': if (vars->optarg[0]=='\0') {
                fatal_error("Empty input_encoding argument\n");
             }
             decode_reading_encoding_parameter(&mask_encoding_compatibility_input,vars->optarg);
             break;
   case 'q': if (vars->optarg[0]=='\0') {
                fatal_error("Empty output_encoding argument\n");
             }
             decode_writing_encoding_parameter(&encoding_output,&bom_output,vars->optarg);
             break;
   }
   index=-1;
}
/* Exactly one positional argument (the DELAS file) and all mandatory
 * options must be present */
if (vars->optind!=argc-1) {
   fatal_error("Invalid arguments: rerun with --help\n");
}
if (output[0]=='\0') {
   fatal_error("Output file must be specified\n");
}
if (inflection_dir[0]=='\0') {
   fatal_error("Inflection directory must be specified\n");
}
if (alphabet[0]=='\0') {
   fatal_error("Alphabet file must be specified\n");
}
if (dic_bin[0]=='\0') {
   fatal_error("Binary dictionary must be specified\n");
}
U_FILE* delas=u_fopen_existing_versatile_encoding(mask_encoding_compatibility_input,argv[vars->optind],U_READ);
if (delas==NULL) {
   fatal_error("Cannot open %s\n",argv[vars->optind]);
}
/* NOTE(review): the output is opened with the "existing" variant although
 * it is written to — verify this is intentional */
U_FILE* grf=u_fopen_existing_versatile_encoding(mask_encoding_compatibility_input,output,U_WRITE);
if (grf==NULL) {
   fatal_error("Cannot open %s\n",output);
}
Alphabet* alph=load_alphabet(alphabet,1);
if (alph==NULL) {
   fatal_error("Cannot open alphabet file %s\n",alphabet);
}
Korean* korean=new Korean(alph);
MultiFlex_ctx* multiFlex_ctx = (MultiFlex_ctx*)malloc(sizeof(MultiFlex_ctx));
if (multiFlex_ctx==NULL) {
   fatal_alloc_error("main_BuildKrMwuDic");
}
strcpy(multiFlex_ctx->inflection_directory,inflection_dir);
if (init_transducer_tree(multiFlex_ctx)) {
   fatal_error("init_transducer_tree error\n");
}
struct l_morpho_t* pL_MORPHO=init_langage_morph();
if (pL_MORPHO == NULL) {
   fatal_error("init_langage_morph error\n");
}
unsigned char* bin=load_BIN_file(dic_bin);
struct INF_codes* inf=load_INF_file(dic_inf);
create_mwu_dictionary(delas,grf,multiFlex_ctx,korean,pL_MORPHO,encoding_output,
                      bom_output,mask_encoding_compatibility_input,bin,inf);
free(bin);
free_INF_codes(inf);
u_fclose(delas);
u_fclose(grf);
free_alphabet(alph);
delete korean;
free_transducer_tree(multiFlex_ctx);
/* Release every .fst2 cached by the context before freeing it */
for (int count_free_fst2=0;count_free_fst2<multiFlex_ctx->n_fst2;count_free_fst2++) {
   free_abstract_Fst2(multiFlex_ctx->fst2[count_free_fst2],&(multiFlex_ctx->fst2_free[count_free_fst2]));
   multiFlex_ctx->fst2[count_free_fst2]=NULL;
}
free_language_morpho(pL_MORPHO);
free(multiFlex_ctx);
free_OptVars(vars);
u_printf("Done.\n");
return 0;
}
/**
 * The same than main, but no call to setBufferMode.
 *
 * Modern BuildKrMwuDic entry point: parses the command line with UnitexGetOpt,
 * opens the DELAS input and .grf output, loads the alphabet and the Korean
 * dictionary, generates the MWU dictionary and frees every resource. Unlike
 * the fatal_error-based variant, every failure path returns an error code
 * after closing whatever was already opened.
 */
int main_BuildKrMwuDic(int argc,char* const argv[]) {
if (argc==1) {
   /* No argument at all: print usage and report success */
   usage();
   return SUCCESS_RETURN_CODE;
}
int val,index=-1;
char output[FILENAME_MAX]="";
char inflection_dir[FILENAME_MAX]="";
char alphabet[FILENAME_MAX]="";
char dic_bin[FILENAME_MAX]="";
char dic_inf[FILENAME_MAX]="";
// default policy is to compile only out of date graphs
GraphRecompilationPolicy graph_recompilation_policy = ONLY_OUT_OF_DATE;
VersatileEncodingConfig vec=VEC_DEFAULT;
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_BuildKrMwuDic,lopts_BuildKrMwuDic,&index))) {
   switch(val) {
   case 'o': /* output .grf file */
             if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty output file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(output,options.vars()->optarg);
             break;
   case 'd': /* directory containing the inflection transducers */
             if (options.vars()->optarg[0]=='\0') {
                error("Empty inflection directory\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(inflection_dir,options.vars()->optarg);
             break;
   case 'a': /* alphabet file */
             if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty alphabet file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(alphabet,options.vars()->optarg);
             break;
   case 'b': /* binary dictionary; the .inf name is derived from the .bin one */
             if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty binary dictionary name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(dic_bin,options.vars()->optarg);
             remove_extension(dic_bin,dic_inf);
             strcat(dic_inf,".inf");
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage();
             return SUCCESS_RETURN_CODE;
   /* graph recompilation policy selectors */
   case 'f': graph_recompilation_policy = ALWAYS_RECOMPILE; break;
   case 'n': graph_recompilation_policy = NEVER_RECOMPILE; break;
   case 't': graph_recompilation_policy = ONLY_OUT_OF_DATE; break;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_BuildKrMwuDic[index].name);
             return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
   case 'k': /* input encoding */
             if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': /* output encoding (+ optional BOM) */
             if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   }
   index=-1;
}
/* Exactly one non-option argument (the DELAS file) must remain */
if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}
/* All mandatory options must have been provided */
if (output[0]=='\0') {
   error("Output file must be specified\n");
   return USAGE_ERROR_CODE;
}
if (inflection_dir[0]=='\0') {
   error("Inflection directory must be specified\n");
   return USAGE_ERROR_CODE;
}
if (alphabet[0]=='\0') {
   error("Alphabet file must be specified\n");
   return USAGE_ERROR_CODE;
}
if (dic_bin[0]=='\0') {
   error("Binary dictionary must be specified\n");
   return USAGE_ERROR_CODE;
}
if (only_verify_arguments) {
   // freeing all allocated memory
   /* -V mode: arguments were valid, stop before opening any file */
   return SUCCESS_RETURN_CODE;
}
U_FILE* delas=u_fopen(&vec,argv[options.vars()->optind],U_READ);
if (delas==NULL) {
   error("Cannot open %s\n",argv[options.vars()->optind]);
   return DEFAULT_ERROR_CODE;
}
U_FILE* grf=u_fopen(&vec,output,U_WRITE);
if (grf==NULL) {
   error("Cannot open %s\n",output);
   u_fclose(delas);
   return DEFAULT_ERROR_CODE;
}
Alphabet* alph=load_alphabet(&vec,alphabet,1);
if (alph==NULL) {
   /* close in reverse order of opening before bailing out */
   u_fclose(grf);
   u_fclose(delas);
   error("Cannot open alphabet file %s\n",alphabet);
   return DEFAULT_ERROR_CODE;
}
Korean* korean=new Korean(alph);
MultiFlex_ctx* multiFlex_ctx=new_MultiFlex_ctx(inflection_dir,
       NULL,
       NULL,
       &vec,
       korean,
       NULL,
       NULL,
       graph_recompilation_policy);
Dictionary* d=new_Dictionary(&vec,dic_bin,dic_inf);
/* The actual work: generate the MWU dictionary graph */
create_mwu_dictionary(delas,grf,multiFlex_ctx,d);
/* Release everything; the fst2 array inside multiFlex_ctx is freed entry by
 * entry before the context itself */
free_Dictionary(d);
u_fclose(delas);
u_fclose(grf);
free_alphabet(alph);
delete korean;
for (int count_free_fst2=0;count_free_fst2<multiFlex_ctx->n_fst2;count_free_fst2++) {
   free_abstract_Fst2(multiFlex_ctx->fst2[count_free_fst2],&(multiFlex_ctx->fst2_free[count_free_fst2]));
   multiFlex_ctx->fst2[count_free_fst2]=NULL;
}
free_MultiFlex_ctx(multiFlex_ctx);
u_printf("Done.\n");
return SUCCESS_RETURN_CODE;
}
/** * Returns 1 if the given .fst2 is OK to be used by the Locate program; 0 otherwise. * Conditions are: * * 1) no left recursion * 2) no loop that can recognize the empty word (<E> with an output or subgraph * that can match the empty word). */ int OK_for_Locate_write_error(const VersatileEncodingConfig* vec,const char* name,char no_empty_graph_warning,U_FILE* ferr) { int RESULT=1; struct FST2_free_info fst2_free; Fst2* fst2=load_abstract_fst2(vec,name,1,&fst2_free); if (fst2==NULL) { fatal_error("Cannot load graph %s\n",name); } u_printf("Creating condition sets...\n"); GrfCheckInfo* chk=new_GrfCheckInfo(fst2); /* Now, we look for a fix point in the condition graphs */ struct list_int* list=NULL; /* To do that, we start by creating a list of all the graphs we are sure about */ int unknown=0; for (int i=1;i<fst2->number_of_graphs+1;i++) { if (chk->graphs_matching_E[i]!=CHK_DONT_KNOW) { list=new_list_int(i,list); } else { unknown++; } } /* While there is something to do for E matching */ u_printf("Checking empty word matching...\n"); while (resolve_all_conditions(chk,&list,&unknown)) {} if (chk->graphs_matching_E[1]==CHK_MATCHES_E) { if (!no_empty_graph_warning) { error("ERROR: the main graph %S recognizes <E>\n",fst2->graph_names[1]); if (ferr!=NULL) { u_fprintf(ferr,"ERROR: the main graph %S recognizes <E>\n",fst2->graph_names[1]); } } goto evil_goto; } if (!no_empty_graph_warning) { for (int i=2;i<fst2->number_of_graphs+1;i++) { if (chk->graphs_matching_E[i]==CHK_MATCHES_E) { error("WARNING: the graph %S recognizes <E>\n",fst2->graph_names[i]); if (ferr!=NULL) { u_fprintf(ferr,"WARNING: the graph %S recognizes <E>\n",fst2->graph_names[i]); } } } } /* Now, we look for E loops and left recursions. And to do that, we need a new version * of the condition graphs, because a graph that does not match E would have been emptied. * And obviously, we can not deduce anything from an empty graph. 
*/ rebuild_condition_graphs(chk); u_printf("Checking E loops...\n"); if (is_any_E_loop(chk)) { /* Error messages have already been printed */ goto evil_goto; } u_printf("Checking left recursions...\n"); if (is_any_left_recursion(chk)) { /* Error messages have already been printed */ goto evil_goto; } evil_goto: /* There may be something unused in the list that we need to free */ free_list_int(list); free_GrfCheckInfo(chk); free_abstract_Fst2(fst2,&fst2_free); return RESULT; }
/**
 * Returns 1 if the given .fst2 is OK to be used by the Locate program; 0 otherwise.
 * Conditions are:
 *
 * 1) no left recursion
 * 2) no loop that can recognize the empty word (<E> with an output or subgraph
 *    that can match the empty word).
 *
 * Legacy implementation based on per-state ConditionList structures; the
 * actual return values are the LEFT_RECURSION / NO_LEFT_RECURSION constants.
 *
 * @param name                    .fst2 file to check
 * @param no_empty_graph_warning  if non-zero, suppress <E>-matching messages
 * @param ferr                    optional error log file (may be NULL)
 */
int OK_for_Locate_write_error(const char* name,char no_empty_graph_warning,U_FILE* ferr) {
ConditionList* conditions;
ConditionList* conditions_for_state;
int i,j;
int ERROR=0;
struct FST2_free_info fst2_free;
Fst2* fst2=load_abstract_fst2(name,1,&fst2_free);
if (fst2==NULL) {
   fatal_error("Cannot load graph %s\n",name);
}
u_printf("Recursion detection started\n");
/* One slot per graph; graph numbering is 1-based, hence the +1 */
int* graphs_matching_E=(int*)malloc(sizeof(int)*(fst2->number_of_graphs+1));
conditions=(ConditionList*)malloc(sizeof(ConditionList)*(fst2->number_of_graphs+1));
if (graphs_matching_E==NULL || conditions==NULL) {
   fatal_alloc_error("OK_for_Locate");
}
for (i=0;i<fst2->number_of_graphs+1;i++) {
   graphs_matching_E[i]=0;
   conditions[i]=NULL;
}
/* First, we look for tags that match the empty word <E> */
for (i=0;i<fst2->number_of_tags;i++) {
   check_epsilon_tag(fst2->tags[i]);
}
/* Then, we look for graphs that match <E> with or without conditions */
for (i=1;i<=fst2->number_of_graphs;i++) {
   /* Temporary per-state condition lists for graph #i */
   conditions_for_state=(ConditionList*)malloc(sizeof(ConditionList)*fst2->number_of_states_per_graphs[i]);
   if (conditions_for_state==NULL) {
      fatal_alloc_error("OK_for_Locate");
   }
   for (j=0;j<fst2->number_of_states_per_graphs[i];j++) {
      conditions_for_state[j]=NULL;
   }
   graphs_matching_E[i]=graph_matches_E(fst2->initial_states[i],fst2->initial_states[i],
                                        fst2->states,fst2->tags,i,fst2->graph_names,
                                        conditions_for_state,&conditions[i]);
   /* If any, we remove the temp conditions */
   if (conditions[i]!=NULL) free_ConditionList(conditions[i]);
   /* And we way that the conditions for the current graph are its initial
    * state's ones. */
   conditions[i]=conditions_for_state[0];
   /* Then we perform cleaning */
   conditions_for_state[0]=NULL;
   for (j=1;j<fst2->number_of_states_per_graphs[i];j++) {
      free_ConditionList(conditions_for_state[j]);
   }
   free(conditions_for_state);
}
/* Then, we use all our condition lists to determine which graphs match <E>.
 * We iterate until we find a fixed point. If some conditions remain non null
 * after this loop, it means that there are <E> dependencies between graphs
 * and this case will be dealt with later. */
u_printf("Resolving <E> conditions\n");
while (resolve_conditions(conditions,fst2->number_of_graphs,
                          fst2->states,fst2->initial_states,ferr)) {}
if (is_bit_mask_set(fst2->states[fst2->initial_states[1]]->control,UNCONDITIONAL_E_MATCH)) {
   /* If the main graph matches <E> */
   if (!no_empty_graph_warning) {
      error("ERROR: the main graph %S recognizes <E>\n",fst2->graph_names[1]);
      if (ferr != NULL) u_fprintf(ferr,"ERROR: the main graph %S recognizes <E>\n",fst2->graph_names[1]);
   }
   ERROR=1;
}
if (!ERROR) {
   /* Subgraphs matching <E> only produce warnings */
   for (i=1;i<fst2->number_of_graphs+1;i++) {
      if (is_bit_mask_set(fst2->states[fst2->initial_states[i]]->control,UNCONDITIONAL_E_MATCH)) {
         /* If the graph matches <E> */
         if (!no_empty_graph_warning) {
            error("WARNING: the graph %S recognizes <E>\n",fst2->graph_names[i]);
            if (ferr != NULL) u_fprintf(ferr,"WARNING: the graph %S recognizes <E>\n",fst2->graph_names[i]);
         }
      }
   }
}
clean_controls(fst2,graphs_matching_E);
if (!ERROR) {
   u_printf("Looking for <E> loops\n");
   for (i=1;!ERROR && i<fst2->number_of_graphs+1;i++) {
      ERROR=look_for_E_loops(i,fst2,graphs_matching_E,ferr);
   }
}
/* Reset the control marks before the recursion pass */
clean_controls(fst2,NULL);
if (!ERROR) {
   u_printf("Looking for infinite recursions\n");
   for (i=1;!ERROR && i<fst2->number_of_graphs+1;i++) {
      ERROR=look_for_recursion(i,NULL,fst2,graphs_matching_E,ferr);
   }
}
/* Final cleanup of all remaining condition lists and the fst2 itself */
for (i=1;i<fst2->number_of_graphs+1;i++) {
   free_ConditionList(conditions[i]);
}
free_abstract_Fst2(fst2,&fst2_free);
u_printf("Recursion detection completed\n");
free(conditions);
free(graphs_matching_E);
if (ERROR) return LEFT_RECURSION;
return NO_LEFT_RECURSION;
}