/**
 * Releases an Elag rule and everything it owns: its name, the left and
 * right graphs of every context, the context array, the rule's automaton
 * (through free_Fst2Automaton) and finally the rule structure itself.
 *
 * Note that the automaton IS released here, so rule->automaton must not
 * be used after this call.
 *
 * A NULL rule is silently ignored.
 */
void free_elRule(elRule* rule) {
    if (rule==NULL) {
        return;
    }
    if (rule->name!=NULL) {
        free(rule->name);
    }
    /* Each context owns two graphs whose symbols are released with
     * free_symbol */
    int ctx=0;
    while (ctx<rule->nbContexts) {
        free_SingleGraph(rule->contexts[ctx].right,free_symbol);
        free_SingleGraph(rule->contexts[ctx].left,free_symbol);
        ctx++;
    }
    free(rule->contexts);
    free_Fst2Automaton(rule->automaton,free_symbol);
    free(rule);
}
/**
 * Releases a GrfCheckInfo: the graphs_matching_E array, every condition
 * graph stored at indices 1..number_of_graphs, the condition graph array
 * and the structure itself.
 *
 * The fst2 field is only read (to know how many graphs there are); it is
 * not freed here, since this structure did not create it.
 *
 * A NULL info is silently ignored.
 */
static void free_GrfCheckInfo(GrfCheckInfo* info) {
    if (info==NULL) return;
    free(info->graphs_matching_E);
    if (info->condition_graphs!=NULL) {
        /* Slot 0 is never visited: the loop starts at index 1 */
        const int last_graph=info->fst2->number_of_graphs;
        for (int g=1;g<=last_graph;g++) {
            free_SingleGraph(info->condition_graphs[g],NULL);
        }
        free(info->condition_graphs);
    }
    free(info);
}
/** * This function reads a file that contains a list of Elag grammar names, * and it compiles them into the file 'outname'. However, if the result * automaton is too big, it will be saved in several automata inside * the output file. */ int compile_elag_rules(char* rulesname,char* outname, const VersatileEncodingConfig* vec,language_t* language) { u_printf("Compilation of %s\n",rulesname); U_FILE* f=NULL; U_FILE* frules=u_fopen(ASCII,rulesname,U_READ); if (frules==NULL) { fatal_error("Cannot open file '%s'\n",rulesname); } U_FILE* out=u_fopen(ASCII,outname,U_WRITE); if (out==NULL) { fatal_error("cannot open file '%s'\n",outname); } /* Name of the file that contains the result automaton */ char fstoutname[FILENAME_MAX]; int nbRules=0; char buf[FILENAME_MAX]; time_t start_time=time(0); Fst2Automaton* res=NULL; Fst2Automaton* A; int fst_number=0; Ustring* ustr=new_Ustring(); char buf2[FILENAME_MAX]; char directory[FILENAME_MAX]; get_path(rulesname,directory); while (af_fgets(buf,FILENAME_MAX,frules->f)) { /* We read one by one the Elag grammar names in the .lst file */ chomp(buf); if (*buf=='\0') { /* If we have an empty line */ continue; } if (!is_absolute_path(buf)) { strcpy(buf2,buf); sprintf(buf,"%s%s",directory,buf2); } u_printf("\n%s...\n",buf); remove_extension(buf); strcat(buf,".elg"); if ((f=u_fopen(ASCII,buf,U_READ))==NULL) { /* If the .elg file doesn't exist, we create one */ remove_extension(buf); u_printf("Precompiling %s.fst2\n",buf); strcat(buf,".fst2"); elRule* rule=new_elRule(buf,vec,language); if (rule==NULL) { fatal_error("Unable to read grammar '%s'\n",buf); } if ((A=compile_elag_rule(rule,language))==NULL) { fatal_error("Unable to compile rule '%s'\n",buf); } free_elRule(rule); } else { /* If there is already .elg, we use it */ u_fclose(f); A=load_elag_grammar_automaton(vec,buf,language); if (A==NULL) { fatal_error("Unable to load '%s'\n",buf); } } if (A->automaton->number_of_states==0) { error("Grammar %s forbids everything!\n",buf); } if 
(res!=NULL) { /* If there is already an automaton, we intersect it with the new one */ SingleGraph tmp=res->automaton; res->automaton=elag_intersection(language,tmp,A->automaton,GRAMMAR_GRAMMAR); free_SingleGraph(tmp,NULL); free_Fst2Automaton(A,NULL); trim(res->automaton,NULL); } else { res=A; } nbRules++; if (res->automaton->number_of_states>MAX_GRAM_SIZE) { /* If the automaton is too large, we will split the grammar * into several automata */ elag_minimize(res->automaton,1); sprintf(fstoutname,"%s-%d.elg",outname,fst_number++); u_fprintf(out,"<%s>\n",fstoutname); u_printf("Splitting big grammar in '%s' (%d states)\n",fstoutname,res->automaton->number_of_states); u_sprintf(ustr,"%s: compiled elag grammar",fstoutname); free(res->name); res->name=u_strdup(ustr->str); save_automaton(res,fstoutname,vec,FST_GRAMMAR); free_Fst2Automaton(res,NULL); res=NULL; } } if (res!=NULL) { /* We save the last automaton, if any */ sprintf(fstoutname,"%s-%d.elg",outname,fst_number++); u_fprintf(out,"<%s>\n",fstoutname); u_printf("Saving grammar in '%s'(%d states)\n",fstoutname,res->automaton->number_of_states); elag_minimize(res->automaton,1); u_sprintf(ustr,"%s: compiled elag grammar",fstoutname); free(res->name); res->name=u_strdup(ustr->str); save_automaton(res,fstoutname,vec,FST_GRAMMAR); free_Fst2Automaton(res,free_symbol); } time_t end_time=time(0); u_fclose(frules); u_fclose(out); free_Ustring(ustr); u_printf("\nDone.\nElapsed time: %.0f s\n",difftime(end_time,start_time)); u_printf("\n%d rule%s from %s compiled in %s (%d automat%s)\n", nbRules,(nbRules>1)?"s":"",rulesname,outname,fst_number, (fst_number>1)?"a":"on"); return 0; }
int main_RebuildTfst(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } VersatileEncodingConfig vec=VEC_DEFAULT; int val, index=-1; bool only_verify_arguments = false; UnitexGetOpt options; int save_statistics=1; while (EOF!=(val=options.parse_long(argc,argv,optstring_RebuildTfst,lopts_RebuildTfst,&index))) { switch (val) { case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'S': save_statistics = 0; break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n", options.vars()->optopt) : error("Missing argument for option --%s\n", lopts_RebuildTfst[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? 
error("Invalid option -%c\n", options.vars()->optopt) : error("Invalid option --%s\n", options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } char input_tfst[FILENAME_MAX]; char input_tind[FILENAME_MAX]; strcpy(input_tfst,argv[options.vars()->optind]); remove_extension(input_tfst,input_tind); strcat(input_tind,".tind"); u_printf("Loading %s...\n",input_tfst); Tfst* tfst = open_text_automaton(&vec,input_tfst); if (tfst==NULL) { error("Unable to load %s automaton\n",input_tfst); return DEFAULT_ERROR_CODE; } char basedir[FILENAME_MAX]; get_path(input_tfst,basedir); char output_tfst[FILENAME_MAX]; sprintf(output_tfst, "%s.new.tfst",input_tfst); char output_tind[FILENAME_MAX]; sprintf(output_tind, "%s.new.tind",input_tfst); U_FILE* f_tfst; if ((f_tfst = u_fopen(&vec,output_tfst,U_WRITE)) == NULL) { error("Unable to open %s for writing\n", output_tfst); close_text_automaton(tfst); return DEFAULT_ERROR_CODE; } U_FILE* f_tind; if ((f_tind = u_fopen(BINARY,output_tind,U_WRITE)) == NULL) { u_fclose(f_tfst); close_text_automaton(tfst); error("Unable to open %s for writing\n", output_tind); return DEFAULT_ERROR_CODE; } /* We use this hash table to rebuild files tfst_tags_by_freq/alph.txt */ struct hash_table* form_frequencies=new_hash_table((HASH_FUNCTION)hash_unichar,(EQUAL_FUNCTION)u_equal, (FREE_FUNCTION)free,NULL,(KEYCOPY_FUNCTION)keycopy); u_fprintf(f_tfst,"%010d\n",tfst->N); for (int i = 1; i <= tfst->N; i++) { if ((i % 100) == 0) { u_printf("%d/%d sentences rebuilt...\n", i, tfst->N); } load_sentence(tfst,i); char grfname[FILENAME_MAX]; sprintf(grfname, "%ssentence%d.grf", basedir, i); unichar** tags=NULL; int n_tags=-1; if (fexists(grfname)) { /* If there is a .grf for the current sentence, then we must * take it into account */ if 
(0==pseudo_main_Grf2Fst2(&vec,grfname,0,NULL,1,1,NULL,NULL,0)) { /* We proceed only if the graph compilation was a success */ char fst2name[FILENAME_MAX]; sprintf(fst2name, "%ssentence%d.fst2", basedir, i); struct FST2_free_info fst2_free; Fst2* fst2=load_abstract_fst2(&vec,fst2name,0,&fst2_free); af_remove(fst2name); free_SingleGraph(tfst->automaton,NULL); tfst->automaton=create_copy_of_fst2_subgraph(fst2,1); tags=create_tfst_tags(fst2,&n_tags); free_abstract_Fst2(fst2,&fst2_free); } else { error("Error: %s is not a valid sentence automaton\n",grfname); } } save_current_sentence(tfst,f_tfst,f_tind,tags,n_tags,form_frequencies); if (tags!=NULL) { /* If necessary, we free the tags we created */ for (int count_tags=0;count_tags<n_tags;count_tags++) { free(tags[count_tags]); } free(tags); } } u_printf("Text automaton rebuilt.\n"); u_fclose(f_tind); u_fclose(f_tfst); close_text_automaton(tfst); /* Finally, we save statistics */ if (save_statistics) { char tfst_tags_by_freq[FILENAME_MAX]; char tfst_tags_by_alph[FILENAME_MAX]; strcpy(tfst_tags_by_freq, basedir); strcat(tfst_tags_by_freq, "tfst_tags_by_freq.txt"); strcpy(tfst_tags_by_alph, basedir); strcat(tfst_tags_by_alph, "tfst_tags_by_alph.txt"); U_FILE* f_tfst_tags_by_freq = u_fopen(&vec, tfst_tags_by_freq, U_WRITE); if (f_tfst_tags_by_freq == NULL) { error("Cannot open %s\n", tfst_tags_by_freq); } U_FILE* f_tfst_tags_by_alph = u_fopen(&vec, tfst_tags_by_alph, U_WRITE); if (f_tfst_tags_by_alph == NULL) { error("Cannot open %s\n", tfst_tags_by_alph); } sort_and_save_tfst_stats(form_frequencies, f_tfst_tags_by_freq, f_tfst_tags_by_alph); u_fclose(f_tfst_tags_by_freq); u_fclose(f_tfst_tags_by_alph); } free_hash_table(form_frequencies); /* make a backup and replace old automaton with new */ char backup_tfst[FILENAME_MAX]; char backup_tind[FILENAME_MAX]; sprintf(backup_tfst,"%s.bck",input_tfst); sprintf(backup_tind,"%s.bck",input_tind); /* We remove the existing backup files, if any */ af_remove(backup_tfst); 
af_remove(backup_tind); af_rename(input_tfst,backup_tfst); af_rename(input_tind,backup_tind); af_rename(output_tfst,input_tfst); af_rename(output_tind,input_tind); u_printf("\nYou can find a backup of the original files in:\n %s\nand %s\n", backup_tfst,backup_tind); return SUCCESS_RETURN_CODE; }
/**
 * Replaces every condition graph of 'chk', for graph numbers
 * 1..chk->fst2->number_of_graphs: the previous graph is released and a
 * new one is built with create_condition_graph() from chk->fst2.
 */
static void rebuild_condition_graphs(GrfCheckInfo* chk) {
    int g=1;
    while (g<=chk->fst2->number_of_graphs) {
        /* Drop the stale graph before storing its replacement in the
         * same slot */
        free_SingleGraph(chk->condition_graphs[g],NULL);
        chk->condition_graphs[g]=create_condition_graph(chk->fst2,g,0);
        g++;
    }
}
/**
 * Releases the given automaton: its name, its graph and the structure
 * itself.
 *
 * 'free_elag_symbol' is forwarded as is to free_SingleGraph(); pass NULL
 * when the symbols must not be released along with the graph
 * (presumably free_SingleGraph() skips the symbols in that case - to be
 * confirmed against its definition).
 *
 * A NULL automaton is silently ignored.
 */
void free_Fst2Automaton(Fst2Automaton* A,void (*free_elag_symbol)(symbol_t*)) {
    if (A!=NULL) {
        if (A->name!=NULL) {
            free(A->name);
        }
        free_SingleGraph(A->automaton,free_elag_symbol);
        free(A);
    }
}