/**
 * Frees all the memory associated to the given rule: its name, the left
 * and right graphs of every context, the context array, its automaton
 * and finally the rule structure itself.
 *
 * NOTE: the automaton IS released here (through free_Fst2Automaton with
 * free_symbol), so the caller must neither use nor free rule->automaton
 * after this call.
 *
 * Does nothing if 'rule' is NULL.
 */
void free_elRule(elRule* rule) {
    if (rule==NULL) {
        return;
    }
    /* free(NULL) is a no-op, so the name needs no guard */
    free(rule->name);
    /* A rule whose context array was never allocated must not be iterated */
    if (rule->contexts!=NULL) {
        for (int i=0;i<rule->nbContexts;i++) {
            free_SingleGraph(rule->contexts[i].right,free_symbol);
            free_SingleGraph(rule->contexts[i].left,free_symbol);
        }
        free(rule->contexts);
    }
    free_Fst2Automaton(rule->automaton,free_symbol);
    free(rule);
}
/** * Compiles the given .fst2 grammar into the given .elg file. * Returns 0 in case of success; -1 otherwise. */ int compile_elag_grammar(char* grammar,char* elg_file, const VersatileEncodingConfig* vec, language_t* language) { elRule* rule=new_elRule(grammar,vec,language); if (rule==NULL) { error("Unable to read grammar '%s'\n",grammar); return -1; } Fst2Automaton* A=compile_elag_rule(rule,language); if (A==NULL) { fatal_error("Unable to compile rule '%s'\n",grammar); } free_elRule(rule); save_automaton(A,elg_file,vec,FST_GRAMMAR); free_Fst2Automaton(A,free_symbol); return 0; }
/** * This function analyzes the given Elag rule automaton to find * where the rule and constraint parts are. As a side effect, it builds * a fst2 grammar ("foo.fst2" => "foo-conc.fst2") that can be used by * the Locate program to match the <!> .... <!> .... <!> part of the rule. */ void split_elag_rule(elRule* rule, const VersatileEncodingConfig* vec,language_t* language) { int c; /* This array contains the numbers of the states that are pointed to by * middle '<=>' of the constraints */ int constraints[ELAG_MAX_CONSTRAINTS]; int nbConstraints=count_constraints(rule->automaton,constraints); /* +1 because we have to count the <!> .... <!> .... <!> part of the rule */ rule->nbContexts=nbConstraints+1; rule->contexts=(elContext*)malloc(rule->nbContexts*sizeof(elContext)); if (rule->contexts==NULL) { fatal_alloc_error("split_elag_rule"); } for (c=0;c<rule->nbContexts;c++) { rule->contexts[c].left=NULL; rule->contexts[c].right=NULL; } int endR1=ELAG_UNDEFINED; int endR2=ELAG_UNDEFINED; int endC2=ELAG_UNDEFINED; for (Transition* t=rule->automaton->automaton->states[0]->outgoing_transitions;t!=NULL;t=t->next) { symbol_t* symbol=t->label; switch (symbol->type) { /* We split the unique <!> .... <!> .... <!> part */ case S_EXCLAM: if (rule->contexts[0].left!=NULL) { fatal_error("Too much '<!>' tags\n",rule->name); } rule->contexts[0].left=new_SingleGraph(PTR_TAGS); /* We look for the end of the first part of the rule */ endR1=get_sub_automaton(rule->automaton->automaton,rule->contexts[0].left,t->state_number,0,S_EXCLAM); rule->contexts[0].right=new_SingleGraph(PTR_TAGS); endR2=get_sub_automaton(rule->automaton->automaton,rule->contexts[0].right,endR1,0,S_EXCLAM); if (endR1==ELAG_UNDEFINED || endR2==ELAG_UNDEFINED || !is_final_state(rule->automaton->automaton->states[endR2])) { fatal_error("split_elag_rule: %s: parse error in <!> part\n",rule->name); } break; /* We split the nbConstraints <=> .... <=> .... 
<=> parts */ case S_EQUAL: if (rule->contexts[1].left!=NULL) { fatal_error("Non deterministic .fst2 file\n"); } for (c=0;c<nbConstraints;c++) { rule->contexts[c+1].left=new_SingleGraph(PTR_TAGS); get_sub_automaton(rule->automaton->automaton,rule->contexts[c+1].left,t->state_number,1,constraints[c]); rule->contexts[c+1].right=new_SingleGraph(PTR_TAGS); endC2=get_sub_automaton(rule->automaton->automaton,rule->contexts[c+1].right,constraints[c],0,S_EQUAL); if (endC2==ELAG_UNDEFINED || !is_final_state(rule->automaton->automaton->states[endC2])) { fatal_error("split_elag_rule: %s: parse error in <=> part\n",rule->name); } } break; default: fatal_error("Left delimitor '<!>' or '<=>' missing\n"); } } if (rule->contexts[0].left==NULL) { fatal_error("In grammar '%s': symbol '<!>' not found.\n",rule->name); } char buf[FILENAME_MAX]; remove_extension(rule->name,buf); strcat(buf,"-conc.fst2"); /* We create the.fst2 to be used by Locate */ Fst2Automaton* locate=make_locate_automaton(rule,language); save_automaton(locate,buf,vec,FST_LOCATE); free_Fst2Automaton(locate,free_symbol); }
/** * This function reads a file that contains a list of Elag grammar names, * and it compiles them into the file 'outname'. However, if the result * automaton is too big, it will be saved in several automata inside * the output file. */ int compile_elag_rules(char* rulesname,char* outname, const VersatileEncodingConfig* vec,language_t* language) { u_printf("Compilation of %s\n",rulesname); U_FILE* f=NULL; U_FILE* frules=u_fopen(ASCII,rulesname,U_READ); if (frules==NULL) { fatal_error("Cannot open file '%s'\n",rulesname); } U_FILE* out=u_fopen(ASCII,outname,U_WRITE); if (out==NULL) { fatal_error("cannot open file '%s'\n",outname); } /* Name of the file that contains the result automaton */ char fstoutname[FILENAME_MAX]; int nbRules=0; char buf[FILENAME_MAX]; time_t start_time=time(0); Fst2Automaton* res=NULL; Fst2Automaton* A; int fst_number=0; Ustring* ustr=new_Ustring(); char buf2[FILENAME_MAX]; char directory[FILENAME_MAX]; get_path(rulesname,directory); while (af_fgets(buf,FILENAME_MAX,frules->f)) { /* We read one by one the Elag grammar names in the .lst file */ chomp(buf); if (*buf=='\0') { /* If we have an empty line */ continue; } if (!is_absolute_path(buf)) { strcpy(buf2,buf); sprintf(buf,"%s%s",directory,buf2); } u_printf("\n%s...\n",buf); remove_extension(buf); strcat(buf,".elg"); if ((f=u_fopen(ASCII,buf,U_READ))==NULL) { /* If the .elg file doesn't exist, we create one */ remove_extension(buf); u_printf("Precompiling %s.fst2\n",buf); strcat(buf,".fst2"); elRule* rule=new_elRule(buf,vec,language); if (rule==NULL) { fatal_error("Unable to read grammar '%s'\n",buf); } if ((A=compile_elag_rule(rule,language))==NULL) { fatal_error("Unable to compile rule '%s'\n",buf); } free_elRule(rule); } else { /* If there is already .elg, we use it */ u_fclose(f); A=load_elag_grammar_automaton(vec,buf,language); if (A==NULL) { fatal_error("Unable to load '%s'\n",buf); } } if (A->automaton->number_of_states==0) { error("Grammar %s forbids everything!\n",buf); } if 
(res!=NULL) { /* If there is already an automaton, we intersect it with the new one */ SingleGraph tmp=res->automaton; res->automaton=elag_intersection(language,tmp,A->automaton,GRAMMAR_GRAMMAR); free_SingleGraph(tmp,NULL); free_Fst2Automaton(A,NULL); trim(res->automaton,NULL); } else { res=A; } nbRules++; if (res->automaton->number_of_states>MAX_GRAM_SIZE) { /* If the automaton is too large, we will split the grammar * into several automata */ elag_minimize(res->automaton,1); sprintf(fstoutname,"%s-%d.elg",outname,fst_number++); u_fprintf(out,"<%s>\n",fstoutname); u_printf("Splitting big grammar in '%s' (%d states)\n",fstoutname,res->automaton->number_of_states); u_sprintf(ustr,"%s: compiled elag grammar",fstoutname); free(res->name); res->name=u_strdup(ustr->str); save_automaton(res,fstoutname,vec,FST_GRAMMAR); free_Fst2Automaton(res,NULL); res=NULL; } } if (res!=NULL) { /* We save the last automaton, if any */ sprintf(fstoutname,"%s-%d.elg",outname,fst_number++); u_fprintf(out,"<%s>\n",fstoutname); u_printf("Saving grammar in '%s'(%d states)\n",fstoutname,res->automaton->number_of_states); elag_minimize(res->automaton,1); u_sprintf(ustr,"%s: compiled elag grammar",fstoutname); free(res->name); res->name=u_strdup(ustr->str); save_automaton(res,fstoutname,vec,FST_GRAMMAR); free_Fst2Automaton(res,free_symbol); } time_t end_time=time(0); u_fclose(frules); u_fclose(out); free_Ustring(ustr); u_printf("\nDone.\nElapsed time: %.0f s\n",difftime(end_time,start_time)); u_printf("\n%d rule%s from %s compiled in %s (%d automat%s)\n", nbRules,(nbRules>1)?"s":"",rulesname,outname,fst_number, (fst_number>1)?"a":"on"); return 0; }
/**
 * Single-argument wrapper around free_Fst2Automaton that passes
 * free_symbol as its second argument, i.e. (as the name states) the
 * variant that also takes care of the symbols.
 */
void free_Fst2Automaton_including_symbols(Fst2Automaton* A) {
    free_Fst2Automaton(A,free_symbol);
}
/**
 * Single-argument wrapper around free_Fst2Automaton that passes NULL
 * as its second argument, i.e. (as the name states) the variant that
 * leaves the symbols alone.
 */
void free_Fst2Automaton_excluding_symbols(Fst2Automaton* A) {
    free_Fst2Automaton(A,NULL);
}