/**
 * Builds and returns the automaton used for pattern matching of the
 * context of an Elag rule.
 *
 * The automaton is obtained by calling clone() on the left part of the
 * rule's first context, then concatenating the right part of that same
 * context to it. Afterwards, for every POS of the language that is flagged
 * as ignorable, a self-loop tagged with this POS is added on every state
 * whose index is at least 1 (no loop is added on state 0).
 *
 * @param rule      the rule; only rule->contexts[0] is read here
 * @param language  the language whose POS table is scanned
 * @return a newly created Fst2Automaton wrapping the locate automaton
 */
Fst2Automaton* make_locate_automaton(elRule* rule,language_t* language) {
    Fst2Automaton* locate=new_Fst2Automaton(NULL,-1);
    locate->automaton=clone(rule->contexts[0].left,dup_symbol);
    /* Left context followed by the right one */
    elag_concat(language,locate->automaton,rule->contexts[0].right);
    /* Loops tagged with ignorable POS */
    for (int pos_index=0;pos_index<language->POSs->size;pos_index++) {
        POS_t* pos=(POS_t*)language->POSs->value[pos_index];
        if (!pos->ignorable) {
            /* Nothing to add for a POS that cannot be ignored */
            continue;
        }
        /* The state count is re-read on each iteration, and the loop
         * deliberately starts at state 1 */
        for (int state=1;state<locate->automaton->number_of_states;state++) {
            /* A fresh symbol is created for each state and released as
             * soon as the transition has been added */
            symbol_t* loop_tag=new_symbol_POS(pos,-1);
            add_outgoing_transition(locate->automaton->states[state],loop_tag,state);
            free_symbol(loop_tag);
        }
    }
    return locate;
}
/**
 * Compiles an Elag rule and returns an automaton A such that the
 * intersection of A with a sentence automaton rejects the sequences that
 * are not valid regarding this rule.
 *
 * The left and right parts of the rule's contexts are passed to
 * determinization, trimming, prefixing/suffixing and minimization
 * functions, so the rule should be considered modified by this call.
 *
 * A warning is emitted through error() if the resulting automaton has no
 * state; the (empty) automaton is still returned in that case.
 *
 * @param rule      the rule to compile
 * @param language  the language description passed to the Elag operations
 * @return a newly created Fst2Automaton named after rule->automaton->name
 */
Fst2Automaton* compile_elag_rule(elRule* rule,language_t* language) {
    const char* plural_mark=(rule->nbContexts>1)?"s":"";
    u_printf("Compiling %s... (%d context%s)\n",rule->name,rule->nbContexts,plural_mark);

    /* Each context part is determinized and trimmed, left part first */
    for (int ctx=0;ctx<rule->nbContexts;ctx++) {
        elag_determinize(language,rule->contexts[ctx].left,free_symbol);
        trim(rule->contexts[ctx].left,free_symbol);
        elag_determinize(language,rule->contexts[ctx].right,free_symbol);
        trim(rule->contexts[ctx].right,free_symbol);
    }

    /* A*.R1: the left part of the first context, prefixed with everything */
    prefix_with_everything(rule->contexts[0].left);
    elag_determinize(language,rule->contexts[0].left,free_symbol);
    elag_minimize(rule->contexts[0].left);
    SingleGraph anything_R1=rule->contexts[0].left;

    /* R2.A*: the right part of the first context, suffixed with everything */
    suffix_with_everything(rule->contexts[0].right);
    elag_determinize(language,rule->contexts[0].right,free_symbol);
    elag_minimize(rule->contexts[0].right);
    SingleGraph R2_anything=rule->contexts[0].right;

    /* Number of constraint combinations: 2^(nbContexts-1), or 0 when
     * there is no context at all.
     * NOTE(review): the shift is not guarded against nbContexts-1 reaching
     * the bit width of int -- confirm that callers bound nbContexts. */
    int nb_combinations=0;
    if ((rule->nbContexts-1)>=0) {
        nb_combinations=(int)(1 << (rule->nbContexts-1));
    }

    /* The automaton that accumulates the wrong paths */
    SingleGraph compiled=new_SingleGraph(PTR_TAGS);
    for (int combination=0;combination<nb_combinations;combination++) {
        /* Automaton that does not match this combination of constraints */
        SingleGraph unmatched=combine_constraints(rule,combination,anything_R1,R2_anything,language);
        /* It is merged into the accumulator, which is kept deterministic
         * and minimal after each union */
        build_union(compiled,unmatched);
        elag_determinize(language,compiled,free_symbol);
        elag_minimize(compiled);
    }

    /* The accumulator rejects the wrong paths; its complement recognizes
     * the correct ones, so applying the Elag rule consists of intersecting
     * this complement with the sentence automata. */
    elag_complementation(language,compiled);
    trim(compiled,free_symbol);

    if (compiled->number_of_states==0) {
        error("Grammar %s forbids everything\n",rule->name);
    }
    u_printf("Grammar %s compiled (%d states)\n",rule->name,compiled->number_of_states);

    Fst2Automaton* wrapper=new_Fst2Automaton(rule->automaton->name,-1);
    wrapper->automaton=compiled;
    return wrapper;
}
/**
 * Loads and returns the next automaton from the given .fst2.
 * Returns NULL if there is no more automaton to load, i.e. when
 * fstf->pos has reached fstf->nb_automata.
 *
 * Expected input, as read by this function:
 *   - a header line made of '-', the automaton number (which must be
 *     fstf->pos+1), one separator character and the automaton name;
 *   - one line per state, until a line starting with 'f'. A state line
 *     starting with 't' describes a final state. After the leading
 *     non-digit characters, the line holds pairs of integers
 *     "tag_number state_number" separated by spaces.
 *
 * If fstf->renumber is not NULL, each tag number is translated through it.
 * Tags whose entry in fstf->symbols->value is NULL are silently skipped.
 * State 0, if any, is set as the initial state. On success fstf->pos is
 * incremented.
 *
 * Format errors and a premature end of file are reported with
 * fatal_error(); an automaton without any state is only reported with
 * error() and is still returned.
 *
 * @param fstf  the .fst2 being read
 * @return the loaded automaton, or NULL when all automata have been read
 */
Fst2Automaton* load_automaton(Elag_fst_file_in* fstf) {
    if (fstf->pos>=fstf->nb_automata) {
        return NULL;
    }
    Ustring* ustr=new_Ustring();
    readline(ustr,fstf->f);
    const unichar* p=ustr->str;
    if (p[0]!='-') {
        fatal_error("load_automaton: %s: bad file format\n",fstf->name);
    }
    p++;
    int automaton_number=u_parse_int(p,&p);
    if (automaton_number!=fstf->pos+1) {
        /* We make sure that the automaton number is what it should be */
        fatal_error("load_automaton: %s: parsing error with line '%S' ('-%d ...' expected)\n",fstf->name,ustr->str,fstf->pos+1);
    }
    /* We skip the separator so that p points on the automaton name. If the
     * line ends right after the number, p stays on the terminator (empty
     * name) instead of being moved past the end of the string. */
    if (*p!='\0') {
        p++;
    }
    Fst2Automaton* A=new_Fst2Automaton(p);
    while (readline(ustr,fstf->f) && ustr->str[0]!='f') {
        /* If there is a state to read */
        p=ustr->str;
        SingleGraphState state=add_state(A->automaton);
        if (*p=='t') {
            /* If necessary, we set the state final */
            set_final_state(state);
        }
        /* We put p on the first digit */
        while (*p!='\0' && !u_is_digit(*p)) {
            p++;
        }
        while (*p!='\0') {
            /* If there is a transition to read */
            int tag_number=u_parse_int(p,&p);
            /* NOTE(review): tag_number comes from the file and is used as
             * an index below without any range check in this function --
             * confirm that the loader of fstf guarantees valid values. */
            if (fstf->renumber!=NULL) {
                tag_number=fstf->renumber[tag_number];
            }
            while (*p==' ') {
                p++;
            }
            if (!u_is_digit(*p)) {
                fatal_error("load_automaton: %s: bad file format (line='%S')\n",fstf->name,ustr->str);
            }
            int state_number=u_parse_int(p,&p);
            symbol_t* tmp=(symbol_t*)fstf->symbols->value[tag_number];
            if (tmp!=NULL) {
                /* If it is a good symbol (successfully loaded), we add
                 * transition(s). The same call is used whatever the kind of
                 * automaton. In a text automaton, tmp may be a symbol list,
                 * for instance:
                 *
                 * tmp = "{domestique,.N:fs}" => "{domestique,.N:ms}" => NULL
                 *
                 * in which case one transition per element is expected to
                 * be added by add_all_outgoing_transitions (TODO confirm). */
                add_all_outgoing_transitions(state,tmp,state_number);
            }
            while (*p==' ') {
                p++;
            }
        }
    }
    if (*ustr->str=='\0') {
        fatal_error("load_automaton: unexpected end of file\n");
    }
    if (A->automaton->number_of_states==0) {
        error("load_automaton: automaton with no state\n");
    } else {
        set_initial_state(A->automaton->states[0]);
    }
    fstf->pos++;
    free_Ustring(ustr);
    return A;
}