/**
 * Builds the condition graph associated with subgraph #graph of the given
 * .fst2 and returns it after it has been passed to clean_condition_graph.
 *
 * Every state of the subgraph gets a counterpart in the result, with the same
 * initial/final marks. What happens to the transitions depends on
 * full_simplification.
 *
 * full_simplification != 0: transitions are handled by deal_with_transition_v1
 * so that the copy obeys the following rules:
 * - <E> transitions and graph calls are kept
 * - right contexts are ignored and replaced by an epsilon transition
 * - tags that match nothing in the text (like $* $< and $>) are kept, because
 *   they can take part in an E loop; a real E transition is added as well
 * - any other transition, i.e. one that matches something in the text,
 *   is removed
 * The result is then only made of real E transitions, pseudo-E transitions
 * and graph calls, and it can be interpreted this way:
 * - no final state is accessible: the graph cannot match E
 * - the initial state is final: the graph matches E
 * - otherwise, nothing can be said yet
 *
 * full_simplification == 0: transitions are handled by deal_with_transition_v2
 * in order to get a condition graph suitable for E loop and left recursion
 * detection. The graph is kept as is, except that an E transition is added to
 * skip right contexts. The contexts themselves are kept, since they also have
 * to be inspected for E loops and left recursions.
 */
static SingleGraph create_condition_graph(Fst2* fst2,int graph,int full_simplification) {
    SingleGraph condition_graph=new_SingleGraph(INT_TAGS);
    const int first_state=fst2->initial_states[graph];
    const int state_count=fst2->number_of_states_per_graphs[graph];
    const int end_state=first_state+state_count;
    for (int n=first_state;n<end_state;n++) {
        SingleGraphState copy=add_state(condition_graph);
        Fst2State original=fst2->states[n];
        /* The copy inherits the initial and final marks of the original state */
        if (is_initial_state(original)) {
            set_initial_state(copy);
        }
        if (is_final_state(original)) {
            set_final_state(copy);
        }
        /* Each transition is dispatched to the helper matching the requested mode;
         * both helpers also receive the number of the subgraph's first state */
        for (Transition* tr=original->transitions;tr!=NULL;tr=tr->next) {
            if (!full_simplification) {
                deal_with_transition_v2(fst2,tr,copy,first_state);
            } else {
                deal_with_transition_v1(fst2,tr,copy,first_state);
            }
        }
    }
    clean_condition_graph(condition_graph);
    return condition_graph;
}
/**
 * Allocates and initializes a new .fst2 automaton wrapper and returns it.
 *
 * The name field receives a copy of 'name' made with u_strdup.
 * If size>=0, the automaton field receives a new PTR_TAGS SingleGraph created
 * with that size; if size<0, the automaton field is set to NULL.
 * An allocation failure of the wrapper is reported with fatal_alloc_error.
 */
Fst2Automaton* new_Fst2Automaton(const unichar* name,int size) {
    Fst2Automaton* wrapper=(Fst2Automaton*)malloc(sizeof(Fst2Automaton));
    if (wrapper==NULL) {
        fatal_alloc_error("new_Fst2Automaton");
    }
    wrapper->name=u_strdup(name);
    /* No graph by default; one is only created for a non negative size */
    wrapper->automaton=NULL;
    if (size>=0) {
        wrapper->automaton=new_SingleGraph(size,PTR_TAGS);
    }
    return wrapper;
}
/**
 * Creates a SingleGraph copy of the given .fst2 subgraph, using
 * the same tag numeration.
 *
 * fst2: the grammar that contains the subgraph
 * n:    the number of the subgraph to copy
 *
 * The states of subgraph #n are read from fst2->states, starting at index
 * fst2->initial_states[n]. The copy numbers its own states from 0, so every
 * transition destination is rebased by that starting index. Initial and
 * final marks are preserved. The returned graph uses INT_TAGS and
 * is a new object created by new_SingleGraph.
 */
SingleGraph create_copy_of_fst2_subgraph(Fst2* fst2,int n) {
int n_states=fst2->number_of_states_per_graphs[n];
SingleGraph g=new_SingleGraph(n_states,INT_TAGS);
/* Index, in fst2->states, of the first state of the subgraph */
int shift=fst2->initial_states[n];
for (int i=0;i<n_states;i++) {
   SingleGraphState dest=add_state(g);
   Fst2State src=fst2->states[i+shift];
   if (is_initial_state(src)) {
      set_initial_state(dest);
   }
   if (is_final_state(src)) {
      set_final_state(dest);
   }
   Transition* t=src->transitions;
   while (t!=NULL) {
      /* The destination must designate a state of the copy, which only holds
       * n_states states numbered from 0, hence the subtraction.
       * NOTE(review): this assumes that t->state_number is an index into
       * fst2->states, like i+shift above -- confirm against the .fst2 loader. */
      add_outgoing_transition(dest,t->tag_number,t->state_number-shift);
      t=t->next;
   }
}
return g;
}
/** * This function analyzes the given Elag rule automaton to find * where the rule and constraint parts are. As a side effect, it builds * a fst2 grammar ("foo.fst2" => "foo-conc.fst2") that can be used by * the Locate program to match the <!> .... <!> .... <!> part of the rule. */ void split_elag_rule(elRule* rule, const VersatileEncodingConfig* vec,language_t* language) { int c; /* This array contains the numbers of the states that are pointed to by * middle '<=>' of the constraints */ int constraints[ELAG_MAX_CONSTRAINTS]; int nbConstraints=count_constraints(rule->automaton,constraints); /* +1 because we have to count the <!> .... <!> .... <!> part of the rule */ rule->nbContexts=nbConstraints+1; rule->contexts=(elContext*)malloc(rule->nbContexts*sizeof(elContext)); if (rule->contexts==NULL) { fatal_alloc_error("split_elag_rule"); } for (c=0;c<rule->nbContexts;c++) { rule->contexts[c].left=NULL; rule->contexts[c].right=NULL; } int endR1=ELAG_UNDEFINED; int endR2=ELAG_UNDEFINED; int endC2=ELAG_UNDEFINED; for (Transition* t=rule->automaton->automaton->states[0]->outgoing_transitions;t!=NULL;t=t->next) { symbol_t* symbol=t->label; switch (symbol->type) { /* We split the unique <!> .... <!> .... <!> part */ case S_EXCLAM: if (rule->contexts[0].left!=NULL) { fatal_error("Too much '<!>' tags\n",rule->name); } rule->contexts[0].left=new_SingleGraph(PTR_TAGS); /* We look for the end of the first part of the rule */ endR1=get_sub_automaton(rule->automaton->automaton,rule->contexts[0].left,t->state_number,0,S_EXCLAM); rule->contexts[0].right=new_SingleGraph(PTR_TAGS); endR2=get_sub_automaton(rule->automaton->automaton,rule->contexts[0].right,endR1,0,S_EXCLAM); if (endR1==ELAG_UNDEFINED || endR2==ELAG_UNDEFINED || !is_final_state(rule->automaton->automaton->states[endR2])) { fatal_error("split_elag_rule: %s: parse error in <!> part\n",rule->name); } break; /* We split the nbConstraints <=> .... <=> .... 
<=> parts */ case S_EQUAL: if (rule->contexts[1].left!=NULL) { fatal_error("Non deterministic .fst2 file\n"); } for (c=0;c<nbConstraints;c++) { rule->contexts[c+1].left=new_SingleGraph(PTR_TAGS); get_sub_automaton(rule->automaton->automaton,rule->contexts[c+1].left,t->state_number,1,constraints[c]); rule->contexts[c+1].right=new_SingleGraph(PTR_TAGS); endC2=get_sub_automaton(rule->automaton->automaton,rule->contexts[c+1].right,constraints[c],0,S_EQUAL); if (endC2==ELAG_UNDEFINED || !is_final_state(rule->automaton->automaton->states[endC2])) { fatal_error("split_elag_rule: %s: parse error in <=> part\n",rule->name); } } break; default: fatal_error("Left delimitor '<!>' or '<=>' missing\n"); } } if (rule->contexts[0].left==NULL) { fatal_error("In grammar '%s': symbol '<!>' not found.\n",rule->name); } char buf[FILENAME_MAX]; remove_extension(rule->name,buf); strcat(buf,"-conc.fst2"); /* We create the.fst2 to be used by Locate */ Fst2Automaton* locate=make_locate_automaton(rule,language); save_automaton(locate,buf,vec,FST_LOCATE); free_Fst2Automaton(locate,free_symbol); }
/**
 * Compiles an Elag rule whose contexts have already been filled in.
 *
 * The function returns a new Fst2Automaton A, named after rule->automaton,
 * so that intersecting A with a sentence automaton rejects the sequences
 * that are not valid regarding this rule.
 *
 * Steps:
 * 1. every context half is determinized and trimmed;
 * 2. A*.R1 and R2.A* are built from the two halves of context #0;
 * 3. for each combination of constraints, an automaton that does not match
 *    these constraints is produced and merged into a running union;
 * 4. the union, which rejects wrong paths, is complemented and trimmed.
 *
 * Progress is printed with u_printf, and a grammar whose result is empty
 * is reported with error().
 */
Fst2Automaton* compile_elag_rule(elRule* rule,language_t* language) {
    const char* plural=(rule->nbContexts>1)?"s":"";
    u_printf("Compiling %s... (%d context%s)\n",rule->name,rule->nbContexts,plural);

    /* Step 1: normalization of both halves of every context */
    for (int k=0;k<rule->nbContexts;k++) {
        elContext* ctx=&(rule->contexts[k]);
        elag_determinize(language,ctx->left,free_symbol);
        trim(ctx->left,free_symbol);
        elag_determinize(language,ctx->right,free_symbol);
        trim(ctx->right,free_symbol);
    }

    /* Step 2: A*.R1 ... */
    prefix_with_everything(rule->contexts[0].left);
    elag_determinize(language,rule->contexts[0].left,free_symbol);
    elag_minimize(rule->contexts[0].left);
    SingleGraph everything_then_R1=rule->contexts[0].left;
    /* ... and R2.A* */
    suffix_with_everything(rule->contexts[0].right);
    elag_determinize(language,rule->contexts[0].right,free_symbol);
    elag_minimize(rule->contexts[0].right);
    SingleGraph R2_then_everything=rule->contexts[0].right;

    /* Step 3: one bit per constraint, hence 2^(nbContexts-1) combinations */
    const int n_constraints=rule->nbContexts-1;
    int n_combinations=0;
    if (n_constraints>=0) {
        n_combinations=(int)(1 << n_constraints);
    }
    SingleGraph rejecting=new_SingleGraph(PTR_TAGS);
    for (int combination=0;combination<n_combinations;combination++) {
        /* Automaton that does not match the constraints of this combination */
        SingleGraph partial=combine_constraints(rule,combination,everything_then_R1,R2_then_everything,language);
        /* It is merged into the automaton built so far, which is then
         * determinized and minimized again */
        build_union(rejecting,partial);
        elag_determinize(language,rejecting,free_symbol);
        elag_minimize(rejecting);
    }

    /* Step 4: the complement of the automaton that rejects wrong paths
     * recognizes the correct ones. Applying the Elag rule will consist of
     * intersecting it with the sentence automata. */
    elag_complementation(language,rejecting);
    trim(rejecting,free_symbol);
    if (rejecting->number_of_states==0) {
        error("Grammar %s forbids everything\n",rule->name);
    }
    u_printf("Grammar %s compiled (%d states)\n",rule->name,rejecting->number_of_states);

    /* The wrapper is created without a graph (size -1) so that the one that
     * has just been computed can be plugged into it */
    Fst2Automaton* compiled=new_Fst2Automaton(rule->automaton->name,-1);
    compiled->automaton=rejecting;
    return compiled;
}
/**
 * Minimizes the given automaton in place. The automaton must be
 * deterministic: more than one initial state is a fatal error, and so is
 * a non empty automaton without any initial state. An empty automaton is
 * left untouched.
 *
 * automaton: the graph to minimize; its content is replaced by the result
 * level:     if >0, compact_default_transitions is applied first
 *
 * States are partitioned by iterative refinement: 'color' holds the
 * partition of the previous round and 'shade' the one being computed. The
 * refinement stops when a round does not introduce any new shade. One state
 * is then chosen per color to build the resulting automaton.
 */
void elag_minimize(SingleGraph automaton,int level) {
    struct list_int* initial_list=get_initial_states(automaton);
    if (initial_list==NULL) {
        /* No initial state is only acceptable for an empty automaton */
        if (automaton->number_of_states!=0) {
            fatal_error("No initial state in non empty automaton in elag_minimize\n");
        }
        return;
    }
    if (initial_list->next!=NULL) {
        fatal_error("Non-deterministic automaton in elag_minimize\n");
    }
    free_list_int(initial_list);

    if (level>0) {
        /* Transitions that are included in the default ones are removed */
        compact_default_transitions(automaton);
    }

    /* The symbol alphabet is only needed to number the transitions */
    SymbolAlphabet* alphabet=build_symbol_alphabet(automaton);
    TransitionCollection** collections=build_transition_collections(automaton,alphabet);
    free_SymbolAlphabet(alphabet);

    int n_colors;
    int n_shades;
    int* color=(int*)calloc(automaton->number_of_states,sizeof(int));
    if (color==NULL) {
        fatal_alloc_error("elag_minimize");
    }
    int* shade=init_colors(automaton,&n_shades);

    for (;;) {
        /* The shades of the previous round become the current colors */
        for (int q=0;q<automaton->number_of_states;q++) {
            color[q]=shade[q];
        }
        n_colors=n_shades;
        n_shades=0;
        /* The colors of the transitions' destination states are refreshed */
        update_colors(collections,color,automaton->number_of_states);
        /* The shade of each state #q is looked for by comparing it with
         * all the states #i so that i<q */
        for (int q=0;q<automaton->number_of_states;q++) {
            shade[q]=get_shade(q,collections,color,shade,&n_shades);
        }
        /* Fixed point: this round has not introduced any new shade */
        if (n_colors==n_shades) {
            break;
        }
    }

    int* representative=choose_states(color,n_colors,automaton->number_of_states);
    for (int q=0;q<automaton->number_of_states;q++) {
        free_TransitionCollection(collections[q]);
    }
    free(collections);
    free(shade);

    /* One state per color in the minimized automaton */
    SingleGraph minimized=new_SingleGraph(n_colors,PTR_TAGS);
    for (int k=0;k<n_colors;k++) {
        SingleGraphState fresh=add_state(minimized);
        SingleGraphState model=automaton->states[representative[k]];
        /* Initiality and finality are the ones of the chosen state */
        fresh->control=model->control;
        /* The transition list is stolen from the chosen state, not copied */
        fresh->outgoing_transitions=model->outgoing_transitions;
        model->outgoing_transitions=NULL;
        /* Destination states are renumbered with their color */
        Transition* tr=fresh->outgoing_transitions;
        while (tr!=NULL) {
            tr->state_number=color[tr->state_number];
            tr=tr->next;
        }
        /* NOTE(review): default_state is copied without going through color[],
         * unlike the explicit transitions -- confirm whether it can hold a
         * state number at this point. */
        fresh->default_state=model->default_state;
    }

    /* The old automaton is replaced by the new one */
    move_SingleGraph(automaton,&minimized,free_symbol);
    /* These arrays are not needed anymore */
    free(color);
    free(representative);
}