/** * Deals with the given transition, for full simplification, * i.e. for E-matching detection. */ static void deal_with_transition_v1(Fst2* fst2,Transition* t,SingleGraphState dst,int initial_state) { if (t->tag_number<=0) { /* For graphs and <E> we keep the transition as is, except for * the state number that we have to adjust */ add_outgoing_transition(dst,t->tag_number,t->state_number-initial_state); } else if (is_right_context_beginning(fst2,t->tag_number)) { /* Right contexts are a special case: we skip the whole context by * using an E transition that points to the end of the context */ int dst_state=get_end_of_context(fst2,t->state_number); if (dst_state!=-1) { add_outgoing_transition(dst,0,dst_state-initial_state); } else { /* If we cannot reach the end of the context, then this transition cannot * match anything anyway, so we can just ignore it */ } } else if (matches_E(fst2,t->tag_number)) { /* Tags like $*, variable tags, etc. can be considered * like E transitions, but they must be kept, because they * could be involved into an infinite E loop. However, we also have to * add a real E transition, so that the final state can be reached by the * regular E removal algorithm. */ add_outgoing_transition(dst,0,t->state_number-initial_state); add_outgoing_transition(dst,t->tag_number,t->state_number-initial_state); } else { /* If we have a transition does actually match something in the text, we just * ignore it */ } }
/**
 * Processes one .fst2 transition for the light simplification, i.e. the one
 * used for E loop/left recursion detection.
 *
 * fst2:          grammar the transition comes from
 * t:             transition to process
 * dst:           state that receives the resulting transition(s)
 * initial_state: value subtracted from every destination state number
 */
static void deal_with_transition_v2(Fst2* fst2,Transition* t,SingleGraphState dst,int initial_state) {
    /* The original transition is copied in every case */
    add_outgoing_transition(dst,t->tag_number,t->state_number-initial_state);

    if (t->tag_number<=0 || !is_right_context_beginning(fst2,t->tag_number)) {
        /* Only the beginning of a right context needs extra work */
        return;
    }
    /* Right context: in addition to the original transition, an E
     * transition pointing to the end of the context allows the whole
     * context to be skipped */
    int context_end=get_end_of_context(fst2,t->state_number);
    if (context_end==-1) {
        /* The end of the context is unreachable, so this transition cannot
         * match anything anyway: no shortcut is added */
        return;
    }
    add_outgoing_transition(dst,0,context_end-initial_state);
}
/** * This function copies into 'aut_dest' the sub-automaton of 'aut_src' that * starts at the state #current_state and that ends at the state #z pointed * by transitions tagged with the given symbol. Note that these transitions are not copied. * The function returns z if such a state is found, ELAG_UNDEFINED otherwise. * The 'renumber' array is updated each time a new state is copied into 'aut_dest'. */ int get_sub_automaton(SingleGraph src,SingleGraph aut_dest,int current_state,SymbolType delim, int* renumber) { int f; int end=ELAG_UNDEFINED; for (Transition* t=src->states[current_state]->outgoing_transitions;t!=NULL;t=t->next) { symbol_t* symbol=t->label; if (symbol->type==delim) { /* If we have found a transition tagged by the delimitor */ if (end!=ELAG_UNDEFINED && end!=t->state_number) { /* For a given rule part, all the delimitors all supposed to * point on the same state */ fatal_error("get_sub_automaton: too much '<%c>' delimitors in rule\n",delim); } end=t->state_number; /* We set final the corresponding state in 'aut_dest' */ set_final_state(aut_dest->states[renumber[current_state]]); } else { /* If we have a normal transition, we just copy it */ if (renumber[t->state_number]==ELAG_UNDEFINED) { /* If we have to create a new state */ renumber[t->state_number]=aut_dest->number_of_states; add_state(aut_dest); SingleGraphState state=aut_dest->states[renumber[current_state]]; add_outgoing_transition(state,t->label,renumber[t->state_number]); /* We copy recursively this part of 'aut_src' that we don't yet know */ f=get_sub_automaton(src,aut_dest,t->state_number,delim,renumber); if (f!=ELAG_UNDEFINED) { if (end!=ELAG_UNDEFINED && f!=end) { fatal_error("get_sub_automaton: too much '<%c>' delimitors in rule\n",delim); } end=f; } } else { /* If the state already exists, we just add a transition, because * there is no need to explore again the state. 
*/ SingleGraphState state=aut_dest->states[renumber[current_state]]; add_outgoing_transition(state,t->label,renumber[t->state_number]); } } } return end; }
/**
 * Builds and returns the automaton used for pattern matching of the rule's
 * context.
 *
 * The automaton is a clone of the left part of the first context of 'rule',
 * concatenated with the right part of that same context. Then, for every
 * POS of 'language' flagged as ignorable, a loop tagged by this POS is
 * added on every state whose number is >= 1.
 *
 * NOTE(review): state #0 gets no such loop, since the state loop starts
 * at 1; confirm this is intended.
 */
Fst2Automaton* make_locate_automaton(elRule* rule,language_t* language) {
    Fst2Automaton* res=new_Fst2Automaton(NULL,-1);

    /* Left context, followed by the right one */
    res->automaton=clone(rule->contexts[0].left,dup_symbol);
    elag_concat(language,res->automaton,rule->contexts[0].right);

    /* Loops on ignorable POS */
    for (int pos_index=0;pos_index<language->POSs->size;pos_index++) {
        POS_t* pos=(POS_t*)language->POSs->value[pos_index];
        if (!pos->ignorable) {
            continue;
        }
        for (int state=1;state<res->automaton->number_of_states;state++) {
            /* A temporary symbol is built for the loop and released as
             * soon as the transition has been added */
            symbol_t* loop_label=new_symbol_POS(pos,-1);
            add_outgoing_transition(res->automaton->states[state],loop_label,state);
            free_symbol(loop_label);
        }
    }
    return res;
}
/**
 * Creates a SingleGraph copy of the subgraph #n of the given .fst2, using
 * the same tag numeration.
 *
 * The states of the subgraph are read from fst2->states, starting at the
 * index fst2->initial_states[n], and one state is added to the copy for
 * each of them, in the same order. The initial/final status of each state
 * is preserved.
 *
 * Destination state numbers of the transitions are translated by the same
 * offset, so that they designate states of the copy rather than positions
 * in fst2->states. This assumes, as the deal_with_transition_* helpers of
 * this file do, that t->state_number is expressed in the numbering of
 * fst2->states.
 */
SingleGraph create_copy_of_fst2_subgraph(Fst2* fst2,int n) {
    int n_states=fst2->number_of_states_per_graphs[n];
    SingleGraph g=new_SingleGraph(n_states,INT_TAGS);
    /* Index in fst2->states of the first state of the subgraph */
    int shift=fst2->initial_states[n];
    for (int i=0;i<n_states;i++) {
        SingleGraphState dest=add_state(g);
        Fst2State src=fst2->states[i+shift];
        if (is_initial_state(src)) {
            set_initial_state(dest);
        }
        if (is_final_state(src)) {
            set_final_state(dest);
        }
        for (Transition* t=src->transitions;t!=NULL;t=t->next) {
            /* The destination must be renumbered relative to the subgraph;
             * without the shift, any subgraph that does not start at state
             * #0 would get transitions pointing outside the copy */
            add_outgoing_transition(dest,t->tag_number,t->state_number-shift);
        }
    }
    return g;
}
/**
 * Adds to 'automaton' the transition(s) going from the state #from to the
 * state #to and described by 'label'.
 *
 * - If 'label' is SYMBOL_DEF, 'to' is recorded as the default state of the
 *   origin state; a fatal error is raised if a default state was already
 *   set (i.e. differs from -1).
 * - Otherwise, 'label' is walked through its 'next' links and one
 *   transition is added per element. Each element is turned into a string,
 *   which is looked up in 'symbols' (and inserted if needed) in order to
 *   obtain the integer used as the tag of the transition. SYMBOL_DEF is not
 *   allowed inside such a chain.
 */
void add_transition(SingleGraph automaton,struct string_hash_ptr* symbols,int from,
                    symbol_t* label,int to) {
    if (label==SYMBOL_DEF) {
        /* Default transition: at most one per state */
        if (automaton->states[from]->default_state!=-1) {
            fatal_error("add_transition: more than one default transition\n");
        }
        automaton->states[from]->default_state=to;
        return;
    }
    for (symbol_t* current=label;current!=NULL;current=current->next) {
        if (current==SYMBOL_DEF) {
            fatal_error("add_transition: unexpected default transition\n");
        }
        /* The string form of the symbol is used as the key, so that the
         * same symbol is not stored twice in the value array */
        Ustring* key=new_Ustring();
        symbol_to_str(current,key);
        int tag_index=get_value_index(key->str,symbols,INSERT_IF_NEEDED,current);
        free_Ustring(key);
        add_outgoing_transition(automaton->states[from],tag_index,to);
    }
}