/**
 * Turns the automaton A into its complement, in place.
 *
 * A new sink state is appended to A, the finality of every state is
 * inverted, and every state that has no default transition is completed
 * with transitions to the sink for all the symbols it does not already
 * handle.
 *
 * @param language the language description used to compute the missing symbols
 * @param A        the automaton to complement; it is modified
 */
void elag_complementation(language_t* language,SingleGraph A) {
    /* The index is read before the state is added, so it designates the sink */
    const int sink_index=A->number_of_states;
    SingleGraphState sink=add_state(A);
    /* The sink is left non final here: the loop below also visits it and
     * inverts its finality. Its default transition loops on itself. */
    sink->default_state=sink_index;

    for (int i=0;i<A->number_of_states;i++) {
        SingleGraphState state=A->states[i];

        /* Inversion of the finality of the state */
        if (!is_final_state(state)) {
            set_final_state(state);
        } else {
            unset_final_state(state);
        }

        if (state->default_state!=-1) {
            /* A default transition already covers the missing symbols */
            continue;
        }
        /* Everything that is not matched by an explicit transition */
        symbol_t* missing=LEXIC_minus_transitions(language,state->outgoing_transitions);
        if (missing==NULL) {
            continue;
        }
        add_all_outgoing_transitions(state,missing,sink_index);
        /* What was added is tagged by a symbol list; it is turned into
         * transitions that are each tagged by a single symbol */
        flatten_transition(state->outgoing_transitions);
        /* 'missing' is a symbol list, hence free_symbols and not free_symbol */
        free_symbols(missing);
    }
}
/**
 * Builds the condition graph associated to the subgraph #graph of the
 * given fst2.
 *
 * When full_simplification is non-zero, the copy follows these rules:
 * - <E> transitions and graph calls are kept
 * - right contexts are ignored and replaced by an epsilon transition
 * - tags that match nothing in the text (like $* $< and $>) are kept,
 *   because they can take part in an E loop; a real E transition is
 *   added as well
 * - every other transition, that is one that matches something from the
 *   text, is removed
 *
 * The result then only contains real E transitions, pseudo-E transitions
 * and graph calls, and it is read as follows:
 * - no accessible final state: the graph cannot match E
 * - the initial state is final: the graph matches E
 * - otherwise: nothing can be concluded yet
 *
 * When full_simplification is zero, the graph is meant for E loop and left
 * recursion detection. It is kept as is, except that an E transition is
 * added to skip each right context; the context itself is kept, because it
 * has to be explored for E loops and left recursions too.
 *
 * @return the new condition graph, after clean_condition_graph was applied
 */
static SingleGraph create_condition_graph(Fst2* fst2,int graph,int full_simplification) {
    const int initial_state=fst2->initial_states[graph];
    const int n_states=fst2->number_of_states_per_graphs[graph];
    SingleGraph g=new_SingleGraph(INT_TAGS);

    for (int k=0;k<n_states;k++) {
        Fst2State src=fst2->states[initial_state+k];
        SingleGraphState dst=add_state(g);

        /* Initial and final marks are copied from the original state */
        if (is_initial_state(src)) {
            set_initial_state(dst);
        }
        if (is_final_state(src)) {
            set_final_state(dst);
        }

        /* Each transition goes through the handler selected by the mode */
        for (Transition* t=src->transitions;t!=NULL;t=t->next) {
            if (!full_simplification) {
                deal_with_transition_v2(fst2,t,dst,initial_state);
            } else {
                deal_with_transition_v1(fst2,t,dst,initial_state);
            }
        }
    }
    clean_condition_graph(g);
    return g;
}
/**
 * Appends B to A, so that A recognizes the concatenation of both
 * automata. Only A is modified.
 *
 * @param language the language description, needed to turn default
 *                 transitions into explicit ones
 * @param A        the left automaton; it receives a copy of B's states
 * @param B        the right automaton; it is only read
 */
void elag_concat(language_t* language,SingleGraph A,SingleGraph B) {
    /* Number of states A had before anything was copied into it */
    const int n_a_states=A->number_of_states;
    /* renumber[b] is the index, in A, of the copy of B's state #b */
    int* renumber=(int*)malloc(B->number_of_states*sizeof(int));
    if (renumber==NULL) {
        fatal_alloc_error("elag_concat");
    }

    /* First pass: one fresh state of A per state of B. The whole array
     * must be filled before transitions can be cloned. */
    for (int b=0;b<B->number_of_states;b++) {
        renumber[b]=A->number_of_states;
        add_state(A);
    }
    /* Second pass: transitions, default transitions and finality */
    for (int b=0;b<B->number_of_states;b++) {
        SingleGraphState from=B->states[b];
        SingleGraphState to=A->states[renumber[b]];
        to->outgoing_transitions=clone_transition_list(from->outgoing_transitions,renumber,dup_symbol);
        if (from->default_state==-1) {
            to->default_state=-1;
        } else {
            to->default_state=renumber[from->default_state];
        }
        if (is_final_state(from)) {
            set_final_state(to);
        }
    }

    /* The default transitions leaving the copies of B's initial states
     * are replaced by explicit ones */
    struct list_int* initials=get_initial_states(B);
    struct list_int* it;
    for (it=initials;it!=NULL;it=it->next) {
        explicit_default_transition(language,A,renumber[it->n]);
    }

    for (int a=0;a<n_a_states;a++) {
        if (!is_final_state(A->states[a])) {
            continue;
        }
        /* A former final state of A stops being final, and its default
         * transition is made explicit; otherwise the concatenation would
         * alter the recognized language */
        unset_final_state(A->states[a]);
        explicit_default_transition(language,A,a);
        for (it=initials;it!=NULL;it=it->next) {
            SingleGraphState entry=A->states[renumber[it->n]];
            /* The state gets a copy of what leaves the entry state of B... */
            concat(&(A->states[a]->outgoing_transitions),clone_transition_list(entry->outgoing_transitions,NULL,dup_symbol));
            /* ...and it is final again if this entry state is final */
            if (is_final_state(entry)) {
                set_final_state(A->states[a]);
            }
        }
    }
    free(renumber);
    free_list_int(initials);
}
/** * This function copies into 'aut_dest' the sub-automaton of 'aut_src' that * starts at the state #current_state and that ends at the state #z pointed * by transitions tagged with the given symbol. Note that these transitions are not copied. * The function returns z if such a state is found, ELAG_UNDEFINED otherwise. * The 'renumber' array is updated each time a new state is copied into 'aut_dest'. */ int get_sub_automaton(SingleGraph src,SingleGraph aut_dest,int current_state,SymbolType delim, int* renumber) { int f; int end=ELAG_UNDEFINED; for (Transition* t=src->states[current_state]->outgoing_transitions;t!=NULL;t=t->next) { symbol_t* symbol=t->label; if (symbol->type==delim) { /* If we have found a transition tagged by the delimitor */ if (end!=ELAG_UNDEFINED && end!=t->state_number) { /* For a given rule part, all the delimitors all supposed to * point on the same state */ fatal_error("get_sub_automaton: too much '<%c>' delimitors in rule\n",delim); } end=t->state_number; /* We set final the corresponding state in 'aut_dest' */ set_final_state(aut_dest->states[renumber[current_state]]); } else { /* If we have a normal transition, we just copy it */ if (renumber[t->state_number]==ELAG_UNDEFINED) { /* If we have to create a new state */ renumber[t->state_number]=aut_dest->number_of_states; add_state(aut_dest); SingleGraphState state=aut_dest->states[renumber[current_state]]; add_outgoing_transition(state,t->label,renumber[t->state_number]); /* We copy recursively this part of 'aut_src' that we don't yet know */ f=get_sub_automaton(src,aut_dest,t->state_number,delim,renumber); if (f!=ELAG_UNDEFINED) { if (end!=ELAG_UNDEFINED && f!=end) { fatal_error("get_sub_automaton: too much '<%c>' delimitors in rule\n",delim); } end=f; } } else { /* If the state already exists, we just add a transition, because * there is no need to explore again the state. 
*/ SingleGraphState state=aut_dest->states[renumber[current_state]]; add_outgoing_transition(state,t->label,renumber[t->state_number]); } } } return end; }
/**
 * Creates a SingleGraph copy of the given .fst2 subgraph, using
 * the same tag numeration.
 *
 * @param fst2 the grammar that contains the subgraph
 * @param n    the number of the subgraph to copy
 * @return a new SingleGraph that has one state per state of the subgraph.
 *         Its states are numbered from 0, in the same order as in the fst2.
 */
SingleGraph create_copy_of_fst2_subgraph(Fst2* fst2,int n) {
    int n_states=fst2->number_of_states_per_graphs[n];
    SingleGraph g=new_SingleGraph(n_states,INT_TAGS);
    /* Index, in fst2->states, of the first state of the subgraph */
    int shift=fst2->initial_states[n];
    for (int i=0;i<n_states;i++) {
        SingleGraphState dest=add_state(g);
        Fst2State src=fst2->states[i+shift];
        if (is_initial_state(src)) {
            set_initial_state(dest);
        }
        if (is_final_state(src)) {
            set_final_state(dest);
        }
        for (Transition* t=src->transitions;t!=NULL;t=t->next) {
            /* Destination states of the fst2 live in the same index space
             * as fst2->states (the one addressed with i+shift above),
             * whereas the states of g start at 0. The destination has to
             * be rebased, otherwise any subgraph that does not start at
             * state 0 gets transitions that point outside of the copy. */
            add_outgoing_transition(dest,t->tag_number,t->state_number-shift);
        }
    }
    return g;
}
/**
 * Reads the next automaton of the given .fst2 input.
 *
 * The automaton is expected to start with a header line "-N name", where
 * N is the 1-based rank of the automaton in the file. It is followed by
 * one line per state, up to a line that starts with 'f'. A state line that
 * starts with 't' describes a final state, and contains a sequence of
 * "tag_number state_number" pairs.
 *
 * @param fstf the input descriptor; its 'pos' field is incremented
 * @return the loaded automaton, or NULL if all the automata of the file
 *         were already loaded. Format errors are fatal.
 */
Fst2Automaton* load_automaton(Elag_fst_file_in* fstf) {
    if (fstf->pos>=fstf->nb_automata) {
        return NULL;
    }
    Ustring* line=new_Ustring();

    /* Header line: '-', the automaton number, one separator, the name */
    readline(line,fstf->f);
    const unichar* cursor=line->str;
    if (*cursor!='-') {
        fatal_error("load_automaton: %s: bad file format\n",fstf->name);
    }
    cursor++;
    int number=u_parse_int(cursor,&cursor);
    if (number!=fstf->pos+1) {
        /* The automaton number must be the expected one */
        fatal_error("load_automaton: %s: parsing error with line '%S' ('-%d ...' expected)\n",fstf->name,line->str,fstf->pos+1);
    }
    /* Skipping the separator, so that the cursor is on the automaton name */
    cursor++;
    Fst2Automaton* A=new_Fst2Automaton(cursor);

    /* State lines, until the end of input or a line that starts with 'f' */
    for (;;) {
        if (!readline(line,fstf->f)) {
            break;
        }
        if (line->str[0]=='f') {
            break;
        }
        cursor=line->str;
        SingleGraphState state=add_state(A->automaton);
        if (*cursor=='t') {
            /* 't' marks a final state */
            set_final_state(state);
        }
        /* Moving to the first digit of the line, if any */
        while (*cursor!='\0' && !u_is_digit(*cursor)) {
            cursor++;
        }
        /* One iteration per "tag_number state_number" pair */
        while (*cursor!='\0') {
            int tag_number=u_parse_int(cursor,&cursor);
            if (fstf->renumber!=NULL) {
                tag_number=fstf->renumber[tag_number];
            }
            while (*cursor==' ') {
                cursor++;
            }
            if (!u_is_digit(*cursor)) {
                fatal_error("load_automaton: %s: bad file format (line='%S')\n",fstf->name,line->str);
            }
            int state_number=u_parse_int(cursor,&cursor);
            symbol_t* symbols=(symbol_t*)fstf->symbols->value[tag_number];
            if (symbols!=NULL) {
                /* The symbol was successfully loaded. The same call is made
                 * whatever the type of the automaton, text automata
                 * included. For instance, with the symbol list
                 *
                 * "{domestique,.N:fs}" => "{domestique,.N:ms}" => NULL
                 *
                 * two transitions are expected to be added. */
                add_all_outgoing_transitions(state,symbols,state_number);
            }
            while (*cursor==' ') {
                cursor++;
            }
        }
    }
    if (line->str[0]=='\0') {
        fatal_error("load_automaton: unexpected end of file\n");
    }
    if (A->automaton->number_of_states!=0) {
        /* The first state that was read is the initial one */
        set_initial_state(A->automaton->states[0]);
    } else {
        error("load_automaton: automaton with no state\n");
    }
    fstf->pos++;
    free_Ustring(line);
    return A;
}