コード例 #1
0
/**
 * Builds the condition graph of subgraph #graph of the given .fst2.
 *
 * One state is created per state of the subgraph, in the same order, and the
 * initial/final flags are copied. Every transition is then handed over to a
 * helper that decides what to put in the copy:
 *
 * - full_simplification != 0 (deal_with_transition_v1):
 *     * <E> transitions and graph calls are kept
 *     * right contexts are ignored and replaced by an epsilon transition
 *     * tags that match nothing in the text (like $* $< and $>) are kept,
 *       since they can take part in an E loop; a real E transition is
 *       added as well
 *     * every other transition, i.e. one that matches something in the
 *       text, is removed
 *   The result only contains real E transitions, pseudo-E transitions and
 *   graph calls, and can be read as follows:
 *     * no final state is accessible  => the graph cannot match E
 *     * the initial state is final    => the graph matches E
 *     * otherwise                     => we cannot tell yet
 *
 * - full_simplification == 0 (deal_with_transition_v2):
 *     the graph is kept as is, for E loop and left recursion detection.
 *     The only modification is an extra E transition that skips right
 *     contexts; the contexts themselves are kept, because they must be
 *     inspected too.
 *
 * The graph is passed to clean_condition_graph before being returned.
 */
static SingleGraph create_condition_graph(Fst2* fst2,int graph,int full_simplification) {
    SingleGraph condition_graph=new_SingleGraph(INT_TAGS);
    const int first_state=fst2->initial_states[graph];
    const int end_state=first_state+fst2->number_of_states_per_graphs[graph];
    for (int q=first_state;q<end_state;q++) {
        SingleGraphState copy=add_state(condition_graph);
        Fst2State original=fst2->states[q];
        if (is_initial_state(original)) {
            set_initial_state(copy);
        }
        if (is_final_state(original)) {
            set_final_state(copy);
        }
        for (Transition* tr=original->transitions;tr!=NULL;tr=tr->next) {
            if (!full_simplification) {
                /* Detection mode: the transition is kept */
                deal_with_transition_v2(fst2,tr,copy,first_state);
            } else {
                /* Simplification mode: only E-like material survives */
                deal_with_transition_v1(fst2,tr,copy,first_state);
            }
        }
    }
    clean_condition_graph(condition_graph);
    return condition_graph;
}
コード例 #2
0
ファイル: Fst2Automaton.cpp プロジェクト: adri87/Q-A
/**
 * Allocates and returns a new Fst2Automaton.
 *
 * The 'name' field receives the result of u_strdup(name). The 'automaton'
 * field is NULL when size<0; otherwise it is a fresh graph obtained from
 * new_SingleGraph(size,PTR_TAGS).
 *
 * An allocation failure is reported through fatal_alloc_error.
 */
Fst2Automaton* new_Fst2Automaton(const unichar* name,int size) {
    Fst2Automaton* created=(Fst2Automaton*)malloc(sizeof(Fst2Automaton));
    if (created==NULL) {
        fatal_alloc_error("new_Fst2Automaton");
    }
    created->name=u_strdup(name);
    /* No graph by default; one is only built for a non negative size */
    created->automaton=NULL;
    if (size>=0) {
        created->automaton=new_SingleGraph(size,PTR_TAGS);
    }
    return created;
}
コード例 #3
0
/**
 * Returns a new INT_TAGS SingleGraph that mirrors subgraph #n of the
 * given .fst2: one state per state of the subgraph, in the same order,
 * with the same initial/final flags and the same tag numbers on the
 * transitions.
 *
 * NOTE(review): destination state numbers are copied unchanged, without
 * subtracting the subgraph's first state index. This is only right if
 * the transitions already use numbers that are valid in the copy --
 * to be confirmed against the Fst2 loader.
 */
SingleGraph create_copy_of_fst2_subgraph(Fst2* fst2,int n) {
    const int state_count=fst2->number_of_states_per_graphs[n];
    SingleGraph copy=new_SingleGraph(state_count,INT_TAGS);
    /* Index, in fst2->states, of the first state of the subgraph */
    const int first=fst2->initial_states[n];
    for (int k=0;k<state_count;k++) {
        SingleGraphState target=add_state(copy);
        Fst2State source=fst2->states[first+k];
        if (is_initial_state(source)) {
            set_initial_state(target);
        }
        if (is_final_state(source)) {
            set_final_state(target);
        }
        for (Transition* tr=source->transitions;tr!=NULL;tr=tr->next) {
            add_outgoing_transition(target,tr->tag_number,tr->state_number);
        }
    }
    return copy;
}
コード例 #4
0
/**
 * Analyzes the given Elag rule automaton to find where the rule part
 * (<!> .... <!> .... <!>) and the constraint parts (<=> .... <=> .... <=>)
 * are, and stores them in rule->contexts:
 *   - rule->contexts[0]      : left and right halves of the <!> part
 *   - rule->contexts[1..n]   : left and right halves of each constraint
 * rule->nbContexts is set to the number of constraints + 1.
 *
 * As a side effect, it builds a fst2 grammar ("foo.fst2" => "foo-conc.fst2")
 * that can be used by the Locate program to match the <!> part of the rule.
 *
 * @param rule     the rule to split; rule->automaton must be loaded and
 *                 rule->name is used to build the output file name
 * @param vec      encoding configuration passed to save_automaton
 * @param language language description passed to make_locate_automaton
 *
 * Any structural problem in the rule (missing or duplicated delimiters,
 * parts that do not end on a final state, output name too long) is
 * reported through fatal_error.
 */
void split_elag_rule(elRule* rule, const VersatileEncodingConfig* vec,language_t* language) {
int c;
/* This array contains the numbers of the states that are pointed to by
 * middle '<=>' of the constraints */
int constraints[ELAG_MAX_CONSTRAINTS];
int nbConstraints=count_constraints(rule->automaton,constraints);
/* +1 because we have to count the <!> .... <!> .... <!> part of the rule */
rule->nbContexts=nbConstraints+1;
rule->contexts=(elContext*)malloc(rule->nbContexts*sizeof(elContext));
if (rule->contexts==NULL) {
   fatal_alloc_error("split_elag_rule");
}
for (c=0;c<rule->nbContexts;c++) {
   rule->contexts[c].left=NULL;
   rule->contexts[c].right=NULL;
}
int endR1=ELAG_UNDEFINED;
int endR2=ELAG_UNDEFINED;
int endC2=ELAG_UNDEFINED;
for (Transition* t=rule->automaton->automaton->states[0]->outgoing_transitions;t!=NULL;t=t->next) {
   symbol_t* symbol=t->label;
   switch (symbol->type) {
      /* We split the unique <!> .... <!> .... <!> part */
      case S_EXCLAM:
         if (rule->contexts[0].left!=NULL) {
            fatal_error("Too much '<!>' tags\n");
         }
         rule->contexts[0].left=new_SingleGraph(PTR_TAGS);
         /* We look for the end of the first part of the rule */
         endR1=get_sub_automaton(rule->automaton->automaton,rule->contexts[0].left,t->state_number,0,S_EXCLAM);
         /* endR1 is the starting state of the second half, so it must be
          * validated before being used as a state number */
         if (endR1==ELAG_UNDEFINED) {
            fatal_error("split_elag_rule: %s: parse error in <!> part\n",rule->name);
         }
         rule->contexts[0].right=new_SingleGraph(PTR_TAGS);
         endR2=get_sub_automaton(rule->automaton->automaton,rule->contexts[0].right,endR1,0,S_EXCLAM);
         if (endR2==ELAG_UNDEFINED
             || !is_final_state(rule->automaton->automaton->states[endR2])) {
            fatal_error("split_elag_rule: %s: parse error in <!> part\n",rule->name);
         }
         break;

      /* We split the nbConstraints <=> .... <=> .... <=> parts */
      case S_EQUAL:
         /* contexts[1] only exists if there is at least one constraint:
          * with nbConstraints==0 the array has a single element */
         if (nbConstraints>0 && rule->contexts[1].left!=NULL) {
            fatal_error("Non deterministic .fst2 file\n");
         }
         for (c=0;c<nbConstraints;c++) {
            rule->contexts[c+1].left=new_SingleGraph(PTR_TAGS);
            get_sub_automaton(rule->automaton->automaton,rule->contexts[c+1].left,t->state_number,1,constraints[c]);
            rule->contexts[c+1].right=new_SingleGraph(PTR_TAGS);
            endC2=get_sub_automaton(rule->automaton->automaton,rule->contexts[c+1].right,constraints[c],0,S_EQUAL);
            if (endC2==ELAG_UNDEFINED || !is_final_state(rule->automaton->automaton->states[endC2])) {
               fatal_error("split_elag_rule: %s: parse error in <=> part\n",rule->name);
            }
         }
         break;

      default: fatal_error("Left delimitor '<!>' or '<=>' missing\n");
   }
}
if (rule->contexts[0].left==NULL) {
   fatal_error("In grammar '%s': symbol '<!>' not found.\n",rule->name);
}
/* We build the name of the Locate grammar, making sure that neither the
 * copy of the rule name nor the added suffix can overflow the buffer */
static const char suffix[]="-conc.fst2";
char buf[FILENAME_MAX];
if (strlen(rule->name)>=sizeof(buf)) {
   fatal_error("split_elag_rule: %s: grammar name is too long\n",rule->name);
}
remove_extension(rule->name,buf);
/* sizeof(suffix) counts the final '\0' */
if (strlen(buf)+sizeof(suffix)>sizeof(buf)) {
   fatal_error("split_elag_rule: %s: grammar name is too long\n",rule->name);
}
strcat(buf,suffix);

/* We create the .fst2 to be used by Locate */
Fst2Automaton* locate=make_locate_automaton(rule,language);
save_automaton(locate,buf,vec,FST_LOCATE);
free_Fst2Automaton(locate,free_symbol);
}
コード例 #5
0
/**
 * Compiles an Elag rule that has already been split into contexts.
 *
 * The returned automaton A is such that intersecting A with a sentence
 * automaton rejects the sequences that are not valid regarding the rule.
 * It is built as the complement of the union, over every combination of
 * constraints, of the automata that recognize the paths violating that
 * combination.
 *
 * The graphs stored in rule->contexts are modified by this function.
 * The returned Fst2Automaton is named after rule->automaton->name.
 */
Fst2Automaton* compile_elag_rule(elRule* rule,language_t* language) {
    const char* plural=(rule->nbContexts>1)?"s":"";
    u_printf("Compiling %s... (%d context%s)\n",rule->name,rule->nbContexts,plural);

    /* Every context half is determinized and trimmed first */
    for (int i=0;i<rule->nbContexts;i++) {
        elContext* ctx=&(rule->contexts[i]);
        elag_determinize(language,ctx->left,free_symbol);
        trim(ctx->left,free_symbol);
        elag_determinize(language,ctx->right,free_symbol);
        trim(ctx->right,free_symbol);
    }

    elContext* rule_part=&(rule->contexts[0]);

    /* A*.R1 */
    prefix_with_everything(rule_part->left);
    elag_determinize(language,rule_part->left,free_symbol);
    elag_minimize(rule_part->left);
    SingleGraph anything_R1=rule_part->left;

    /* R2.A* */
    suffix_with_everything(rule_part->right);
    elag_determinize(language,rule_part->right,free_symbol);
    elag_minimize(rule_part->right);
    SingleGraph R2_anything=rule_part->right;

    /* There are 2^(number of constraints) constraint combinations */
    const int n_constraints=rule->nbContexts-1;
    int n_combinations=0;
    if (n_constraints>=0) {
        n_combinations=(int)(1 << n_constraints);
    }

    /* 'rejected' accumulates the union of the automata that recognize
     * the wrong paths, one automaton per combination of constraints */
    SingleGraph rejected=new_SingleGraph(PTR_TAGS);
    for (int combination=0;combination<n_combinations;combination++) {
        SingleGraph violating=combine_constraints(rule,combination,anything_R1,R2_anything,language);
        build_union(rejected,violating);
        elag_determinize(language,rejected,free_symbol);
        elag_minimize(rejected);
    }

    /* The complement of what rejects wrong paths recognizes the correct
     * ones: applying the rule then amounts to intersecting this automaton
     * with the sentence ones */
    elag_complementation(language,rejected);
    trim(rejected,free_symbol);

    if (rejected->number_of_states==0) {
        error("Grammar %s forbids everything\n",rule->name);
    }
    u_printf("Grammar %s compiled (%d states)\n",rule->name,rejected->number_of_states);

    /* size -1: no graph is allocated, we plug ours instead */
    Fst2Automaton* compiled=new_Fst2Automaton(rule->automaton->name,-1);
    compiled->automaton=rejected;
    return compiled;
}
コード例 #6
0
ファイル: AutMinimization.cpp プロジェクト: adri87/Q-A
/**
 * This function minimizes the given automaton in place. Note
 * that it must be deterministic: more than one initial state is a
 * fatal error. For more information, see comments in this library's
 * .h file.
 *
 * The states are partitioned by iterative refinement: each state has a
 * 'color' (its class at the previous step) and a 'shade' (its class at
 * the current step). The refinement stops when a step produces as many
 * shades as there were colors. One state is then chosen per class to
 * build the resulting automaton, which replaces the content of
 * 'automaton'.
 *
 * @param automaton the automaton to minimize; an automaton with no state
 *                  and no initial state is left untouched
 * @param level     if >0, compact_default_transitions is applied to the
 *                  automaton before the minimization
 */
void elag_minimize(SingleGraph automaton,int level) {
struct list_int* initials=get_initial_states(automaton);
if (initials==NULL) {
   /* No initial state should mean 'empty automaton' */
   if (automaton->number_of_states!=0) {
      /* If not, we fail */
      fatal_error("No initial state in non empty automaton in elag_minimize\n");
   }
   /* Nothing to minimize, and nothing has been allocated yet */
   return;
}
if (initials->next!=NULL) {
   /* Two or more initial states */
   fatal_error("Non-deterministic automaton in elag_minimize\n");
}
/* The list was only needed for the checks above */
free_list_int(initials);
if (level>0) {
   /* If necessary, we remove transitions that are included in the
    * default ones */
   compact_default_transitions(automaton);
}
SymbolAlphabet* alph=build_symbol_alphabet(automaton);
/* One transition collection per state (they are freed state by state below) */
TransitionCollection** transitions=build_transition_collections(automaton,alph);
/* Now that we have numbered transitions, we don't need the symbol
 * alphabet anymore */
free_SymbolAlphabet(alph);
/* Number of classes at the previous and at the current refinement step */
int nbColors;
int nbShades;
int* color=(int*)calloc(automaton->number_of_states,sizeof(int));
if (color==NULL) {
   fatal_alloc_error("elag_minimize");
}
/* Initial partition; nbShades receives its number of classes */
int* shade=init_colors(automaton,&nbShades);
do {
   int s;
   /* We copy the shades into the color array */
   for (s=0;s<automaton->number_of_states;s++) {
      color[s]=shade[s];
   }
   nbColors=nbShades;
   /* get_shade counts the shades of this step from scratch */
   nbShades=0;
   /* We update the colors of the transitions' destination states */
   update_colors(transitions,color,automaton->number_of_states);
   /* Now, for each state #s, we look for its shade, comparing it with
    * all the states #i so that i<s */
   for (s=0;s<automaton->number_of_states;s++) {
      shade[s]=get_shade(s,transitions,color,shade,&nbShades);
   }
   /* We stop when no more shades have been introduced */
} while (nbColors!=nbShades);
/* chosen[c] is the number of the state that represents color #c */
int* chosen=choose_states(color,nbColors,automaton->number_of_states);
for (int i=0;i<automaton->number_of_states;i++) {
   free_TransitionCollection(transitions[i]);
}
free(transitions);
free(shade);
/* We allocate the resulting automaton */
SingleGraph result=new_SingleGraph(nbColors,PTR_TAGS);
for (int c=0;c<nbColors;c++) {
   SingleGraphState state=add_state(result);
   SingleGraphState original=automaton->states[chosen[c]];
   /* We set the initiality and finality of the state */
   state->control=original->control;
   /* The transition list is moved, not copied: the original state must
    * not reference it anymore */
   state->outgoing_transitions=original->outgoing_transitions;
   original->outgoing_transitions=NULL;
   /* We renumber the transitions' destination states */
   for (Transition* t1=state->outgoing_transitions;t1!=NULL;t1=t1->next) {
      t1->state_number=color[t1->state_number];
   }
   /* NOTE(review): default_state is copied as is, whereas the transition
    * destinations above are renumbered with color[]. If default_state is
    * a state number of the old automaton, it probably needs the same
    * renumbering -- to be confirmed */
   state->default_state=original->default_state;
}
/* Now we have to replace the old automaton by the new one */
move_SingleGraph(automaton,&result,free_symbol);
/* And we don't need these arrays anymore */
free(color);
free(chosen);
}