C++ (Cpp) is_final_state 예제들

예제 #1

0

파일 보기

파일: args.c 프로젝트: blabos/Comp

void print_args(args_t* args) {
    int i, j;
    
    printf("\nSTATES:  %d/%d\n", args->num_fstates, args->num_states);
    for (i = 0; i < args->num_states; i++) {
        if (is_final_state(args, i)) {
            printf("(%s), ", args->states[i]);
        }
        else {
            printf("%s , ", args->states[i]);
        }
    }
    printf("\b\b \n");
    
    printf("\nSYMBOLS: %d\n", args->num_symbols);
    for (i = 0; i < args->num_symbols; i++) {
        printf("%c , ", args->symbols[i]);
    }
    printf("\b\b \n");
    
    printf("\nTRANSITIONS:\n");
    for (i = 0; i < args->num_states; i++) {
        for (j = 0; j < args->num_symbols; j++) {
            if (args->transitions[args->num_states * i + j] >= 0) {
                printf("%03d ", args->transitions[args->num_states * i + j]);
            }
            else {
                printf("--- ");
            }
        }
        printf("\n");
    }
    printf("\n");
}

예제 #2

0

파일 보기

파일: Fst2Check_lib.cpp 프로젝트: Rajat-dhyani/UnitexGramLab

/**
 * We create a copy of the given graph using the following rules if full_simplification is not null:
 * - <E> transitions and graph calls are kept
 * - all right contexts are ignored, replaced by an epsilon transition
 * - all tags that don't match anything in the text (like $* $< and $>) are kept,
 *   because they can be involved into a E loop. We also add a real E transition.
 * - all other transitions that matches something from the text are removed
 *
 * As a consequence, the resulting graph is only made of real E transitions,
 * pseudo-E transitions, and graph calls and we can use it as follows:
 * - if no final state is accessible, it means that the graph cannot match E
 * - if the initial state is final, it means that the graph match E
 * - otherwise, we don't know yet
 *
 *
 * If full_simplification is null, we have to create a condition graph suitable for
 * E loop and left recursion detection. For that purpose, we keep the graph as is,
 * with only one modification: adding an E transition to skip right contexts. But still,
 * we keep the context, because we also have to look at it for E loops and left recursions.
 *
 */
static SingleGraph create_condition_graph(Fst2* fst2,int graph,int full_simplification) {
SingleGraph g=new_SingleGraph(INT_TAGS);
int initial_state=fst2->initial_states[graph];
int n_states=fst2->number_of_states_per_graphs[graph];
for (int i=initial_state;i<initial_state+n_states;i++) {
	SingleGraphState dst=add_state(g);
	Fst2State src=fst2->states[i];
	if (is_initial_state(src)) {
		set_initial_state(dst);
	}
	if (is_final_state(src)) {
		set_final_state(dst);
	}
	Transition* t=src->transitions;
	while (t!=NULL) {
		if (full_simplification) {
			deal_with_transition_v1(fst2,t,dst,initial_state);
		} else {
			deal_with_transition_v2(fst2,t,dst,initial_state);
		}
		t=t->next;
	}
}
clean_condition_graph(g);
return g;
}

예제 #3

0

파일 보기

파일: AutComplementation.cpp 프로젝트: Rajat-dhyani/UnitexGramLab

/**
 * Replaces the given automaton by its complement one.
 */
void elag_complementation(language_t* language,SingleGraph A) {
int sink_state_index=A->number_of_states;
SingleGraphState sink_state=add_state(A);
/* The sink state is not final (because finalities will be reversed
 * below), and its default transition loops back on itself */
sink_state->default_state=sink_state_index;
for (int q=0;q<A->number_of_states;q++) {
   /* We reverse the finality of each state */
   if (is_final_state(A->states[q])) {
      unset_final_state(A->states[q]);
   } else {
      set_final_state(A->states[q]);
   }
   if (A->states[q]->default_state==-1) {
      /* If there is no default transition, we create one that is
       * tagged by anything but the non default ones */
      symbol_t* s=LEXIC_minus_transitions(language,A->states[q]->outgoing_transitions);
      if (s!=NULL) {
         add_all_outgoing_transitions(A->states[q],s,sink_state_index);
         /* We have added a single transition tagged by a symbol list. Now
          * we replace it by a list of transitions, each one of them
          * tagged with a single symbol */
         flatten_transition(A->states[q]->outgoing_transitions);
         /* Important to use free_symbols and not free_symbol, because
          * s represents a symbol list */
         free_symbols(s);
      }
   }
}
}

예제 #4

0

파일 보기

파일: ElagDebug.cpp 프로젝트: UnitexGramLab/unitex-core

/**
 * A debug printing of a SingleGraph.
 */
void print_graph(SingleGraph g) {
u_printf("--------------------------------\n");
if (g==NULL) {
   u_printf("NULL graph\n");
   u_printf("--------------------------------\n");
   return;
}
for (int i=0;i<g->number_of_states;i++) {
   SingleGraphState s=g->states[i];
   if (is_initial_state(s)) {
      u_printf("-> ");
   } else {u_printf("   ");}
   if (is_final_state(s)) {
      u_printf("%d t ",i);
   } else {
      u_printf("%d : ",i);
   }
   u_printf("(def %d) \n\t",s->default_state);
   Transition* t=s->outgoing_transitions;
   while (t!=NULL) {
      symbols_dump((symbol_t*)t->label);
      u_printf(",%d ",t->state_number);
      t=t->next;
   }
   u_printf("\n\n");
}
u_printf("--------------------------------\n");
}

예제 #5

0

파일 보기

파일: NTM.cpp 프로젝트: malkuh/TM

// simulate NTM until it halts or nr_steps steps are reached
void NTM::simulate(uint max_nr_steps, bool verbose)
{
    uint c_symbol;
    uint c_state;
    while(nr_steps <= max_nr_steps) {
        list<TM_State> new_tm_states;
        if ( verbose ) {
            cout << "Nr steps: " << nr_steps << endl;
            cout << "There are " << tm_states.size() << " states" << endl;
        }
        for (list<TM_State>::const_iterator it = tm_states.begin();
             it != tm_states.end(); ++it) {
            c_symbol = it->tape.read();
            c_state = it->state;
            if ( !is_final_state(c_state) ) {
                for(list<Action>::const_iterator act_it =
                    trans_actions[c_state][c_symbol].begin();
                    act_it != trans_actions[c_state][c_symbol].end();
                    ++act_it) {
                    TM_State tms(*it);
                    tms.state = act_it->state;
                    tms.tape.write(act_it->symbol);
                    tms.tape.move(act_it->move);
                    new_tm_states.push_front(tms);
//                     if(verbose) {
//                         print(tms);
//                     }
                }
            }
        }
        nr_steps++;
        swap(tm_states, new_tm_states);
    }
    cout << "Done simulating!" << endl;
}

예제 #6

0

파일 보기

파일: automata.c 프로젝트: mrLite/C-ohjelmointi

// Ex. 14
automaton* read_automaton(FILE* fptr) {	
	int state_c;
	int symbol_c;
	int initial_state;
	
	int final_state_c;
	int* final_states;
	
	s_table_element* symbol_table;
	state* state_table;
	
	char line[MAX_LEN+1];
	
	if(fseek(fptr, 0, SEEK_SET)) {
		puts("Couldn't seek the start of the file!");
		return NULL;
	}
	
	// Reading the number of states, number of symbols, and initial state
	fgets(line, (int)MAX_LEN, fptr);
	sscanf(line, "%d %d %d", &state_c, &symbol_c, &initial_state);
	
	// Constructing the table of final states
	final_states = read_final_states(fptr, &final_state_c);
	
	// Constructing the symbol table
	symbol_table = read_symbols(symbol_c, fptr);
	
	// Constructing the state table
	state_table = malloc(sizeof(state)*state_c);
	if(state_table == NULL) {
		puts("Couldn't allocate the state table");
		return NULL;
	}
	int type;
	for(int i = 0; i < state_c; ++i) {
		if((i+1) == initial_state)
			type = 1;
		else if(is_final_state(final_states, final_state_c, i+1))
			type = -1;
		else
			type = 0;
		state_table[i] = new_state(i+1, type, NULL);
	}
	while(read_transition(state_table, fptr));
	
	automaton* ret = malloc(sizeof(automaton));
	if(ret == NULL)
		return NULL;
	ret->initial_state = initial_state;
	ret->states = state_c;
	ret->symbol_c = symbol_c;
	ret->state_table = state_table;
	ret->symbol_table = symbol_table;
	ret->final_states = final_states;
	ret->final_state_c = final_state_c;
	
	return ret;
}

예제 #7

0

파일 보기

파일: AutConcat.cpp 프로젝트: adri87/Q-A

/**
 * This function concatenates B at the end of A. A is modified.
 */
void elag_concat(language_t* language,SingleGraph A,SingleGraph B) {
int oldnb=A->number_of_states;
int* renumber=(int*)malloc(B->number_of_states*sizeof(int));
if (renumber==NULL) {
   fatal_alloc_error("elag_concat");
}
int q;
/* We copy the states of B into A */
for (q=0;q<B->number_of_states;q++) {
   renumber[q]=A->number_of_states;
   add_state(A);
}
for (q=0;q<B->number_of_states;q++) {
   A->states[renumber[q]]->outgoing_transitions=clone_transition_list(B->states[q]->outgoing_transitions,renumber,dup_symbol);
   A->states[renumber[q]]->default_state=(B->states[q]->default_state!=-1)?renumber[B->states[q]->default_state]:-1;
   if (is_final_state(B->states[q])) {
      set_final_state(A->states[renumber[q]]);
   }
}
/* Then, we concatenate A and B.
 * 1) We replace default transitions that outgo from B's initial states
 *    by explicit transitions */
struct list_int* initials=get_initial_states(B);
for (struct list_int* tmp=initials;tmp!=NULL;tmp=tmp->next) {
   explicit_default_transition(language,A,renumber[tmp->n]);
}
for (q=0;q<oldnb;q++) {
   if (is_final_state(A->states[q])) {
      /* Each final state of A becomes non final. Moreover, we have
       * to explicit its default transition, because if not, the concatenation
       * algorithm will modify the recognized language. */
      unset_final_state(A->states[q]);
      explicit_default_transition(language,A,q);
      for (struct list_int* tmp=initials;tmp!=NULL;tmp=tmp->next) {
         concat(&(A->states[q]->outgoing_transitions),clone_transition_list(A->states[renumber[tmp->n]]->outgoing_transitions,NULL,dup_symbol));
         if (is_final_state(A->states[renumber[tmp->n]])) {
            set_final_state(A->states[q]);
         }
      }
   }
}
free(renumber);
free_list_int(initials);
}

예제 #8

0

파일 보기

파일: NormalizationFst2.cpp 프로젝트: Rajat-dhyani/UnitexGramLab

//
// this function explores a sub-graph, considering tokens as strings
//
void explorer_sub_automate_normalization_string(Fst2* automate,int n,
                                     struct normalization_tree* noeud_normalization,
                                     unichar* output,struct norm_info** TEMP_LIST) {
Fst2State etat;
etat=automate->states[n];
if (is_final_state(etat)) {
   // if we are in a final state
   (*TEMP_LIST)=insert_in_norm_info_list(output,noeud_normalization,(*TEMP_LIST));
}
Transition* trans;
trans=etat->transitions;
unichar tmp[1000];
while (trans!=NULL) {
   if (trans->tag_number<0) {
      // case of a sub-graph
      struct norm_info* TMP=NULL;
      explorer_sub_automate_normalization_string(automate,automate->initial_states[-(trans->tag_number)],noeud_normalization,
                                        output,&TMP);
      while (TMP!=NULL) {
         // we continue to explore the current automaton
         explorer_sub_automate_normalization_string(automate,trans->state_number,TMP->node,
                                        TMP->output,TEMP_LIST);
         struct norm_info* z=TMP;
         TMP=TMP->next;
         free_norm_info(z);
      }
   }
   else {
      // normal transition
      Fst2Tag etiq;
      etiq=automate->tags[trans->tag_number];
      u_strcpy(tmp,output);
      u_strcat(tmp," ");
      if (etiq->output!=NULL && u_strcmp(etiq->output,"")
          && u_strcmp(etiq->output,"<E>") && !only_spaces(etiq->output)) {
         // we append the output if it exists and is not epsilon
         u_strcat(tmp,etiq->output);
      }
      struct normalization_tree_transition* trans_norm;
      trans_norm=get_trans_arbre_normalization_string(etiq->input,noeud_normalization->trans);
      if (trans_norm==NULL) {
         // if the transition does not exist in the tree, we create it
         trans_norm=new_trans_arbre_normalization_string(etiq->input);
         // we also create the destination node
         trans_norm->node=new_normalization_tree();
         trans_norm->next=noeud_normalization->trans;
         noeud_normalization->trans=trans_norm;
      }
      explorer_sub_automate_normalization_string(automate,trans->state_number,trans_norm->node,
                                        tmp,TEMP_LIST);
   }
   trans=trans->next;
}
}

예제 #9

0

파일 보기

파일: AutMinimization.cpp 프로젝트: adri87/Q-A

/**
 * Allocates, initializes and returns an array that associates
 * a color (0 or 1) to each state of 'A', making sure that the
 * state #0 will be colored with 0. '*nbColors' will be set to
 * the number of colors that have been used (1 if all states
 * have the same finality; 2 otherwise).
 */
int* init_colors(SingleGraph A,int *nbColors) {
int* color=(int*)calloc(A->number_of_states,sizeof(int));
if (color==NULL) {
   fatal_alloc_error("init_colors");
}
/* bicolor will indicate if all states are of the same color (finality) or
 * not */
bool bicolor=false;
if (is_final_state(A->states[0])) {
   /* We distinguish two cases (initial state final or not), just
    * to ensure that the color of the initial state #0 will be 0 */
   for (int e=0;e<A->number_of_states;e++) {
      color[e]=is_final_state(A->states[e])?0:(bicolor=true,1);
   }
} else {
   for (int e=0;e<A->number_of_states;e++) {
      color[e]=is_final_state(A->states[e])?(bicolor=true,1):0;
   }
}
(*nbColors)=(bicolor?2:1);
return color;
}

예제 #10

0

파일 보기

파일: ElagRulesCompilation.cpp 프로젝트: Rajat-dhyani/UnitexGramLab

/**
 * An Elag constraints is of the form: <=> .... <=> .... <=>
 * This function looks for states that are pointed to by middle '<=>' transitions.
 * It places their numbers into 'constraints' and it returns the size of this
 * array, i.e. the number of constraints expressed by this Elag rule.
 */
int count_constraints(Fst2Automaton* aut,int* constraints) {
int source=0;
int e;
Transition* t;
int c;
int nbConstraints=0;
symbol_t* symbol;
SingleGraph automaton=aut->automaton;
for (t=automaton->states[0]->outgoing_transitions;t!=NULL && source==0;t=t->next) {
   symbol=t->label;
   if (symbol->type==S_EQUAL) {
      if (t->state_number==0) {
         fatal_error("Illegal cycle in grammar\n");
      }
      source=t->tag_number;
   }
}
if (source==0) {
   /* If there are no contraints */
   return 0;
}
/* We look for '<=>' transitions, but only from the state 1, because
 * from state 0, we would take into account all the '<=>' that begin rules. */
for (e=1;e<automaton->number_of_states;e++) {
   for (t=automaton->states[e]->outgoing_transitions;t!=NULL;t=t->next) {
      symbol=t->label;
      if (t->state_number!=source && symbol->type==S_EQUAL && !is_final_state(automaton->states[t->state_number])) {
         /* We don't take into account '<=>' transitions that go to final states because
          * they are not middle '<=>' transitions. */
         for (c=0;c<nbConstraints;c++) {
            if (constraints[c]==t->state_number) {
               /* We stop if the constraint is already in the list */
               break;
            }
         }
         if (c==nbConstraints) {
	         if (++nbConstraints>=ELAG_MAX_CONSTRAINTS) {
               fatal_error("Too many constraints with same condition\n");
            }
	         constraints[c]=t->state_number;
	      }
      }
   }
}
if (nbConstraints==0) {
   fatal_error("Middle delimitor '<=>' not found\n");
}
return nbConstraints;
}

예제 #11

0

파일 보기

파일: Fst2Check_lib.cpp 프로젝트: Rajat-dhyani/UnitexGramLab

static E_MATCHING_STATUS get_status(SingleGraph g) {
if (g->number_of_states==0) {
	return CHK_DOES_NOT_MATCH_E;
}
int i=get_initial_state(g);
if (i<0) {
	fatal_error("Internal error in get_status: invalid negative initial state %d\n",i);
}
SingleGraphState s=g->states[i];
if (is_final_state(s)) {
	/* If the initial state is final, it means that we can reach it without matching
	 * anything in the text */
	return CHK_MATCHES_E;
}
return CHK_DONT_KNOW;
}

예제 #12

0

파일 보기

파일: RebuildTfst.cpp 프로젝트: UnitexGramLab/unitex-core

/**
 * Creates a SingleGraph copy of the given .fst2 subgraph, using
 * the same tag numeration.
 */
SingleGraph create_copy_of_fst2_subgraph(Fst2* fst2,int n) {
int n_states=fst2->number_of_states_per_graphs[n];
SingleGraph g=new_SingleGraph(n_states,INT_TAGS);
int shift=fst2->initial_states[n];
for (int i=0;i<n_states;i++) {
   SingleGraphState dest=add_state(g);
   Fst2State src=fst2->states[i+shift];
   if (is_initial_state(src)) {
      set_initial_state(dest);
   }
   if (is_final_state(src)) {
      set_final_state(dest);
   }
   Transition* t=src->transitions;
   while (t!=NULL) {
      add_outgoing_transition(dest,t->tag_number,t->state_number);
      t=t->next;
   }
}
return g;
}

예제 #13

0

파일 보기

파일: Fst2Check_lib.cpp 프로젝트: adri87/Q-A

/**
 * Returns 1 if we can match <E> from the current state, with or without
 * conditions; 0 otherwise.
 */
int graph_matches_E(int initial_state,int current_state,const Fst2State* states,Fst2Tag* tags,
                      int current_graph,unichar** graph_names,
                      ConditionList conditions_for_states[],
                      ConditionList *graph_conditions) {
Transition* l;
Fst2State s;
int ret_value=0;
int ret;
*graph_conditions=NULL;
s=states[current_state];
if (is_final_state(s)) {
   /* If we are arrived in a final state, then the graph matches <E> */
   set_bit_mask(&(s->control),UNCONDITIONAL_E_MATCH);
   return 1;
}
if (is_bit_mask_set(s->control,TMP_LOOP_MARK)) {
   /* If we have a loop, we do nothing, because they will be
    * dealt with later. */
   return 0;
}
if (is_bit_mask_set(s->control,VISITED_MARK)) {
   /* If we are in state that has already been visited */
   if (is_bit_mask_set(s->control,UNCONDITIONAL_E_MATCH)) {
      /* If this state can match <E> without conditions, then we have finished */
      return 1;
   }
   if (is_bit_mask_set(s->control,CONDITIONAL_E_MATCH)) {
      /* If this state can match <E> with conditions, then we have finished, but
       * we copy the necessary conditions in 'graph_conditions'. */
      *graph_conditions=clone_ConditionList(conditions_for_states[current_state-initial_state]);
      return 1;
   }
   /* If the state has been visited and if it does not match <E>, then we return OK */
   return 0;
}
set_bit_mask(&(s->control),VISITED_MARK);
set_bit_mask(&(s->control),TMP_LOOP_MARK);
l=s->transitions;
/* We look all the outgoing transitions */
while (l!=NULL) {
   if (l->tag_number<0) {
      /* If we have a subgraph, we test if it matches <E> */
      *graph_conditions=NULL;
      ret=graph_matches_E(initial_state,l->state_number,states,tags,current_graph,graph_names,conditions_for_states,graph_conditions);
      if (ret==1) {
         /* If the subgraph matches <E>, we say that the current state matches
          * <E>, modulo the conditions to be verified */
         set_bit_mask(&(s->control),CONDITIONAL_E_MATCH);
         /* We insert the new condition in first position... */
         insert_graph_in_conditions(-(l->tag_number),graph_conditions);
         /* ...and we merge the new conditions with the existing ones for this state */
         merge_condition_lists(&conditions_for_states[current_state-initial_state],*graph_conditions);
         *graph_conditions=NULL;
      }
      ret_value=ret_value|ret;
   }
   else if (tags[l->tag_number]->control&1) {
      /* If we have an <E> transition, we explore the rest of the graph from it */
      *graph_conditions=NULL;
      ret=graph_matches_E(initial_state,l->state_number,states,tags,current_graph,graph_names,conditions_for_states,graph_conditions);
      if (ret==1) {
         /* If we can match <E> from the <E>-transition's destination state, then
          * we can match it from the current state. */
         if (*graph_conditions==NULL) {
            /* If there is no condition */
            set_bit_mask(&(s->control),UNCONDITIONAL_E_MATCH);
         }
         else {
            /* Otherwise, we add the condition to the existing ones */
            set_bit_mask(&(s->control),CONDITIONAL_E_MATCH);
            merge_condition_lists(&conditions_for_states[current_state-initial_state],*graph_conditions);
            *graph_conditions=NULL;
         }
      }
      ret_value=ret_value|ret;
   }
   l=l->next;
}
unset_bit_mask(&(s->control),TMP_LOOP_MARK);
*graph_conditions=clone_ConditionList(conditions_for_states[current_state-initial_state]);
return ret_value;
}

예제 #14

0

파일 보기

파일: ElagRulesCompilation.cpp 프로젝트: Rajat-dhyani/UnitexGramLab

/**
 * This function analyzes the given Elag rule automaton to find
 * where the rule and constraint parts are. As a side effect, it builds
 * a fst2 grammar ("foo.fst2" => "foo-conc.fst2") that can be used by
 * the Locate program to match the <!> .... <!> .... <!> part of the rule.
 */
void split_elag_rule(elRule* rule, const VersatileEncodingConfig* vec,language_t* language) {
int c;
/* This array contains the numbers of the states that are pointed to by
 * middle '<=>' of the constraints */
int constraints[ELAG_MAX_CONSTRAINTS];
int nbConstraints=count_constraints(rule->automaton,constraints);
/* +1 because we have to count the <!> .... <!> .... <!> part of the rule */
rule->nbContexts=nbConstraints+1;
rule->contexts=(elContext*)malloc(rule->nbContexts*sizeof(elContext));
if (rule->contexts==NULL) {
   fatal_alloc_error("split_elag_rule");
}
for (c=0;c<rule->nbContexts;c++) {
   rule->contexts[c].left=NULL;
   rule->contexts[c].right=NULL;
}
int endR1=ELAG_UNDEFINED;
int endR2=ELAG_UNDEFINED;
int endC2=ELAG_UNDEFINED;
for (Transition* t=rule->automaton->automaton->states[0]->outgoing_transitions;t!=NULL;t=t->next) {
   symbol_t* symbol=t->label;
   switch (symbol->type) {
      /* We split the unique <!> .... <!> .... <!> part */
      case S_EXCLAM:
         if (rule->contexts[0].left!=NULL) {
            fatal_error("Too much '<!>' tags\n",rule->name);
         }
         rule->contexts[0].left=new_SingleGraph(PTR_TAGS);
         /* We look for the end of the first part of the rule */
         endR1=get_sub_automaton(rule->automaton->automaton,rule->contexts[0].left,t->state_number,0,S_EXCLAM);
         rule->contexts[0].right=new_SingleGraph(PTR_TAGS);
         endR2=get_sub_automaton(rule->automaton->automaton,rule->contexts[0].right,endR1,0,S_EXCLAM);
         if (endR1==ELAG_UNDEFINED || endR2==ELAG_UNDEFINED
             || !is_final_state(rule->automaton->automaton->states[endR2])) {
            fatal_error("split_elag_rule: %s: parse error in <!> part\n",rule->name);
         }
         break;

      /* We split the nbConstraints <=> .... <=> .... <=> parts */
      case S_EQUAL:
         if (rule->contexts[1].left!=NULL) {
            fatal_error("Non deterministic .fst2 file\n");
         }
         for (c=0;c<nbConstraints;c++) {
            rule->contexts[c+1].left=new_SingleGraph(PTR_TAGS);
            get_sub_automaton(rule->automaton->automaton,rule->contexts[c+1].left,t->state_number,1,constraints[c]);
            rule->contexts[c+1].right=new_SingleGraph(PTR_TAGS);
            endC2=get_sub_automaton(rule->automaton->automaton,rule->contexts[c+1].right,constraints[c],0,S_EQUAL);
            if (endC2==ELAG_UNDEFINED || !is_final_state(rule->automaton->automaton->states[endC2])) {
               fatal_error("split_elag_rule: %s: parse error in <=> part\n",rule->name);
            }
         }
         break;

      default: fatal_error("Left delimitor '<!>' or '<=>' missing\n");
   }
}
if (rule->contexts[0].left==NULL) {
   fatal_error("In grammar '%s': symbol '<!>' not found.\n",rule->name);
}
char buf[FILENAME_MAX];
remove_extension(rule->name,buf);
strcat(buf,"-conc.fst2");

/* We create the.fst2 to be used by Locate */
Fst2Automaton* locate=make_locate_automaton(rule,language);
save_automaton(locate,buf,vec,FST_LOCATE);
free_Fst2Automaton(locate,free_symbol);
}

예제 #15

0

파일 보기

파일: make_with_funcs.c 프로젝트: maricapp/Compiladores

void make_with_func(args_t* args, char* outfile){
    
    int i, j, count;
    
    if (get_args("test.auto", args)) {
        
    }
    else {
        printf("Falha ao obter args\n");
    }
    
    //cria arquivo 
    FILE * arq = fopen(outfile, "w");
    
    //insere includes
    fprintf(arq, "#include <stdio.h>\n");
    fprintf(arq, "#include <stdlib.h>\n");
    fprintf(arq, "#define TAM 10\n");
    fprintf(arq, "\n");
    
    //variaveis globais
    fprintf(arq, "char v[TAM];\n");
    fprintf(arq, "int p = 0;\n");

    fprintf(arq, "\n");
    
    //protitipos das funcões
    for(i = 0; i < args->num_states; i++){
        fprintf(arq, "void e%d();\n", i);
    }    
    fprintf(arq, "void sucesso();\n");
    fprintf(arq, "void erro();\n");
    
    fprintf(arq, "\n");

    //main
    fprintf(arq, "int main(void) {\n");
    fprintf(arq, "    p = 0;\n");
    fprintf(arq, "    printf(\"Sequencia:\"); \n");
    fprintf(arq, "    fgets(v, TAM, stdin);\n");
    fprintf(arq, "    e0();\n");
    fprintf(arq, "    return(0);\n");
    fprintf(arq, "}\n");
    
    fprintf(arq, "\n");
    
    //funcoes dos estados
    for (i = 0; i < args->num_states; i++) {
        fprintf(arq, "\nvoid %s(){\n", args->states[i]);            
        count = 0;
                   
        for (j = 0; j < args->num_symbols; j++) {
            if (args.transitions[i + j * args->num_states] != -1) {
                if (count > 0){
                    fprintf(arq, " else \n");
                    fprintf(arq, "        if(v[p] == '%c'){ \n", args->symbols[j]);
                }
                else{
                       fprintf(arq, "    if(v[p] == '%c'){ \n", args->symbols[j]);
                }    
                    fprintf(arq, "        p++;\n");
                    fprintf(arq, "        e%d();\n", args->transitions[i + j * args->num_states]);
                    fprintf(arq, "    }");
                    count++;
                  
            }
        }
            
        if (count > 0) {
            fprintf(arq, " else {\n");
        }
            
        if (is_final_state(&args, i)){
            fprintf(arq, "    sucesso();\n");
            fprintf(arq, "    }\n");
            fprintf(arq, "}\n");   
        }
        else {
            fprintf(arq, "    erro();\n");
            fprintf(arq, "    }\n");            
            fprintf(arq, "}\n");   
        }
    }
    
    fprintf(arq, "\n");
    
    //cria funcao sucesso
    fprintf(arq, "void sucesso(){\n");    
    fprintf(arq, "    printf(\" Sucesso!\");\n");
    fprintf(arq, "}\n");
    
    fprintf(arq, "\n");
    
    //cria funcao erro
    fprintf(arq, "void erro(){\n");    
    fprintf(arq, "    printf(\" Erro!\");\n");
    fprintf(arq, "}\n");
    
    fprintf(arq, "\n");
    //fecha arquivo
    fclose(arq);
}

예제 #16

0

파일 보기

파일: ElagFstFilesIO.cpp 프로젝트: UnitexGramLab/unitex-core

/**
 * Saves the given automaton into the given .fst2 file.
 */
void fst_file_write(Elag_fst_file_out* fstf,const Fst2Automaton* A) {
Ustring* tag=new_Ustring();
void (*symbol_to_tag)(const symbol_t*,Ustring*)=NULL;
switch (fstf->type) {
   case FST_TEXT:
      symbol_to_tag=symbol_to_text_label;
      break;

   case FST_GRAMMAR:
      symbol_to_tag=symbol_to_grammar_label;
      break;

  case FST_LOCATE:
      symbol_to_tag=symbol_to_locate_label;
      break;

  default:
      fatal_error("fst_file_write: invalid fstf->type: %d\n",fstf->type);
}
/* We save the graph number and name */
u_fprintf(fstf->f,"-%d %S\n",fstf->nb_automata+1,A->name);
int index;
unichar deflabel[]={'<','d','e','f','>',0};
for (int q=0;q<A->automaton->number_of_states;q++) {
   SingleGraphState state=A->automaton->states[q];
   u_fprintf(fstf->f,"%C ",is_final_state(state)?'t':':');
   for (Transition* t=state->outgoing_transitions;t!=NULL;t=t->next) {
      if (t->tag_number==-1) {
         /* If we are in the case of an "EMPTY" transition created because
          * the automaton was emptied as trim time */
         u_strcpy(tag,"EMPTY");
      } else {
         symbol_t* symbol=t->label;
         symbol_to_tag(symbol,tag);
      }
      if (fstf->type==FST_LOCATE) {
         /* If we are saving a Locate .fst2, we have to perform
          * some special things */
         if (u_strcmp(tag->str, "<PNC>") == 0) {
            PNC_trans_write(fstf, t->state_number);
         } else if (u_strcmp(tag->str, "<CHFA>") == 0 || u_strcmp(tag->str, "<NB>") == 0) {
            CHFA_trans_write(fstf, t->state_number);
         } else if (u_strcmp(tag->str, "<.>") == 0) {
            LEXIC_trans_write(fstf, t->state_number);
         } else {
            goto normal_output;
         }
      } else {
         /* If we have a normal transition to print */
         normal_output:
         index=get_value_index(tag->str,fstf->labels);
         u_fprintf(fstf->f,"%d %d ",index,t->state_number);
      }
   }
   if (state->default_state!=-1) {
      if (fstf->type!=FST_GRAMMAR) {
         error("Unexpected <def> label in text/locate automaton\n");
      }
      index=get_value_index(deflabel,fstf->labels);
      u_fprintf(fstf->f,"%d %d ",index,state->default_state);
   }
   u_fputc('\n',fstf->f);
}
u_fprintf(fstf->f,"f \n");
free_Ustring(tag);
fstf->nb_automata++;
}

예제 #17

0

파일 보기

파일: NormalizationFst2.cpp 프로젝트: Rajat-dhyani/UnitexGramLab

/**
 * This function explore the normalization grammar to construct
 * the normalization tree. If the 'list' parameter is NULL, then we
 * are in the main call to the main graph; otherwise, we are within
 * a subgraph.
 */
void explore_normalization_fst2(Fst2* fst2,int current_state,
                                struct normalization_tree* node,
                                struct string_hash* tokens,const unichar* output,
                                const Alphabet* alph,struct norm_info** list) {
Fst2State state=fst2->states[current_state];
if (is_final_state(state)) {
   /* If we are in a final state, we behave differently if we are in a subgraph
    * or in the main call to the main graph. */
   if (list!=NULL) {
      (*list)=insert_in_norm_info_list(output,node,(*list));
   }
   else {
      node->outputs=sorted_insert(output,node->outputs);
   }
}
Transition* trans=state->transitions;
unichar tmp[1024];
while (trans!=NULL) {
   if (trans->tag_number<0) {
      /* Case of a subgraph call */
      struct norm_info* tmp_list=NULL;
      explore_normalization_fst2(fst2,fst2->initial_states[-(trans->tag_number)],node,
                                        tokens,output,alph,&tmp_list);
      while (tmp_list!=NULL) {
         /* We continue to explore the current graph */
         explore_normalization_fst2(fst2,trans->state_number,tmp_list->node,
                                        tokens,tmp_list->output,alph,list);
         struct norm_info* z=tmp_list;
         tmp_list=tmp_list->next;
         free_norm_info(z);
      }
   }
   else {
      /* If we have a normal transition */
      Fst2Tag tag=fst2->tags[trans->tag_number];
      u_strcpy(tmp,output);
      u_strcat(tmp," ");
      if (tag->output!=NULL && tag->output[0]!='\0' && u_strcmp(tag->output,"<E>") && !only_spaces(tag->output)) {
         /* We append the output if it exists and is not epsilon */
         u_strcat(tmp,tag->output);
      }
      if (!u_strcmp(tag->input,"<E>")) {
         /* If we have an epsilon transition, we go on in the fst2, but
          * we don't move in the normalization tree */
         explore_normalization_fst2(fst2,trans->state_number,node,tokens,tmp,alph,list);
      } else {
         /* If we have a normal transition, we explore all the tokens that match it */
         struct list_int* l=get_token_list_for_sequence(tag->input,alph,tokens);
         while (l!=NULL) {
            /* Then, we add a branch in the normalization tree for
             * each token. Note that it may introduce combinatory explosions
             * if the the fst2 matches large sequences */
            struct normalization_tree_transition* trans_norm;
            trans_norm=get_transition(l->n,node->trans);
            if (trans_norm==NULL) {
               /* If the transition does not exist in the tree, we create it */
               trans_norm=new_normalization_tree_transition(l->n,new_normalization_tree(),node->trans);
               node->trans=trans_norm;
            }
            explore_normalization_fst2(fst2,trans->state_number,trans_norm->node,
                                           tokens,tmp,alph,list);
            struct list_int* L=l;
            l=l->next;
            free(L);
         }
      }
   }
   trans=trans->next;
}
}

예제 #18

0

파일 보기

파일: Fst2TxtAsRoutine.cpp 프로젝트: adri87/Q-A

void scan_graph(int n_graph,         // number of current graph
                     int e,          // number of current state
                     int pos,        //
                     int depth,
                     struct parsing_info** liste_arrivee,
                     unichar* mot_token_buffer,
                     struct fst2txt_parameters* p,Abstract_allocator prv_alloc_recycle) {
Fst2State etat_courant=p->fst2->states[e];
if (depth > MAX_DEPTH) {

  error(  "\n"
          "Maximal stack size reached in graph %i!\n"
          "Recognized more than %i tokens starting from:\n"
          "  ",
          n_graph, MAX_DEPTH);
  for (int i=0; i<60; i++) {
    error("%S",p->buffer[p->current_origin+i]);
  }
  error("\nSkipping match at this position, trying from next token!\n");
  p->output[0] = '\0';  // clear output
  p->input_length = 0; // reset taille_entree
  empty(p->stack);    // clear output stack
  if (liste_arrivee != NULL) {
    while (*liste_arrivee != NULL) { // free list of subgraph matches
      struct parsing_info* la_tmp=*liste_arrivee;
      *liste_arrivee=(*liste_arrivee)->next;
      la_tmp->next=NULL; // to don't free the next item
      free_parsing_info(la_tmp, prv_alloc_recycle);
    }
  }
  return;
  //  exit(1); // don't exit, try at next position
}
depth++;

if (is_final_state(etat_courant)) {
   // if we are in a final state
  p->stack->stack[p->stack->stack_pointer+1]='\0';
  if (n_graph == 0) { // in main graph
    if (pos>=p->input_length/*sommet>u_strlen(output)*/) {
      // and if the recognized input is longer than the current one, it replaces it
      u_strcpy(p->output,p->stack->stack);
      p->input_length=(pos);
    }
  } else { // in a subgraph
    (*liste_arrivee)=insert_if_absent(pos,-1,-1,(*liste_arrivee),p->stack->stack_pointer+1,
                                      p->stack->stack,p->variables,NULL,NULL,-1,-1,NULL,-1, prv_alloc_recycle);
  }
}

if (pos+p->current_origin==p->text_buffer->size) {
   // if we are at the end of the text, we return
   return;
}

int SOMMET=p->stack->stack_pointer+1;
int pos2;

/* If there are some letter sequence transitions like %hello, we process them */
if (p->token_tree[e]->transition_array!=NULL) {
   if (p->buffer[pos+p->current_origin]==' ') {pos2=pos+1;if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' ');}
   /* we don't keep this line because of problems occur in sentence tokenizing
    * if the return sequence is defautly considered as a separator like space
    else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);}
    */
   else pos2=pos;
   int position=0;
   unichar *token=mot_token_buffer;
   if (p->tokenization_policy==CHAR_BY_CHAR_TOKENIZATION
       || (is_letter(p->buffer[pos2+p->current_origin],p->alphabet) && (pos2+p->current_origin==0 || !is_letter(p->buffer[pos2+p->current_origin-1],p->alphabet)))) {
      /* If we are in character by character mode */
      while (pos2+p->current_origin<p->text_buffer->size && is_letter(p->buffer[pos2+p->current_origin],p->alphabet)) {
         token[position++]=p->buffer[(pos2++)+p->current_origin];
         if (p->tokenization_policy==CHAR_BY_CHAR_TOKENIZATION) {
            break;
         }
      }
      token[position]='\0';
      if (position!=0 &&
          (p->tokenization_policy==CHAR_BY_CHAR_TOKENIZATION || !(is_letter(token[position-1],p->alphabet) && is_letter(p->buffer[pos2+p->current_origin],p->alphabet)))) {
       // we proceed only if we have exactly read the contenu sequence
       // in both modes MERGE and REPLACE, we process the transduction if any
       int SOMMET2=p->stack->stack_pointer;
       Transition* RES=get_matching_tags(token,p->token_tree[e],p->alphabet);
       Transition* TMP;
       unichar* mot_token_new_recurse_buffer=NULL;
       if (RES!=NULL) {
          // we allocate a new mot_token_buffer for the scan_graph recursin because we need preserve current
          // token=mot_token_buffer
          mot_token_new_recurse_buffer=(unichar*)malloc(MOT_BUFFER_TOKEN_SIZE*sizeof(unichar));
          if (mot_token_new_recurse_buffer==NULL) {
            fatal_alloc_error("scan_graph");
          }
       }
       while (RES!=NULL) {
           p->stack->stack_pointer=SOMMET2;
          Fst2Tag etiq=p->fst2->tags[RES->tag_number];
          traiter_transduction(p,etiq->output);
          int longueur=u_strlen(etiq->input);
          unichar C=token[longueur];
          token[longueur]='\0';
          if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) {
             // if we are in MERGE mode, we add to ouput the char we have read
             push_input_string(p->stack,token,0);
          }
          token[longueur]=C;
          scan_graph(n_graph,RES->state_number,pos2-(position-longueur),depth,liste_arrivee,mot_token_new_recurse_buffer,p);
          TMP=RES;
          RES=RES->next;
          free(TMP);
       }
       if (mot_token_new_recurse_buffer!=NULL) {
         free(mot_token_new_recurse_buffer);
       }
   }
}
}

Transition* t=etat_courant->transitions;
while (t!=NULL) {
    p->stack->stack_pointer=SOMMET-1;
      // we process the transition of the current state
      int n_etiq=t->tag_number;
      if (n_etiq<0) {
         // case of a sub-graph
         struct parsing_info* liste=NULL;
         unichar* pile_old;
         p->stack->stack[p->stack->stack_pointer+1]='\0';
         pile_old = u_strdup(p->stack->stack);
         scan_graph((((unsigned)n_etiq)-1),p->fst2->initial_states[-n_etiq],pos,depth,&liste,mot_token_buffer,p);
         while (liste!=NULL) {
            p->stack->stack_pointer=liste->stack_pointer-1;
            u_strcpy(p->stack->stack,liste->stack);
            scan_graph(n_graph,t->state_number,liste->position,depth,liste_arrivee,mot_token_buffer,p);
            struct parsing_info* l_tmp=liste;
            liste=liste->next;
            l_tmp->next=NULL; // to don't free the next item
            free_parsing_info(l_tmp, prv_alloc_recycle);
         }
         u_strcpy(p->stack->stack,pile_old);
         free(pile_old);
         p->stack->stack_pointer=SOMMET-1;
      }
      else {
         // case of a normal tag
         Fst2Tag etiq=p->fst2->tags[n_etiq];
         unichar* contenu=etiq->input;
         int contenu_len_possible_match=u_len_possible_match(contenu);
         if (etiq->type==BEGIN_OUTPUT_VAR_TAG) {
        	 fatal_error("Unsupported $|XXX( tags in Fst2Txt\n");
         }
         if (etiq->type==END_OUTPUT_VAR_TAG) {
           	 fatal_error("Unsupported $|XXX) tags in Fst2Txt\n");
         }
         if (etiq->type==BEGIN_VAR_TAG) {
            // case of a $a( variable tag
            //int old;
            struct transduction_variable* L=get_transduction_variable(p->variables,etiq->variable);
            if (L==NULL) {
               fatal_error("Unknown variable: %S\n",etiq->variable);
            }
            //old=L->start;
            if (p->buffer[pos+p->current_origin]==' ' && pos+p->current_origin+1<p->text_buffer->size) {
               pos2=pos+1;
               if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' ');
            }
            //else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);}
            else pos2=pos;
            L->start_in_tokens=pos2;
            scan_graph(n_graph,t->state_number,pos2,depth,liste_arrivee,mot_token_buffer,p);
            //L->start=old;
         }
         else if (etiq->type==END_VAR_TAG) {
              // case of a $a) variable tag
              //int old;
              struct transduction_variable* L=get_transduction_variable(p->variables,etiq->variable);
              if (L==NULL) {
                 fatal_error("Unknown variable: %S\n",etiq->variable);
              }
              //old=L->end;
              if (pos>0)
                L->end_in_tokens=pos-1;
              else L->end_in_tokens=pos;
              // BUG: qd changement de buffer, penser au cas start dans ancien buffer et end dans nouveau
              scan_graph(n_graph,t->state_number,pos,depth,liste_arrivee,mot_token_buffer,p);
              //L->end=old;
         }
         else if ((contenu_len_possible_match==5) && (!u_trymatch_superfast5(contenu,ETIQ_MOT_LN5))) {
              // case of transition by any sequence of letters
              if (p->buffer[pos+p->current_origin]==' ' && pos+p->current_origin+1<p->text_buffer->size) {
                 pos2=pos+1;
                 if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' ');
              }
              //else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);}
              else pos2=pos;
              unichar* mot=mot_token_buffer;
              int position=0;
              if (p->tokenization_policy==CHAR_BY_CHAR_TOKENIZATION ||
                  ((pos2+p->current_origin)==0 || !is_letter(p->buffer[pos2+p->current_origin-1],p->alphabet))) {
                     while (pos2+p->current_origin<p->text_buffer->size && is_letter(p->buffer[pos2+p->current_origin],p->alphabet)) {
                           mot[position++]=p->buffer[(pos2++)+p->current_origin];
                     }
                     mot[position]='\0';
                     if (position!=0) {
                       // we proceed only if we have read a letter sequence
                       // in both modes MERGE and REPLACE, we process the transduction if any
                       traiter_transduction(p,etiq->output);
                       if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) {
                         // if we are in MERGE mode, we add to ouput the char we have read
                         push_output_string(p->stack,mot);
                       }
                       scan_graph(n_graph,t->state_number,pos2,depth,liste_arrivee,mot_token_buffer,p);
                     }
              }
         }
         else if ((contenu_len_possible_match==4) && (!u_trymatch_superfast4(contenu,ETIQ_NB_LN4))) {
              // case of transition by any sequence of digits
              if (p->buffer[pos+p->current_origin]==' ') {
                 pos2=pos+1;
                 if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' ');
              }
              //else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);}
              else pos2=pos;
              unichar* mot=mot_token_buffer;
              int position=0;
              while (pos2+p->current_origin<p->text_buffer->size && (p->buffer[pos2+p->current_origin]>='0')
                     && (p->buffer[pos2+p->current_origin]<='9')) {
                 mot[position++]=p->buffer[(pos2++)+p->current_origin];
              }
              mot[position]='\0';
              if (position!=0) {
                 // we proceed only if we have read a letter sequence
                 // in both modes MERGE and REPLACE, we process the transduction if any
                 traiter_transduction(p,etiq->output);
                 if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) {
                    // if we are in MERGE mode, we add to ouput the char we have read
                     push_output_string(p->stack,mot);
                 }
                 scan_graph(n_graph,t->state_number,pos2,depth,liste_arrivee,mot_token_buffer,p);
              }
         }
         else if ((contenu_len_possible_match==5) && (!u_trymatch_superfast5(contenu,ETIQ_MAJ_LN5))) {
              // case of upper case letter sequence
              if (p->buffer[pos+p->current_origin]==' ') {pos2=pos+1;if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' ');}
              //else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);}
              else pos2=pos;
              unichar* mot=mot_token_buffer;
              int position=0;
              if (p->tokenization_policy==CHAR_BY_CHAR_TOKENIZATION ||
                  ((pos2+p->current_origin)==0 || !is_letter(p->buffer[pos2+p->current_origin-1],p->alphabet))) {
                 while (pos2+p->current_origin<p->text_buffer->size && is_upper(p->buffer[pos2+p->current_origin],p->alphabet)) {
                    mot[position++]=p->buffer[(pos2++)+p->current_origin];
                 }
                 mot[position]='\0';
                 if (position!=0 && !is_letter(p->buffer[pos2+p->current_origin],p->alphabet)) {
                   // we proceed only if we have read an upper case letter sequence
                   // which is not followed by a lower case letter
                   // in both modes MERGE and REPLACE, we process the transduction if any
                   traiter_transduction(p,etiq->output);
                   if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) {
                     // if we are in MERGE mode, we add to ouput the char we have read
                     push_input_string(p->stack,mot,0);
                   }
                   scan_graph(n_graph,t->state_number,pos2,depth,liste_arrivee,mot_token_buffer,p);
                 }
              }
         }
         else if ((contenu_len_possible_match==5) && (!u_trymatch_superfast5(contenu,ETIQ_MIN_LN5))) {
              // case of lower case letter sequence
              if (p->buffer[pos+p->current_origin]==' ') {pos2=pos+1;if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' ');}
              //else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);}
              else pos2=pos;
              unichar* mot=mot_token_buffer;
              int position=0;
              if (p->tokenization_policy==CHAR_BY_CHAR_TOKENIZATION ||
                  (pos2+p->current_origin==0 || !is_letter(p->buffer[pos2+p->current_origin-1],p->alphabet))) {
                 while (pos2+p->current_origin<p->text_buffer->size && is_lower(p->buffer[pos2+p->current_origin],p->alphabet)) {
                    mot[position++]=p->buffer[(pos2++)+p->current_origin];
                 }
                 mot[position]='\0';
                 if (position!=0 && !is_letter(p->buffer[pos2+p->current_origin],p->alphabet)) {
                   // we proceed only if we have read a lower case letter sequence
                   // which is not followed by an upper case letter
                   // in both modes MERGE and REPLACE, we process the transduction if any
                   traiter_transduction(p,etiq->output);
                   if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) {
                     // if we are in MERGE mode, we add to ouput the char we have read
                     push_input_string(p->stack,mot,0);
                   }
                   scan_graph(n_graph,t->state_number,pos2,depth,liste_arrivee,mot_token_buffer,p);
                 }
              }
         }
         else if ((contenu_len_possible_match==5) && (!u_trymatch_superfast5(contenu,ETIQ_PRE_LN5))) {
              // case of a sequence beginning by an upper case letter
              if (p->buffer[pos+p->current_origin]==' ') {pos2=pos+1;if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' ');}
              //else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);}
              else pos2=pos;
              unichar* mot=mot_token_buffer;
              int position=0;
              if (p->tokenization_policy==CHAR_BY_CHAR_TOKENIZATION ||
                  (is_upper(p->buffer[pos2+p->current_origin],p->alphabet) && (pos2+p->current_origin==0 || !is_letter(p->buffer[pos2+p->current_origin-1],p->alphabet)))) {
                 while (pos2+p->current_origin<p->text_buffer->size && is_letter(p->buffer[pos2+p->current_origin],p->alphabet)) {
                    mot[position++]=p->buffer[(pos2++)+p->current_origin];
                 }
                 mot[position]='\0';
                 if (position!=0 && !is_letter(p->buffer[pos2+p->current_origin],p->alphabet)) {
                   // we proceed only if we have read a letter sequence
                   // which is not followed by a letter
                   // in both modes MERGE and REPLACE, we process the transduction if any
                   traiter_transduction(p,etiq->output);
                   if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) {
                     // if we are in MERGE mode, we add to ouput the char we have read
                     push_input_string(p->stack,mot,0);
                   }
                   scan_graph(n_graph,t->state_number,pos2,depth,liste_arrivee,mot_token_buffer,p);
                 }
              }
         }
         else if ((contenu_len_possible_match==5) && (!u_trymatch_superfast5(contenu,ETIQ_PNC_LN5))) {
              // case of a punctuation sequence
              if (p->buffer[pos+p->current_origin]==' ') {pos2=pos+1;if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' ');}
              //else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);}
              else pos2=pos;
              unichar C=p->buffer[pos2+p->current_origin];
              if (C==';' || C=='!' || C=='?' ||
                  C==':' ||  C==0xbf ||
                  C==0xa1 || C==0x0e4f || C==0x0e5a ||
                  C==0x0e5b || C==0x3001 || C==0x3002 ||
                  C==0x30fb) {
                 // in both modes MERGE and REPLACE, we process the transduction if any
                 traiter_transduction(p,etiq->output);
                 if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) {
                    // if we are in MERGE mode, we add to ouput the char we have read
                    push(p->stack,C);
                 }
                 scan_graph(n_graph,t->state_number,pos2+1,depth,liste_arrivee,mot_token_buffer,p);
              }
              else {
                   // we consider the case of ...
                   // BUG: if ... appears at the end of the buffer
                   if (C=='.') {
                      if ((pos2+p->current_origin+2)<p->text_buffer->size && p->buffer[pos2+p->current_origin+1]=='.' && p->buffer[pos2+p->current_origin+2]=='.') {
                         traiter_transduction(p,etiq->output);
                         if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) {
                            // if we are in MERGE mode, we add to ouput the ... we have read
                            push(p->stack,C);push(p->stack,C);push(p->stack,C);
                         }
                         scan_graph(n_graph,t->state_number,pos2+3,depth,liste_arrivee,mot_token_buffer,p);
                      } else {
                        // we consider the . as a normal punctuation sign
                        traiter_transduction(p,etiq->output);
                        if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) {
                          // if we are in MERGE mode, we add to ouput the char we have read
                          push(p->stack,C);
                        }
                        scan_graph(n_graph,t->state_number,pos2+1,depth,liste_arrivee,mot_token_buffer,p);
                      }
                   }
              }
         }
         else if ((contenu_len_possible_match==3) && (!u_trymatch_superfast3(contenu,ETIQ_E_LN3))) {
              // case of an empty sequence
              // in both modes MERGE and REPLACE, we process the transduction if any
              traiter_transduction(p,etiq->output);
              scan_graph(n_graph,t->state_number,pos,depth,liste_arrivee,mot_token_buffer,p);
         }
         else if ((contenu_len_possible_match==3) && (!u_trymatch_superfast3(contenu,ETIQ_CIRC_LN3))) {
              // case of a new line sequence
              if (p->buffer[pos+p->current_origin]=='\n') {
                 // in both modes MERGE and REPLACE, we process the transduction if any
                 traiter_transduction(p,etiq->output);
                 if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) {
                    // if we are in MERGE mode, we add to ouput the char we have read
                    push(p->stack,'\n');
                 }
                 scan_graph(n_graph,t->state_number,pos+1,depth,liste_arrivee,mot_token_buffer,p);
              }
         }
         else if ((contenu_len_possible_match==1) && (!u_trymatch_superfast1(contenu,'#')) && (!(etiq->control&RESPECT_CASE_TAG_BIT_MASK))) {
              // case of a no space condition
              if (p->buffer[pos+p->current_origin]!=' ') {
                // in both modes MERGE and REPLACE, we process the transduction if any
                traiter_transduction(p,etiq->output);
                scan_graph(n_graph,t->state_number,pos,depth,liste_arrivee,mot_token_buffer,p);
              }
         }
         else if ((contenu_len_possible_match==1) && (!u_trymatch_superfast1(contenu,' '))) {
         // case of an obligatory space
              if (p->buffer[pos+p->current_origin]==' ') {
                // in both modes MERGE and REPLACE, we process the transduction if any
                traiter_transduction(p,etiq->output);
                 if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) {
                    // if we are in MERGE mode, we add to ouput the char we have read
                    push(p->stack,' ');
                 }
                scan_graph(n_graph,t->state_number,pos+1,depth,liste_arrivee,mot_token_buffer,p);
              }
         }
         else if ((contenu_len_possible_match==3) && (!u_trymatch_superfast5(contenu,ETIQ_L_LN3))) {
              // case of a single letter
              if (p->buffer[pos+p->current_origin]==' ') {pos2=pos+1;if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' ');}
              //else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);}
              else pos2=pos;
              if (is_letter(p->buffer[pos2+p->current_origin],p->alphabet)) {
                // in both modes MERGE and REPLACE, we process the transduction if any
                traiter_transduction(p,etiq->output);
                 if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) {
                    // if we are in MERGE mode, we add to ouput the char we have read
                    push(p->stack,p->buffer[pos2+p->current_origin]);
                 }
                scan_graph(n_graph,t->state_number,pos2+1,depth,liste_arrivee,mot_token_buffer,p);
              }
         }
         else {
              // case of a normal letter sequence
              if (p->buffer[pos+p->current_origin]==' ') {pos2=pos+1;if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' ');}
              //else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);}
              else pos2=pos;
              if (etiq->control&RESPECT_CASE_TAG_BIT_MASK) {
                 // case of exact case match
                 int position=0;
                 while (pos2+p->current_origin<p->text_buffer->size && p->buffer[pos2+p->current_origin]==contenu[position]) {
                   pos2++; position++;
                 }
                 if (contenu[position]=='\0' && position!=0 &&
                     !(is_letter(contenu[position-1],p->alphabet) && is_letter(p->buffer[pos2+p->current_origin],p->alphabet))) {
                   // we proceed only if we have exactly read the contenu sequence
                   // in both modes MERGE and REPLACE, we process the transduction if any
                   traiter_transduction(p,etiq->output);
                   if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) {
                     // if we are in MERGE mode, we add to ouput the char we have read
                     push_input_string(p->stack,contenu,0);
                   }
                   scan_graph(n_graph,t->state_number,pos2,depth,liste_arrivee,mot_token_buffer,p);
                 }
              }
              else {
                 // case of variable case match
                 // the letter sequences may have been caught by the arbre_etiquette structure
                 int position=0;
                 unichar* mot=mot_token_buffer;
                 while (pos2+p->current_origin<p->text_buffer->size && is_equal_or_uppercase(contenu[position],p->buffer[pos2+p->current_origin],p->alphabet)) {
                   mot[position++]=p->buffer[(pos2++)+p->current_origin];
                 }
                 mot[position]='\0';
                 if (contenu[position]=='\0' && position!=0 &&
                     !(is_letter(contenu[position-1],p->alphabet) && is_letter(p->buffer[pos2+p->current_origin],p->alphabet))) {
                   // we proceed only if we have exactly read the contenu sequence
                   // in both modes MERGE and REPLACE, we process the transduction if any
                   traiter_transduction(p,etiq->output);
                   if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) {
                     // if we are in MERGE mode, we add to ouput the char we have read
                     push_input_string(p->stack,mot,0);
                   }
                   scan_graph(n_graph,t->state_number,pos2,depth,liste_arrivee,mot_token_buffer,p);
                 }
              }
         }
      }
      t=t->next;
}
}