/**
 * Prints a human-readable dump of the automaton description to stdout:
 * the state names (final states in parentheses), the symbols, and the
 * transition table (one row per state, one column per symbol).
 *
 * NOTE(review): the table is addressed here as num_states*state+symbol;
 * confirm this matches the layout used by the code that fills
 * args->transitions.
 */
void print_args(args_t* args) {
    int state;
    int sym;

    /* States: "<number of final states>/<number of states>", then the names. */
    printf("\nSTATES: %d/%d\n", args->num_fstates, args->num_states);
    for (state = 0; state < args->num_states; state++) {
        if (!is_final_state(args, state)) {
            printf("%s , ", args->states[state]);
            continue;
        }
        printf("(%s), ", args->states[state]);
    }
    /* Backspaces overwrite the trailing separator on a terminal. */
    printf("\b\b \n");

    /* Symbols. */
    printf("\nSYMBOLS: %d\n", args->num_symbols);
    for (sym = 0; sym < args->num_symbols; sym++) {
        printf("%c , ", args->symbols[sym]);
    }
    printf("\b\b \n");

    /* Transition table: negative entries are shown as "---". */
    printf("\nTRANSITIONS:\n");
    for (state = 0; state < args->num_states; state++) {
        for (sym = 0; sym < args->num_symbols; sym++) {
            const int cell = args->num_states * state + sym;
            if (args->transitions[cell] < 0) {
                printf("--- ");
            } else {
                printf("%03d ", args->transitions[cell]);
            }
        }
        printf("\n");
    }
    printf("\n");
}
/**
 * Builds the condition graph associated to subgraph #graph of the given fst2.
 *
 * If 'full_simplification' is not null, the copy follows these rules:
 * - <E> transitions and graph calls are kept
 * - all right contexts are ignored, replaced by an epsilon transition
 * - all tags that don't match anything in the text (like $* $< and $>) are
 *   kept, because they can be involved into a E loop. A real E transition
 *   is also added.
 * - all other transitions that match something from the text are removed
 *
 * The resulting graph is then only made of real E transitions, pseudo-E
 * transitions and graph calls, and it can be read as follows:
 * - if no final state is accessible, the graph cannot match E
 * - if the initial state is final, the graph matches E
 * - otherwise, we don't know yet
 *
 * If 'full_simplification' is null, the condition graph is meant for E loop
 * and left recursion detection: the graph is kept as is, except that an E
 * transition is added to skip right contexts. The contexts themselves are
 * kept, because they also have to be inspected for E loops and left
 * recursions.
 *
 * Each transition is handed to deal_with_transition_v1 (full simplification)
 * or deal_with_transition_v2 (otherwise); the graph is finally passed to
 * clean_condition_graph before being returned.
 */
static SingleGraph create_condition_graph(Fst2* fst2,int graph,int full_simplification) {
SingleGraph result=new_SingleGraph(INT_TAGS);
const int first=fst2->initial_states[graph];
const int past_last=first+fst2->number_of_states_per_graphs[graph];
for (int n=first;n<past_last;n++) {
   SingleGraphState copy=add_state(result);
   Fst2State original=fst2->states[n];
   /* The copy inherits the initial/final status of the original state */
   if (is_initial_state(original)) {
      set_initial_state(copy);
   }
   if (is_final_state(original)) {
      set_final_state(copy);
   }
   for (Transition* t=original->transitions;t!=NULL;t=t->next) {
      if (full_simplification) {
         deal_with_transition_v1(fst2,t,copy,first);
      } else {
         deal_with_transition_v2(fst2,t,copy,first);
      }
   }
}
clean_condition_graph(result);
return result;
}
/**
 * Replaces the given automaton by its complement, in place.
 *
 * A sink state is appended to A, every state has its finality reversed,
 * and each state that has no default transition receives explicit
 * transitions to the sink state for all the symbols returned by
 * LEXIC_minus_transitions for its outgoing transitions.
 */
void elag_complementation(language_t* language,SingleGraph A) {
/* The sink state gets the index of the next state to be created */
const int sink=A->number_of_states;
SingleGraphState sink_state=add_state(A);
/* The sink state is created non final (the loop below will reverse it,
 * as for any other state), and its default transition loops on itself */
sink_state->default_state=sink;
for (int n=0;n<A->number_of_states;n++) {
   SingleGraphState state=A->states[n];
   /* Finality reversal */
   if (is_final_state(state)) {
      unset_final_state(state);
   } else {
      set_final_state(state);
   }
   if (state->default_state!=-1) {
      /* A default transition already covers the remaining symbols */
      continue;
   }
   /* No default transition: we compute the symbols that are not
    * covered by the explicit transitions */
   symbol_t* missing=LEXIC_minus_transitions(language,state->outgoing_transitions);
   if (missing==NULL) {
      continue;
   }
   add_all_outgoing_transitions(state,missing,sink);
   /* The transition that was just added is tagged by a symbol list;
    * we turn it into one transition per symbol */
   flatten_transition(state->outgoing_transitions);
   /* 'missing' is a symbol list, hence free_symbols and not free_symbol */
   free_symbols(missing);
}
}
/**
 * Debug dump of a SingleGraph. For each state, prints an arrow if it is
 * initial, its index followed by 't' (final) or ':' (non final), its
 * default state, and then every outgoing transition as "<label>,<target>".
 * A NULL graph is reported as "NULL graph".
 */
void print_graph(SingleGraph g) {
u_printf("--------------------------------\n");
if (g==NULL) {
   u_printf("NULL graph\n");
} else {
   for (int n=0;n<g->number_of_states;n++) {
      SingleGraphState state=g->states[n];
      /* Initial state marker */
      if (!is_initial_state(state)) {
         u_printf(" ");
      } else {
         u_printf("-> ");
      }
      /* State number and finality */
      if (!is_final_state(state)) {
         u_printf("%d : ",n);
      } else {
         u_printf("%d t ",n);
      }
      u_printf("(def %d) \n\t",state->default_state);
      /* Outgoing transitions */
      for (Transition* t=state->outgoing_transitions;t!=NULL;t=t->next) {
         symbols_dump((symbol_t*)t->label);
         u_printf(",%d ",t->state_number);
      }
      u_printf("\n\n");
   }
}
u_printf("--------------------------------\n");
}
// simulate NTM until it halts or nr_steps steps are reached void NTM::simulate(uint max_nr_steps, bool verbose) { uint c_symbol; uint c_state; while(nr_steps <= max_nr_steps) { list<TM_State> new_tm_states; if ( verbose ) { cout << "Nr steps: " << nr_steps << endl; cout << "There are " << tm_states.size() << " states" << endl; } for (list<TM_State>::const_iterator it = tm_states.begin(); it != tm_states.end(); ++it) { c_symbol = it->tape.read(); c_state = it->state; if ( !is_final_state(c_state) ) { for(list<Action>::const_iterator act_it = trans_actions[c_state][c_symbol].begin(); act_it != trans_actions[c_state][c_symbol].end(); ++act_it) { TM_State tms(*it); tms.state = act_it->state; tms.tape.write(act_it->symbol); tms.tape.move(act_it->move); new_tm_states.push_front(tms); // if(verbose) { // print(tms); // } } } } nr_steps++; swap(tm_states, new_tm_states); } cout << "Done simulating!" << endl; }
// Ex. 14 automaton* read_automaton(FILE* fptr) { int state_c; int symbol_c; int initial_state; int final_state_c; int* final_states; s_table_element* symbol_table; state* state_table; char line[MAX_LEN+1]; if(fseek(fptr, 0, SEEK_SET)) { puts("Couldn't seek the start of the file!"); return NULL; } // Reading the number of states, number of symbols, and initial state fgets(line, (int)MAX_LEN, fptr); sscanf(line, "%d %d %d", &state_c, &symbol_c, &initial_state); // Constructing the table of final states final_states = read_final_states(fptr, &final_state_c); // Constructing the symbol table symbol_table = read_symbols(symbol_c, fptr); // Constructing the state table state_table = malloc(sizeof(state)*state_c); if(state_table == NULL) { puts("Couldn't allocate the state table"); return NULL; } int type; for(int i = 0; i < state_c; ++i) { if((i+1) == initial_state) type = 1; else if(is_final_state(final_states, final_state_c, i+1)) type = -1; else type = 0; state_table[i] = new_state(i+1, type, NULL); } while(read_transition(state_table, fptr)); automaton* ret = malloc(sizeof(automaton)); if(ret == NULL) return NULL; ret->initial_state = initial_state; ret->states = state_c; ret->symbol_c = symbol_c; ret->state_table = state_table; ret->symbol_table = symbol_table; ret->final_states = final_states; ret->final_state_c = final_state_c; return ret; }
/**
 * Concatenates B at the end of A. A is modified in place; B is only read.
 *
 * The states of B are first copied into A, then every final state of A
 * (among A's original states) receives a copy of the outgoing transitions
 * of each initial state of B.
 */
void elag_concat(language_t* language,SingleGraph A,SingleGraph B) {
/* Number of states of A before B's states are appended */
const int original_size=A->number_of_states;
/* renumber[b] is the index in A of the copy of B's state #b */
int* renumber=(int*)malloc(B->number_of_states*sizeof(int));
if (renumber==NULL) {
   fatal_alloc_error("elag_concat");
}
/* First pass: we create in A one empty state per state of B */
for (int b=0;b<B->number_of_states;b++) {
   renumber[b]=A->number_of_states;
   add_state(A);
}
/* Second pass: we fill the copies (transitions, default state, finality) */
for (int b=0;b<B->number_of_states;b++) {
   SingleGraphState src=B->states[b];
   SingleGraphState dst=A->states[renumber[b]];
   dst->outgoing_transitions=clone_transition_list(src->outgoing_transitions,renumber,dup_symbol);
   if (src->default_state==-1) {
      dst->default_state=-1;
   } else {
      dst->default_state=renumber[src->default_state];
   }
   if (is_final_state(src)) {
      set_final_state(dst);
   }
}
/* Now the concatenation itself.
 * 1) Default transitions that outgo from (the copies of) B's initial
 *    states are replaced by explicit transitions */
struct list_int* initials=get_initial_states(B);
for (struct list_int* init=initials;init!=NULL;init=init->next) {
   explicit_default_transition(language,A,renumber[init->n]);
}
/* 2) Each state that was final in the original A is plugged onto B */
for (int a=0;a<original_size;a++) {
   if (!is_final_state(A->states[a])) {
      continue;
   }
   /* The state becomes non final, and its default transition has to be
    * made explicit, otherwise the concatenation would modify the
    * recognized language */
   unset_final_state(A->states[a]);
   explicit_default_transition(language,A,a);
   for (struct list_int* init=initials;init!=NULL;init=init->next) {
      const int target=renumber[init->n];
      concat(&(A->states[a]->outgoing_transitions),
             clone_transition_list(A->states[target]->outgoing_transitions,NULL,dup_symbol));
      /* If an initial state of B is final, the state stays final */
      if (is_final_state(A->states[target])) {
         set_final_state(A->states[a]);
      }
   }
}
free(renumber);
free_list_int(initials);
}
// // this function explores a sub-graph, considering tokens as strings // void explorer_sub_automate_normalization_string(Fst2* automate,int n, struct normalization_tree* noeud_normalization, unichar* output,struct norm_info** TEMP_LIST) { Fst2State etat; etat=automate->states[n]; if (is_final_state(etat)) { // if we are in a final state (*TEMP_LIST)=insert_in_norm_info_list(output,noeud_normalization,(*TEMP_LIST)); } Transition* trans; trans=etat->transitions; unichar tmp[1000]; while (trans!=NULL) { if (trans->tag_number<0) { // case of a sub-graph struct norm_info* TMP=NULL; explorer_sub_automate_normalization_string(automate,automate->initial_states[-(trans->tag_number)],noeud_normalization, output,&TMP); while (TMP!=NULL) { // we continue to explore the current automaton explorer_sub_automate_normalization_string(automate,trans->state_number,TMP->node, TMP->output,TEMP_LIST); struct norm_info* z=TMP; TMP=TMP->next; free_norm_info(z); } } else { // normal transition Fst2Tag etiq; etiq=automate->tags[trans->tag_number]; u_strcpy(tmp,output); u_strcat(tmp," "); if (etiq->output!=NULL && u_strcmp(etiq->output,"") && u_strcmp(etiq->output,"<E>") && !only_spaces(etiq->output)) { // we append the output if it exists and is not epsilon u_strcat(tmp,etiq->output); } struct normalization_tree_transition* trans_norm; trans_norm=get_trans_arbre_normalization_string(etiq->input,noeud_normalization->trans); if (trans_norm==NULL) { // if the transition does not exist in the tree, we create it trans_norm=new_trans_arbre_normalization_string(etiq->input); // we also create the destination node trans_norm->node=new_normalization_tree(); trans_norm->next=noeud_normalization->trans; noeud_normalization->trans=trans_norm; } explorer_sub_automate_normalization_string(automate,trans->state_number,trans_norm->node, tmp,TEMP_LIST); } trans=trans->next; } }
/**
 * Allocates and returns an array giving a color (0 or 1) to each state
 * of 'A'. A state gets color 0 if it has the same finality as state #0,
 * and color 1 otherwise, so that state #0 is always colored with 0.
 * '*nbColors' is set to the number of colors actually used: 1 if all
 * states have the same finality, 2 otherwise.
 */
int* init_colors(SingleGraph A,int *nbColors) {
int* color=(int*)calloc(A->number_of_states,sizeof(int));
if (color==NULL) {
   fatal_alloc_error("init_colors");
}
/* Finality of state #0: it defines what color 0 means */
const bool reference=(is_final_state(A->states[0])!=0);
/* Becomes true as soon as a state of the other finality is found */
bool two_colors=false;
for (int q=0;q<A->number_of_states;q++) {
   const bool finality=(is_final_state(A->states[q])!=0);
   if (finality!=reference) {
      color[q]=1;
      two_colors=true;
   } else {
      color[q]=0;
   }
}
if (two_colors) {
   (*nbColors)=2;
} else {
   (*nbColors)=1;
}
return color;
}
/**
 * An Elag constraint is of the form: <=> .... <=> .... <=>
 * This function looks for states that are pointed to by middle '<=>'
 * transitions. It places their numbers into 'constraints' and returns the
 * size of this array, i.e. the number of constraints expressed by this
 * Elag rule.
 *
 * @param aut          the rule automaton
 * @param constraints  output array; the caller must provide room for
 *                     ELAG_MAX_CONSTRAINTS entries
 * @return 0 if no '<=>' transition leaves state #0; the number of distinct
 *         middle '<=>' target states otherwise. A fatal error is raised if
 *         a '<=>' transition loops on state #0, if there are too many
 *         constraints, or if no middle '<=>' is found.
 */
int count_constraints(Fst2Automaton* aut,int* constraints) {
/* 'source' is the state reached by the opening '<=>' that leaves state #0.
 * 0 means "not found", which is safe because a '<=>' going to state #0
 * is rejected below. */
int source=0;
int e;
Transition* t;
int c;
int nbConstraints=0;
symbol_t* symbol;
SingleGraph automaton=aut->automaton;
for (t=automaton->states[0]->outgoing_transitions;t!=NULL && source==0;t=t->next) {
   symbol=t->label;
   if (symbol->type==S_EQUAL) {
      if (t->state_number==0) {
         fatal_error("Illegal cycle in grammar\n");
      }
      /* We must remember the destination state: 'source' is compared to
       * destination state numbers below. The transition is tagged by a
       * symbol (t->label), so t->tag_number is not meaningful here. */
      source=t->state_number;
   }
}
if (source==0) {
   /* If there are no constraints */
   return 0;
}
/* We look for '<=>' transitions, but only from the state 1, because
 * from state 0, we would take into account all the '<=>' that begin rules. */
for (e=1;e<automaton->number_of_states;e++) {
   for (t=automaton->states[e]->outgoing_transitions;t!=NULL;t=t->next) {
      symbol=t->label;
      if (t->state_number!=source && symbol->type==S_EQUAL
          && !is_final_state(automaton->states[t->state_number])) {
         /* We don't take into account '<=>' transitions that go to final
          * states because they are not middle '<=>' transitions. */
         for (c=0;c<nbConstraints;c++) {
            if (constraints[c]==t->state_number) {
               /* We stop if the constraint is already in the list */
               break;
            }
         }
         if (c==nbConstraints) {
            /* New constraint: c is the first free slot of the array */
            if (++nbConstraints>=ELAG_MAX_CONSTRAINTS) {
               fatal_error("Too many constraints with same condition\n");
            }
            constraints[c]=t->state_number;
         }
      }
   }
}
if (nbConstraints==0) {
   fatal_error("Middle delimitor '<=>' not found\n");
}
return nbConstraints;
}
/**
 * Tells what the given condition graph says about <E> matching:
 * - CHK_DOES_NOT_MATCH_E if the graph has no state at all
 * - CHK_MATCHES_E if its initial state is final, i.e. a final state is
 *   reached without matching anything in the text
 * - CHK_DONT_KNOW otherwise
 * A negative initial state index is reported as a fatal internal error.
 */
static E_MATCHING_STATUS get_status(SingleGraph g) {
if (g->number_of_states==0) {
   return CHK_DOES_NOT_MATCH_E;
}
const int initial=get_initial_state(g);
if (initial<0) {
   fatal_error("Internal error in get_status: invalid negative initial state %d\n",initial);
}
return is_final_state(g->states[initial])?CHK_MATCHES_E:CHK_DONT_KNOW;
}
/**
 * Creates a SingleGraph copy of the given .fst2 subgraph, using
 * the same tag numeration.
 *
 * In the fst2, the states of subgraph #n occupy the indices
 * [shift, shift+n_states) of fst2->states, and transitions point to these
 * global indices. In the copy, states are numbered from 0, so every
 * destination state number is shifted back by 'shift'.
 *
 * @param fst2  the grammar that contains the subgraph
 * @param n     the number of the subgraph to copy
 * @return a new SingleGraph with integer tags
 */
SingleGraph create_copy_of_fst2_subgraph(Fst2* fst2,int n) {
int n_states=fst2->number_of_states_per_graphs[n];
SingleGraph g=new_SingleGraph(n_states,INT_TAGS);
/* Global index of the first state of the subgraph */
int shift=fst2->initial_states[n];
for (int i=0;i<n_states;i++) {
   SingleGraphState dest=add_state(g);
   Fst2State src=fst2->states[i+shift];
   if (is_initial_state(src)) {
      set_initial_state(dest);
   }
   if (is_final_state(src)) {
      set_final_state(dest);
   }
   for (Transition* t=src->transitions;t!=NULL;t=t->next) {
      /* Global fst2 state number -> state number local to the copy */
      add_outgoing_transition(dest,t->tag_number,t->state_number-shift);
   }
}
return g;
}
/**
 * Returns 1 if we can match <E> from the current state, with or without
 * conditions; 0 otherwise.
 *
 * The exploration is recursive and memoized through bits of s->control:
 * - VISITED_MARK: the state has already been explored
 * - TMP_LOOP_MARK: the state is currently being explored (loop detection)
 * - UNCONDITIONAL_E_MATCH / CONDITIONAL_E_MATCH: result of the exploration
 *
 * @param initial_state          index of the first state of the current graph;
 *                               used to turn a state index into an index in
 *                               'conditions_for_states'
 * @param current_state          index in 'states' of the state to explore
 * @param states                 the states, indexed by state number
 * @param tags                   the tags; bit 0 of a tag's 'control' field is
 *                               tested to recognize <E> transitions
 * @param current_graph          only passed down to recursive calls
 * @param graph_names            only passed down to recursive calls
 * @param conditions_for_states  per-state condition lists of the current graph
 * @param graph_conditions       output: reset to NULL on entry; on return it
 *                               holds NULL or a clone of the conditions of
 *                               the current state
 */
int graph_matches_E(int initial_state,int current_state,const Fst2State* states,Fst2Tag* tags,
                    int current_graph,unichar** graph_names,
                    ConditionList conditions_for_states[],
                    ConditionList *graph_conditions) {
Transition* l;
Fst2State s;
/* OR of the results obtained for all the outgoing transitions */
int ret_value=0;
int ret;
*graph_conditions=NULL;
s=states[current_state];
if (is_final_state(s)) {
   /* If we are arrived in a final state, then the graph matches <E> */
   set_bit_mask(&(s->control),UNCONDITIONAL_E_MATCH);
   return 1;
}
if (is_bit_mask_set(s->control,TMP_LOOP_MARK)) {
   /* If we have a loop, we do nothing, because they will be
    * dealt with later. */
   return 0;
}
if (is_bit_mask_set(s->control,VISITED_MARK)) {
   /* If we are in state that has already been visited */
   if (is_bit_mask_set(s->control,UNCONDITIONAL_E_MATCH)) {
      /* If this state can match <E> without conditions, then we have finished */
      return 1;
   }
   if (is_bit_mask_set(s->control,CONDITIONAL_E_MATCH)) {
      /* If this state can match <E> with conditions, then we have finished, but
       * we copy the necessary conditions in 'graph_conditions'. */
      *graph_conditions=clone_ConditionList(conditions_for_states[current_state-initial_state]);
      return 1;
   }
   /* If the state has been visited and if it does not match <E>, then
    * there is nothing more to explore and we return 0 */
   return 0;
}
/* First visit: we mark the state as visited and as being explored */
set_bit_mask(&(s->control),VISITED_MARK);
set_bit_mask(&(s->control),TMP_LOOP_MARK);
l=s->transitions;
/* We look all the outgoing transitions */
while (l!=NULL) {
   if (l->tag_number<0) {
      /* If we have a subgraph call (negative tag number), we explore the
       * destination state of the transition; the called graph itself is
       * recorded below as a condition to be verified */
      *graph_conditions=NULL;
      ret=graph_matches_E(initial_state,l->state_number,states,tags,current_graph,graph_names,conditions_for_states,graph_conditions);
      if (ret==1) {
         /* If <E> can be matched after the subgraph call, we say that the
          * current state matches <E>, modulo the conditions to be verified */
         set_bit_mask(&(s->control),CONDITIONAL_E_MATCH);
         /* We insert the new condition in first position... */
         insert_graph_in_conditions(-(l->tag_number),graph_conditions);
         /* ...and we merge the new conditions with the existing ones for this state */
         merge_condition_lists(&conditions_for_states[current_state-initial_state],*graph_conditions);
         *graph_conditions=NULL;
      }
      ret_value=ret_value|ret;
   } else if (tags[l->tag_number]->control&1) {
      /* If we have an <E> transition, we explore the rest of the graph from it */
      *graph_conditions=NULL;
      ret=graph_matches_E(initial_state,l->state_number,states,tags,current_graph,graph_names,conditions_for_states,graph_conditions);
      if (ret==1) {
         /* If we can match <E> from the <E>-transition's destination state, then
          * we can match it from the current state. */
         if (*graph_conditions==NULL) {
            /* If there is no condition */
            set_bit_mask(&(s->control),UNCONDITIONAL_E_MATCH);
         } else {
            /* Otherwise, we add the condition to the existing ones */
            set_bit_mask(&(s->control),CONDITIONAL_E_MATCH);
            merge_condition_lists(&conditions_for_states[current_state-initial_state],*graph_conditions);
            *graph_conditions=NULL;
         }
      }
      ret_value=ret_value|ret;
   }
   l=l->next;
}
/* The exploration of this state is over: it is no longer "in progress" */
unset_bit_mask(&(s->control),TMP_LOOP_MARK);
/* The caller receives its own copy of the conditions of this state */
*graph_conditions=clone_ConditionList(conditions_for_states[current_state-initial_state]);
return ret_value;
}
/** * This function analyzes the given Elag rule automaton to find * where the rule and constraint parts are. As a side effect, it builds * a fst2 grammar ("foo.fst2" => "foo-conc.fst2") that can be used by * the Locate program to match the <!> .... <!> .... <!> part of the rule. */ void split_elag_rule(elRule* rule, const VersatileEncodingConfig* vec,language_t* language) { int c; /* This array contains the numbers of the states that are pointed to by * middle '<=>' of the constraints */ int constraints[ELAG_MAX_CONSTRAINTS]; int nbConstraints=count_constraints(rule->automaton,constraints); /* +1 because we have to count the <!> .... <!> .... <!> part of the rule */ rule->nbContexts=nbConstraints+1; rule->contexts=(elContext*)malloc(rule->nbContexts*sizeof(elContext)); if (rule->contexts==NULL) { fatal_alloc_error("split_elag_rule"); } for (c=0;c<rule->nbContexts;c++) { rule->contexts[c].left=NULL; rule->contexts[c].right=NULL; } int endR1=ELAG_UNDEFINED; int endR2=ELAG_UNDEFINED; int endC2=ELAG_UNDEFINED; for (Transition* t=rule->automaton->automaton->states[0]->outgoing_transitions;t!=NULL;t=t->next) { symbol_t* symbol=t->label; switch (symbol->type) { /* We split the unique <!> .... <!> .... <!> part */ case S_EXCLAM: if (rule->contexts[0].left!=NULL) { fatal_error("Too much '<!>' tags\n",rule->name); } rule->contexts[0].left=new_SingleGraph(PTR_TAGS); /* We look for the end of the first part of the rule */ endR1=get_sub_automaton(rule->automaton->automaton,rule->contexts[0].left,t->state_number,0,S_EXCLAM); rule->contexts[0].right=new_SingleGraph(PTR_TAGS); endR2=get_sub_automaton(rule->automaton->automaton,rule->contexts[0].right,endR1,0,S_EXCLAM); if (endR1==ELAG_UNDEFINED || endR2==ELAG_UNDEFINED || !is_final_state(rule->automaton->automaton->states[endR2])) { fatal_error("split_elag_rule: %s: parse error in <!> part\n",rule->name); } break; /* We split the nbConstraints <=> .... <=> .... 
<=> parts */ case S_EQUAL: if (rule->contexts[1].left!=NULL) { fatal_error("Non deterministic .fst2 file\n"); } for (c=0;c<nbConstraints;c++) { rule->contexts[c+1].left=new_SingleGraph(PTR_TAGS); get_sub_automaton(rule->automaton->automaton,rule->contexts[c+1].left,t->state_number,1,constraints[c]); rule->contexts[c+1].right=new_SingleGraph(PTR_TAGS); endC2=get_sub_automaton(rule->automaton->automaton,rule->contexts[c+1].right,constraints[c],0,S_EQUAL); if (endC2==ELAG_UNDEFINED || !is_final_state(rule->automaton->automaton->states[endC2])) { fatal_error("split_elag_rule: %s: parse error in <=> part\n",rule->name); } } break; default: fatal_error("Left delimitor '<!>' or '<=>' missing\n"); } } if (rule->contexts[0].left==NULL) { fatal_error("In grammar '%s': symbol '<!>' not found.\n",rule->name); } char buf[FILENAME_MAX]; remove_extension(rule->name,buf); strcat(buf,"-conc.fst2"); /* We create the.fst2 to be used by Locate */ Fst2Automaton* locate=make_locate_automaton(rule,language); save_automaton(locate,buf,vec,FST_LOCATE); free_Fst2Automaton(locate,free_symbol); }
/**
 * Generates, into 'outfile', a C program that recognizes the automaton
 * loaded into 'args' from "test.auto". In the generated program each state
 * #i is a function e<i>() that consumes one input character per transition
 * and calls the function of the destination state; sucesso() or erro() is
 * called when no transition applies, depending on the state being final.
 *
 * Nothing is generated if the automaton cannot be loaded or if the output
 * file cannot be opened.
 *
 * Fixes over the previous version: 'args' is a pointer (args->transitions,
 * is_final_state(args, i)); state functions are defined under the same
 * e<i> names as their prototypes and calls; a state with no outgoing
 * transition no longer gets an unbalanced closing brace.
 */
void make_with_func(args_t* args, char* outfile){
    int i, j, count;

    if (!get_args("test.auto", args)) {
        printf("Falha ao obter args\n");
        return;
    }

    /* create the output file */
    FILE * arq = fopen(outfile, "w");
    if (arq == NULL) {
        perror(outfile);
        return;
    }

    /* includes */
    fprintf(arq, "#include <stdio.h>\n");
    fprintf(arq, "#include <stdlib.h>\n");
    fprintf(arq, "#define TAM 10\n");
    fprintf(arq, "\n");

    /* global variables: input buffer and read position */
    fprintf(arq, "char v[TAM];\n");
    fprintf(arq, "int p = 0;\n");
    fprintf(arq, "\n");

    /* function prototypes */
    for(i = 0; i < args->num_states; i++){
        fprintf(arq, "void e%d();\n", i);
    }
    fprintf(arq, "void sucesso();\n");
    fprintf(arq, "void erro();\n");
    fprintf(arq, "\n");

    /* main: reads the sequence and starts from state 0 */
    fprintf(arq, "int main(void) {\n");
    fprintf(arq, " p = 0;\n");
    fprintf(arq, " printf(\"Sequencia:\"); \n");
    fprintf(arq, " fgets(v, TAM, stdin);\n");
    fprintf(arq, " e0();\n");
    fprintf(arq, " return(0);\n");
    fprintf(arq, "}\n");
    fprintf(arq, "\n");

    /* one function per state */
    for (i = 0; i < args->num_states; i++) {
        fprintf(arq, "\nvoid e%d(){\n", i);
        /* number of transitions emitted so far for this state */
        count = 0;
        for (j = 0; j < args->num_symbols; j++) {
            const int cell = i + j * args->num_states;
            if (args->transitions[cell] == -1) {
                continue;
            }
            if (count > 0){
                /* chain with the previous "if" */
                fprintf(arq, " else \n");
            }
            fprintf(arq, " if(v[p] == '%c'){ \n", args->symbols[j]);
            fprintf(arq, " p++;\n");
            fprintf(arq, " e%d();\n", args->transitions[cell]);
            fprintf(arq, " }");
            count++;
        }
        /* fallback when no transition applies; it only needs an "else"
         * block if at least one "if" was emitted */
        if (count > 0) {
            fprintf(arq, " else {\n");
        }
        fputs(is_final_state(args, i) ? " sucesso();\n" : " erro();\n", arq);
        if (count > 0) {
            fprintf(arq, " }\n");
        }
        fprintf(arq, "}\n");
    }
    fprintf(arq, "\n");

    /* sucesso() */
    fprintf(arq, "void sucesso(){\n");
    fprintf(arq, " printf(\" Sucesso!\");\n");
    fprintf(arq, "}\n");
    fprintf(arq, "\n");

    /* erro() */
    fprintf(arq, "void erro(){\n");
    fprintf(arq, " printf(\" Erro!\");\n");
    fprintf(arq, "}\n");
    fprintf(arq, "\n");

    /* close the file */
    fclose(arq);
}
/**
 * Appends the given automaton to the given .fst2 output file and
 * increments the number of automata it contains.
 *
 * The function used to turn a symbol into a tag depends on fstf->type
 * (text, grammar or locate); any other type is a fatal error.
 */
void fst_file_write(Elag_fst_file_out* fstf,const Fst2Automaton* A) {
Ustring* tag=new_Ustring();
void (*symbol_to_tag)(const symbol_t*,Ustring*)=NULL;
if (fstf->type==FST_TEXT) {
   symbol_to_tag=symbol_to_text_label;
} else if (fstf->type==FST_GRAMMAR) {
   symbol_to_tag=symbol_to_grammar_label;
} else if (fstf->type==FST_LOCATE) {
   symbol_to_tag=symbol_to_locate_label;
} else {
   fatal_error("fst_file_write: invalid fstf->type: %d\n",fstf->type);
}
/* Header line: graph number and name */
u_fprintf(fstf->f,"-%d %S\n",fstf->nb_automata+1,A->name);
unichar deflabel[]={'<','d','e','f','>',0};
for (int q=0;q<A->automaton->number_of_states;q++) {
   SingleGraphState state=A->automaton->states[q];
   /* One line per state, starting with its finality */
   u_fprintf(fstf->f,"%C ",is_final_state(state)?'t':':');
   for (Transition* t=state->outgoing_transitions;t!=NULL;t=t->next) {
      if (t->tag_number==-1) {
         /* "EMPTY" transition created because the automaton
          * was emptied as trim time */
         u_strcpy(tag,"EMPTY");
      } else {
         symbol_t* symbol=t->label;
         symbol_to_tag(symbol,tag);
      }
      /* In a Locate .fst2, some tags are written by dedicated functions;
       * 'written' tells whether one of them handled the transition */
      int written=0;
      if (fstf->type==FST_LOCATE) {
         written=1;
         if (u_strcmp(tag->str, "<PNC>") == 0) {
            PNC_trans_write(fstf, t->state_number);
         } else if (u_strcmp(tag->str, "<CHFA>") == 0 || u_strcmp(tag->str, "<NB>") == 0) {
            CHFA_trans_write(fstf, t->state_number);
         } else if (u_strcmp(tag->str, "<.>") == 0) {
            LEXIC_trans_write(fstf, t->state_number);
         } else {
            written=0;
         }
      }
      if (!written) {
         /* Normal transition: tag index and destination state */
         int index=get_value_index(tag->str,fstf->labels);
         u_fprintf(fstf->f,"%d %d ",index,t->state_number);
      }
   }
   if (state->default_state!=-1) {
      /* Default transitions are only expected in grammar automata */
      if (fstf->type!=FST_GRAMMAR) {
         error("Unexpected <def> label in text/locate automaton\n");
      }
      int index=get_value_index(deflabel,fstf->labels);
      u_fprintf(fstf->f,"%d %d ",index,state->default_state);
   }
   u_fputc('\n',fstf->f);
}
/* End of graph marker */
u_fprintf(fstf->f,"f \n");
free_Ustring(tag);
fstf->nb_automata++;
}
/**
 * This function explores the normalization grammar to construct
 * the normalization tree. If the 'list' parameter is NULL, then we
 * are in the main call to the main graph; otherwise, we are within
 * a subgraph.
 *
 * @param fst2           the normalization grammar
 * @param current_state  index in fst2->states of the state to explore
 * @param node           current node of the normalization tree
 * @param tokens         tokens used to find those that match a tag input
 * @param output         output accumulated so far
 * @param alph           alphabet passed to get_token_list_for_sequence
 * @param list           NULL in the main graph; otherwise, receives the
 *                       (output,node) pairs reached on final states
 */
void explore_normalization_fst2(Fst2* fst2,int current_state,
                                struct normalization_tree* node,
                                struct string_hash* tokens,const unichar* output,
                                const Alphabet* alph,struct norm_info** list) {
Fst2State state=fst2->states[current_state];
if (is_final_state(state)) {
   /* If we are in a final state, we behave differently if we are in a subgraph
    * or in the main call to the main graph. */
   if (list!=NULL) {
      /* Subgraph: the result is handed back to the caller through 'list' */
      (*list)=insert_in_norm_info_list(output,node,(*list));
   } else {
      /* Main graph: the output is stored in the tree node itself */
      node->outputs=sorted_insert(output,node->outputs);
   }
}
Transition* trans=state->transitions;
/* Buffer for the output extended by the current transition.
 * NOTE(review): it is filled with u_strcpy/u_strcat without any visible
 * length check -- confirm that outputs cannot exceed 1024 characters. */
unichar tmp[1024];
while (trans!=NULL) {
   if (trans->tag_number<0) {
      /* Case of a subgraph call: we first collect in 'tmp_list' what
       * is reached on the final states of the subgraph */
      struct norm_info* tmp_list=NULL;
      explore_normalization_fst2(fst2,fst2->initial_states[-(trans->tag_number)],node,
                                 tokens,output,alph,&tmp_list);
      while (tmp_list!=NULL) {
         /* We continue to explore the current graph */
         explore_normalization_fst2(fst2,trans->state_number,tmp_list->node,
                                    tokens,tmp_list->output,alph,list);
         struct norm_info* z=tmp_list;
         tmp_list=tmp_list->next;
         free_norm_info(z);
      }
   } else {
      /* If we have a normal transition */
      Fst2Tag tag=fst2->tags[trans->tag_number];
      u_strcpy(tmp,output);
      u_strcat(tmp," ");
      if (tag->output!=NULL && tag->output[0]!='\0' && u_strcmp(tag->output,"<E>") && !only_spaces(tag->output)) {
         /* We append the output if it exists and is not epsilon */
         u_strcat(tmp,tag->output);
      }
      if (!u_strcmp(tag->input,"<E>")) {
         /* If we have an epsilon transition, we go on in the fst2, but
          * we don't move in the normalization tree */
         explore_normalization_fst2(fst2,trans->state_number,node,tokens,tmp,alph,list);
      } else {
         /* If we have a normal transition, we explore all the tokens that match it */
         struct list_int* l=get_token_list_for_sequence(tag->input,alph,tokens);
         while (l!=NULL) {
            /* Then, we add a branch in the normalization tree for
             * each token. Note that it may introduce combinatory explosions
             * if the fst2 matches large sequences */
            struct normalization_tree_transition* trans_norm;
            trans_norm=get_transition(l->n,node->trans);
            if (trans_norm==NULL) {
               /* If the transition does not exist in the tree, we create it */
               trans_norm=new_normalization_tree_transition(l->n,new_normalization_tree(),node->trans);
               node->trans=trans_norm;
            }
            explore_normalization_fst2(fst2,trans->state_number,trans_norm->node,
                                       tokens,tmp,alph,list);
            /* The token list cell is released as soon as it has been used */
            struct list_int* L=l;
            l=l->next;
            free(L);
         }
      }
   }
   trans=trans->next;
}
}
void scan_graph(int n_graph, // number of current graph int e, // number of current state int pos, // int depth, struct parsing_info** liste_arrivee, unichar* mot_token_buffer, struct fst2txt_parameters* p,Abstract_allocator prv_alloc_recycle) { Fst2State etat_courant=p->fst2->states[e]; if (depth > MAX_DEPTH) { error( "\n" "Maximal stack size reached in graph %i!\n" "Recognized more than %i tokens starting from:\n" " ", n_graph, MAX_DEPTH); for (int i=0; i<60; i++) { error("%S",p->buffer[p->current_origin+i]); } error("\nSkipping match at this position, trying from next token!\n"); p->output[0] = '\0'; // clear output p->input_length = 0; // reset taille_entree empty(p->stack); // clear output stack if (liste_arrivee != NULL) { while (*liste_arrivee != NULL) { // free list of subgraph matches struct parsing_info* la_tmp=*liste_arrivee; *liste_arrivee=(*liste_arrivee)->next; la_tmp->next=NULL; // to don't free the next item free_parsing_info(la_tmp, prv_alloc_recycle); } } return; // exit(1); // don't exit, try at next position } depth++; if (is_final_state(etat_courant)) { // if we are in a final state p->stack->stack[p->stack->stack_pointer+1]='\0'; if (n_graph == 0) { // in main graph if (pos>=p->input_length/*sommet>u_strlen(output)*/) { // and if the recognized input is longer than the current one, it replaces it u_strcpy(p->output,p->stack->stack); p->input_length=(pos); } } else { // in a subgraph (*liste_arrivee)=insert_if_absent(pos,-1,-1,(*liste_arrivee),p->stack->stack_pointer+1, p->stack->stack,p->variables,NULL,NULL,-1,-1,NULL,-1, prv_alloc_recycle); } } if (pos+p->current_origin==p->text_buffer->size) { // if we are at the end of the text, we return return; } int SOMMET=p->stack->stack_pointer+1; int pos2; /* If there are some letter sequence transitions like %hello, we process them */ if (p->token_tree[e]->transition_array!=NULL) { if (p->buffer[pos+p->current_origin]==' ') {pos2=pos+1;if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' ');} /* we 
don't keep this line because of problems occur in sentence tokenizing * if the return sequence is defautly considered as a separator like space else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);} */ else pos2=pos; int position=0; unichar *token=mot_token_buffer; if (p->tokenization_policy==CHAR_BY_CHAR_TOKENIZATION || (is_letter(p->buffer[pos2+p->current_origin],p->alphabet) && (pos2+p->current_origin==0 || !is_letter(p->buffer[pos2+p->current_origin-1],p->alphabet)))) { /* If we are in character by character mode */ while (pos2+p->current_origin<p->text_buffer->size && is_letter(p->buffer[pos2+p->current_origin],p->alphabet)) { token[position++]=p->buffer[(pos2++)+p->current_origin]; if (p->tokenization_policy==CHAR_BY_CHAR_TOKENIZATION) { break; } } token[position]='\0'; if (position!=0 && (p->tokenization_policy==CHAR_BY_CHAR_TOKENIZATION || !(is_letter(token[position-1],p->alphabet) && is_letter(p->buffer[pos2+p->current_origin],p->alphabet)))) { // we proceed only if we have exactly read the contenu sequence // in both modes MERGE and REPLACE, we process the transduction if any int SOMMET2=p->stack->stack_pointer; Transition* RES=get_matching_tags(token,p->token_tree[e],p->alphabet); Transition* TMP; unichar* mot_token_new_recurse_buffer=NULL; if (RES!=NULL) { // we allocate a new mot_token_buffer for the scan_graph recursin because we need preserve current // token=mot_token_buffer mot_token_new_recurse_buffer=(unichar*)malloc(MOT_BUFFER_TOKEN_SIZE*sizeof(unichar)); if (mot_token_new_recurse_buffer==NULL) { fatal_alloc_error("scan_graph"); } } while (RES!=NULL) { p->stack->stack_pointer=SOMMET2; Fst2Tag etiq=p->fst2->tags[RES->tag_number]; traiter_transduction(p,etiq->output); int longueur=u_strlen(etiq->input); unichar C=token[longueur]; token[longueur]='\0'; if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) { // if we are in MERGE mode, we add to ouput the char we have 
read push_input_string(p->stack,token,0); } token[longueur]=C; scan_graph(n_graph,RES->state_number,pos2-(position-longueur),depth,liste_arrivee,mot_token_new_recurse_buffer,p); TMP=RES; RES=RES->next; free(TMP); } if (mot_token_new_recurse_buffer!=NULL) { free(mot_token_new_recurse_buffer); } } } } Transition* t=etat_courant->transitions; while (t!=NULL) { p->stack->stack_pointer=SOMMET-1; // we process the transition of the current state int n_etiq=t->tag_number; if (n_etiq<0) { // case of a sub-graph struct parsing_info* liste=NULL; unichar* pile_old; p->stack->stack[p->stack->stack_pointer+1]='\0'; pile_old = u_strdup(p->stack->stack); scan_graph((((unsigned)n_etiq)-1),p->fst2->initial_states[-n_etiq],pos,depth,&liste,mot_token_buffer,p); while (liste!=NULL) { p->stack->stack_pointer=liste->stack_pointer-1; u_strcpy(p->stack->stack,liste->stack); scan_graph(n_graph,t->state_number,liste->position,depth,liste_arrivee,mot_token_buffer,p); struct parsing_info* l_tmp=liste; liste=liste->next; l_tmp->next=NULL; // to don't free the next item free_parsing_info(l_tmp, prv_alloc_recycle); } u_strcpy(p->stack->stack,pile_old); free(pile_old); p->stack->stack_pointer=SOMMET-1; } else { // case of a normal tag Fst2Tag etiq=p->fst2->tags[n_etiq]; unichar* contenu=etiq->input; int contenu_len_possible_match=u_len_possible_match(contenu); if (etiq->type==BEGIN_OUTPUT_VAR_TAG) { fatal_error("Unsupported $|XXX( tags in Fst2Txt\n"); } if (etiq->type==END_OUTPUT_VAR_TAG) { fatal_error("Unsupported $|XXX) tags in Fst2Txt\n"); } if (etiq->type==BEGIN_VAR_TAG) { // case of a $a( variable tag //int old; struct transduction_variable* L=get_transduction_variable(p->variables,etiq->variable); if (L==NULL) { fatal_error("Unknown variable: %S\n",etiq->variable); } //old=L->start; if (p->buffer[pos+p->current_origin]==' ' && pos+p->current_origin+1<p->text_buffer->size) { pos2=pos+1; if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' '); } //else if (buffer[pos+origine_courante]==0x0d) 
{pos2=pos+2;if (MODE==MERGE) empiler(0x0a);} else pos2=pos; L->start_in_tokens=pos2; scan_graph(n_graph,t->state_number,pos2,depth,liste_arrivee,mot_token_buffer,p); //L->start=old; } else if (etiq->type==END_VAR_TAG) { // case of a $a) variable tag //int old; struct transduction_variable* L=get_transduction_variable(p->variables,etiq->variable); if (L==NULL) { fatal_error("Unknown variable: %S\n",etiq->variable); } //old=L->end; if (pos>0) L->end_in_tokens=pos-1; else L->end_in_tokens=pos; // BUG: qd changement de buffer, penser au cas start dans ancien buffer et end dans nouveau scan_graph(n_graph,t->state_number,pos,depth,liste_arrivee,mot_token_buffer,p); //L->end=old; } else if ((contenu_len_possible_match==5) && (!u_trymatch_superfast5(contenu,ETIQ_MOT_LN5))) { // case of transition by any sequence of letters if (p->buffer[pos+p->current_origin]==' ' && pos+p->current_origin+1<p->text_buffer->size) { pos2=pos+1; if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' '); } //else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);} else pos2=pos; unichar* mot=mot_token_buffer; int position=0; if (p->tokenization_policy==CHAR_BY_CHAR_TOKENIZATION || ((pos2+p->current_origin)==0 || !is_letter(p->buffer[pos2+p->current_origin-1],p->alphabet))) { while (pos2+p->current_origin<p->text_buffer->size && is_letter(p->buffer[pos2+p->current_origin],p->alphabet)) { mot[position++]=p->buffer[(pos2++)+p->current_origin]; } mot[position]='\0'; if (position!=0) { // we proceed only if we have read a letter sequence // in both modes MERGE and REPLACE, we process the transduction if any traiter_transduction(p,etiq->output); if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) { // if we are in MERGE mode, we add to ouput the char we have read push_output_string(p->stack,mot); } scan_graph(n_graph,t->state_number,pos2,depth,liste_arrivee,mot_token_buffer,p); } } } else if 
((contenu_len_possible_match==4) && (!u_trymatch_superfast4(contenu,ETIQ_NB_LN4))) { // case of transition by any sequence of digits if (p->buffer[pos+p->current_origin]==' ') { pos2=pos+1; if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' '); } //else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);} else pos2=pos; unichar* mot=mot_token_buffer; int position=0; while (pos2+p->current_origin<p->text_buffer->size && (p->buffer[pos2+p->current_origin]>='0') && (p->buffer[pos2+p->current_origin]<='9')) { mot[position++]=p->buffer[(pos2++)+p->current_origin]; } mot[position]='\0'; if (position!=0) { // we proceed only if we have read a letter sequence // in both modes MERGE and REPLACE, we process the transduction if any traiter_transduction(p,etiq->output); if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) { // if we are in MERGE mode, we add to ouput the char we have read push_output_string(p->stack,mot); } scan_graph(n_graph,t->state_number,pos2,depth,liste_arrivee,mot_token_buffer,p); } } else if ((contenu_len_possible_match==5) && (!u_trymatch_superfast5(contenu,ETIQ_MAJ_LN5))) { // case of upper case letter sequence if (p->buffer[pos+p->current_origin]==' ') {pos2=pos+1;if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' ');} //else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);} else pos2=pos; unichar* mot=mot_token_buffer; int position=0; if (p->tokenization_policy==CHAR_BY_CHAR_TOKENIZATION || ((pos2+p->current_origin)==0 || !is_letter(p->buffer[pos2+p->current_origin-1],p->alphabet))) { while (pos2+p->current_origin<p->text_buffer->size && is_upper(p->buffer[pos2+p->current_origin],p->alphabet)) { mot[position++]=p->buffer[(pos2++)+p->current_origin]; } mot[position]='\0'; if (position!=0 && !is_letter(p->buffer[pos2+p->current_origin],p->alphabet)) { // we proceed only if we have read an upper case letter sequence // which is not 
followed by a lower case letter // in both modes MERGE and REPLACE, we process the transduction if any traiter_transduction(p,etiq->output); if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) { // if we are in MERGE mode, we add to ouput the char we have read push_input_string(p->stack,mot,0); } scan_graph(n_graph,t->state_number,pos2,depth,liste_arrivee,mot_token_buffer,p); } } } else if ((contenu_len_possible_match==5) && (!u_trymatch_superfast5(contenu,ETIQ_MIN_LN5))) { // case of lower case letter sequence if (p->buffer[pos+p->current_origin]==' ') {pos2=pos+1;if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' ');} //else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);} else pos2=pos; unichar* mot=mot_token_buffer; int position=0; if (p->tokenization_policy==CHAR_BY_CHAR_TOKENIZATION || (pos2+p->current_origin==0 || !is_letter(p->buffer[pos2+p->current_origin-1],p->alphabet))) { while (pos2+p->current_origin<p->text_buffer->size && is_lower(p->buffer[pos2+p->current_origin],p->alphabet)) { mot[position++]=p->buffer[(pos2++)+p->current_origin]; } mot[position]='\0'; if (position!=0 && !is_letter(p->buffer[pos2+p->current_origin],p->alphabet)) { // we proceed only if we have read a lower case letter sequence // which is not followed by an upper case letter // in both modes MERGE and REPLACE, we process the transduction if any traiter_transduction(p,etiq->output); if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) { // if we are in MERGE mode, we add to ouput the char we have read push_input_string(p->stack,mot,0); } scan_graph(n_graph,t->state_number,pos2,depth,liste_arrivee,mot_token_buffer,p); } } } else if ((contenu_len_possible_match==5) && (!u_trymatch_superfast5(contenu,ETIQ_PRE_LN5))) { // case of a sequence beginning by an upper case letter if (p->buffer[pos+p->current_origin]==' ') {pos2=pos+1;if 
(p->output_policy==MERGE_OUTPUTS) push(p->stack,' ');} //else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);} else pos2=pos; unichar* mot=mot_token_buffer; int position=0; if (p->tokenization_policy==CHAR_BY_CHAR_TOKENIZATION || (is_upper(p->buffer[pos2+p->current_origin],p->alphabet) && (pos2+p->current_origin==0 || !is_letter(p->buffer[pos2+p->current_origin-1],p->alphabet)))) { while (pos2+p->current_origin<p->text_buffer->size && is_letter(p->buffer[pos2+p->current_origin],p->alphabet)) { mot[position++]=p->buffer[(pos2++)+p->current_origin]; } mot[position]='\0'; if (position!=0 && !is_letter(p->buffer[pos2+p->current_origin],p->alphabet)) { // we proceed only if we have read a letter sequence // which is not followed by a letter // in both modes MERGE and REPLACE, we process the transduction if any traiter_transduction(p,etiq->output); if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) { // if we are in MERGE mode, we add to ouput the char we have read push_input_string(p->stack,mot,0); } scan_graph(n_graph,t->state_number,pos2,depth,liste_arrivee,mot_token_buffer,p); } } } else if ((contenu_len_possible_match==5) && (!u_trymatch_superfast5(contenu,ETIQ_PNC_LN5))) { // case of a punctuation sequence if (p->buffer[pos+p->current_origin]==' ') {pos2=pos+1;if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' ');} //else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);} else pos2=pos; unichar C=p->buffer[pos2+p->current_origin]; if (C==';' || C=='!' || C=='?' 
|| C==':' || C==0xbf || C==0xa1 || C==0x0e4f || C==0x0e5a || C==0x0e5b || C==0x3001 || C==0x3002 || C==0x30fb) { // in both modes MERGE and REPLACE, we process the transduction if any traiter_transduction(p,etiq->output); if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) { // if we are in MERGE mode, we add to ouput the char we have read push(p->stack,C); } scan_graph(n_graph,t->state_number,pos2+1,depth,liste_arrivee,mot_token_buffer,p); } else { // we consider the case of ... // BUG: if ... appears at the end of the buffer if (C=='.') { if ((pos2+p->current_origin+2)<p->text_buffer->size && p->buffer[pos2+p->current_origin+1]=='.' && p->buffer[pos2+p->current_origin+2]=='.') { traiter_transduction(p,etiq->output); if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) { // if we are in MERGE mode, we add to ouput the ... we have read push(p->stack,C);push(p->stack,C);push(p->stack,C); } scan_graph(n_graph,t->state_number,pos2+3,depth,liste_arrivee,mot_token_buffer,p); } else { // we consider the . 
as a normal punctuation sign traiter_transduction(p,etiq->output); if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) { // if we are in MERGE mode, we add to ouput the char we have read push(p->stack,C); } scan_graph(n_graph,t->state_number,pos2+1,depth,liste_arrivee,mot_token_buffer,p); } } } } else if ((contenu_len_possible_match==3) && (!u_trymatch_superfast3(contenu,ETIQ_E_LN3))) { // case of an empty sequence // in both modes MERGE and REPLACE, we process the transduction if any traiter_transduction(p,etiq->output); scan_graph(n_graph,t->state_number,pos,depth,liste_arrivee,mot_token_buffer,p); } else if ((contenu_len_possible_match==3) && (!u_trymatch_superfast3(contenu,ETIQ_CIRC_LN3))) { // case of a new line sequence if (p->buffer[pos+p->current_origin]=='\n') { // in both modes MERGE and REPLACE, we process the transduction if any traiter_transduction(p,etiq->output); if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) { // if we are in MERGE mode, we add to ouput the char we have read push(p->stack,'\n'); } scan_graph(n_graph,t->state_number,pos+1,depth,liste_arrivee,mot_token_buffer,p); } } else if ((contenu_len_possible_match==1) && (!u_trymatch_superfast1(contenu,'#')) && (!(etiq->control&RESPECT_CASE_TAG_BIT_MASK))) { // case of a no space condition if (p->buffer[pos+p->current_origin]!=' ') { // in both modes MERGE and REPLACE, we process the transduction if any traiter_transduction(p,etiq->output); scan_graph(n_graph,t->state_number,pos,depth,liste_arrivee,mot_token_buffer,p); } } else if ((contenu_len_possible_match==1) && (!u_trymatch_superfast1(contenu,' '))) { // case of an obligatory space if (p->buffer[pos+p->current_origin]==' ') { // in both modes MERGE and REPLACE, we process the transduction if any traiter_transduction(p,etiq->output); if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) { // if we 
are in MERGE mode, we add to ouput the char we have read push(p->stack,' '); } scan_graph(n_graph,t->state_number,pos+1,depth,liste_arrivee,mot_token_buffer,p); } } else if ((contenu_len_possible_match==3) && (!u_trymatch_superfast5(contenu,ETIQ_L_LN3))) { // case of a single letter if (p->buffer[pos+p->current_origin]==' ') {pos2=pos+1;if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' ');} //else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);} else pos2=pos; if (is_letter(p->buffer[pos2+p->current_origin],p->alphabet)) { // in both modes MERGE and REPLACE, we process the transduction if any traiter_transduction(p,etiq->output); if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) { // if we are in MERGE mode, we add to ouput the char we have read push(p->stack,p->buffer[pos2+p->current_origin]); } scan_graph(n_graph,t->state_number,pos2+1,depth,liste_arrivee,mot_token_buffer,p); } } else { // case of a normal letter sequence if (p->buffer[pos+p->current_origin]==' ') {pos2=pos+1;if (p->output_policy==MERGE_OUTPUTS) push(p->stack,' ');} //else if (buffer[pos+origine_courante]==0x0d) {pos2=pos+2;if (MODE==MERGE) empiler(0x0a);} else pos2=pos; if (etiq->control&RESPECT_CASE_TAG_BIT_MASK) { // case of exact case match int position=0; while (pos2+p->current_origin<p->text_buffer->size && p->buffer[pos2+p->current_origin]==contenu[position]) { pos2++; position++; } if (contenu[position]=='\0' && position!=0 && !(is_letter(contenu[position-1],p->alphabet) && is_letter(p->buffer[pos2+p->current_origin],p->alphabet))) { // we proceed only if we have exactly read the contenu sequence // in both modes MERGE and REPLACE, we process the transduction if any traiter_transduction(p,etiq->output); if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) { // if we are in MERGE mode, we add to ouput the char we have read 
push_input_string(p->stack,contenu,0); } scan_graph(n_graph,t->state_number,pos2,depth,liste_arrivee,mot_token_buffer,p); } } else { // case of variable case match // the letter sequences may have been caught by the arbre_etiquette structure int position=0; unichar* mot=mot_token_buffer; while (pos2+p->current_origin<p->text_buffer->size && is_equal_or_uppercase(contenu[position],p->buffer[pos2+p->current_origin],p->alphabet)) { mot[position++]=p->buffer[(pos2++)+p->current_origin]; } mot[position]='\0'; if (contenu[position]=='\0' && position!=0 && !(is_letter(contenu[position-1],p->alphabet) && is_letter(p->buffer[pos2+p->current_origin],p->alphabet))) { // we proceed only if we have exactly read the contenu sequence // in both modes MERGE and REPLACE, we process the transduction if any traiter_transduction(p,etiq->output); if (p->output_policy==MERGE_OUTPUTS /*|| etiq->transduction==NULL || etiq->transduction[0]=='\0'*/) { // if we are in MERGE mode, we add to ouput the char we have read push_input_string(p->stack,mot,0); } scan_graph(n_graph,t->state_number,pos2,depth,liste_arrivee,mot_token_buffer,p); } } } } t=t->next; } }