C++ (Cpp) new_Fst2Automaton Exemples

Exemple #1

0

Afficher le fichier

Fichier : ElagRulesCompilation.cpp Projet : Rajat-dhyani/UnitexGramLab

/**
 * Builds the automaton used for pattern matching of a rule's context.
 *
 * The result wraps a copy of the left part of the rule's first context,
 * to which the right part of that same context is concatenated. Then, for
 * every POS of the language flagged as ignorable, a transition tagged by
 * that POS is added from each state of index >= 1 to itself.
 *
 * @param rule     the Elag rule; only rule->contexts[0] is read here
 * @param language the language description providing the POS list
 * @return a newly created Fst2Automaton holding the locate automaton
 */
Fst2Automaton* make_locate_automaton(elRule* rule,language_t* language) {
   Fst2Automaton* locate=new_Fst2Automaton(NULL,-1);
   /* Left context first, then the right one is appended to it */
   locate->automaton=clone(rule->contexts[0].left,dup_symbol);
   elag_concat(language,locate->automaton,rule->contexts[0].right);
   /* Loops on ignorable POS */
   for (int pos_index=0;pos_index<language->POSs->size;pos_index++) {
      POS_t* pos=(POS_t*)language->POSs->value[pos_index];
      if (!pos->ignorable) {
         /* Only POS that can be ignored produce loops */
         continue;
      }
      /* Note that the iteration starts at state 1, so state 0 gets no loop */
      for (int state=1;state<locate->automaton->number_of_states;state++) {
         /* A fresh symbol is built for each state and released right
          * after the transition has been added */
         symbol_t* loop_label=new_symbol_POS(pos,-1);
         add_outgoing_transition(locate->automaton->states[state],loop_label,state);
         free_symbol(loop_label);
      }
   }
   return locate;
}

Exemple #2

0

Afficher le fichier

Fichier : ElagRulesCompilation.cpp Projet : Rajat-dhyani/UnitexGramLab

/**
 * Compiles an Elag rule into an automaton A such that the intersection of A
 * and a sentence automaton rejects the sequences that are not valid
 * regarding this rule.
 *
 * Note that the context automata stored in the rule are modified in place
 * (determinized, trimmed, and, for the first context, extended and
 * minimized).
 *
 * @param rule     the rule to compile
 * @param language the language description passed to the Elag operations
 * @return a newly created Fst2Automaton named after rule->automaton and
 *         holding the compiled automaton
 */
Fst2Automaton* compile_elag_rule(elRule* rule,language_t* language) {
   const char* plural_mark=(rule->nbContexts>1)?"s":"";
   u_printf("Compiling %s... (%d context%s)\n",rule->name,rule->nbContexts,plural_mark);

   /* Every context part is determinized then trimmed, left part first */
   for (int ctx=0;ctx<rule->nbContexts;ctx++) {
      elag_determinize(language,rule->contexts[ctx].left,free_symbol);
      trim(rule->contexts[ctx].left,free_symbol);
      elag_determinize(language,rule->contexts[ctx].right,free_symbol);
      trim(rule->contexts[ctx].right,free_symbol);
   }

   /* A*.R1: the left part of the first context prefixed with everything */
   prefix_with_everything(rule->contexts[0].left);
   elag_determinize(language,rule->contexts[0].left,free_symbol);
   elag_minimize(rule->contexts[0].left);
   SingleGraph anything_R1=rule->contexts[0].left;

   /* R2.A*: the right part of the first context suffixed with everything */
   suffix_with_everything(rule->contexts[0].right);
   elag_determinize(language,rule->contexts[0].right,free_symbol);
   elag_minimize(rule->contexts[0].right);
   SingleGraph R2_anything=rule->contexts[0].right;

   /* Number of constraint combinations: 2^(nbContexts-1), or 0 when the
    * rule has no context at all */
   int nb_combinations=0;
   if (rule->nbContexts>=1) {
      nb_combinations=(int)(1 << (rule->nbContexts-1));
   }

   /* The automaton that accumulates all the wrong paths */
   SingleGraph rejected=new_SingleGraph(PTR_TAGS);
   for (int combination=0;combination<nb_combinations;combination++) {
      /* Automaton that does not match this combination of constraints */
      SingleGraph unmatched=combine_constraints(rule,combination,anything_R1,R2_anything,language);
      /* It is merged into the accumulator, which is kept deterministic
       * and minimal after each step */
      build_union(rejected,unmatched);
      elag_determinize(language,rejected,free_symbol);
      elag_minimize(rejected);
   }

   /* Finally, we take the complement of the automaton that rejects wrong
    * paths. The new automaton recognizes correct paths, so applying the
    * Elag rule will consist of intersecting it with the sentence ones. */
   elag_complementation(language,rejected);
   trim(rejected,free_symbol);

   if (rejected->number_of_states==0) {
      /* Reported as an error, but the (empty) automaton is still returned */
      error("Grammar %s forbids everything\n",rule->name);
   }
   u_printf("Grammar %s compiled (%d states)\n",rule->name,rejected->number_of_states);

   Fst2Automaton* compiled=new_Fst2Automaton(rule->automaton->name,-1);
   compiled->automaton=rejected;
   return compiled;
}

Exemple #3

0

Afficher le fichier

Fichier : ElagFstFilesIO.cpp Projet : UnitexGramLab/unitex-core

/**
 * Loads and returns the next automaton from the given .fst2.
 * Returns NULL if there is no more automaton to load.
 *
 * The header line must start with '-' followed by the automaton number,
 * which has to be fstf->pos+1, then one separator character and the
 * automaton name. A header with no name yields an automaton with an empty
 * name. Each following line describes one state, until a line starting
 * with 'f' is met: a leading 't' marks the state as final, and the rest of
 * the line is a sequence of "tag destination" integer pairs.
 *
 * If fstf->renumber is not NULL, each tag number read is first mapped
 * through it. Tags whose entry in fstf->symbols is NULL are skipped.
 *
 * Malformed input is reported with fatal_error(). An automaton with no
 * state is reported with error() and returned without initial state.
 * On success, fstf->pos is incremented.
 */
Fst2Automaton* load_automaton(Elag_fst_file_in* fstf) {
if (fstf->pos>=fstf->nb_automata) {
   return NULL;
}
Ustring* ustr=new_Ustring();
readline(ustr,fstf->f);
const unichar* p=ustr->str;
if (p[0]!='-') {
   fatal_error("load_automaton: %s: bad file format\n",fstf->name);
}
p++;
int i=u_parse_int(p,&p);
if (i!=fstf->pos+1) {
   /* We make sure that the automaton number is what it should be */
   fatal_error("load_automaton: %s: parsing error with line '%S' ('-%d ...' expected)\n",fstf->name,ustr->str,fstf->pos+1);
}
/* Now p points on the character that follows the automaton number. We
 * skip the separator only if there is one, so that a header line with no
 * name gives an empty name instead of a read past the end of the line. */
if (*p!='\0') {
   p++;
}
Fst2Automaton* A=new_Fst2Automaton(p);
while (readline(ustr,fstf->f) && ustr->str[0]!='f') {
   /* If there is a state to read */
   p=ustr->str;
   SingleGraphState state=add_state(A->automaton);
   if (*p=='t') {
      /* If necessary, we set the state final */
      set_final_state(state);
   }
   /* We move p onto the first digit */
   while (*p!='\0' && !u_is_digit(*p)) {
      p++;
   }
   while (*p!='\0') {
      /* If there is a transition to read */
      int tag_number=u_parse_int(p,&p);
      if (fstf->renumber!=NULL) {
         tag_number=fstf->renumber[tag_number];
      }
      while (*p==' ') {
         p++;
      }
      if (!u_is_digit(*p)) {
         fatal_error("load_automaton: %s: bad file format (line='%S')\n",fstf->name,ustr->str);
      }
      int state_number=u_parse_int(p,&p);
      /* NOTE(review): tag_number comes from the file and is used as an
       * index here (and in fstf->renumber above) without any range check;
       * confirm that the tags have been validated when the file was opened */
      symbol_t* tmp=(symbol_t*)fstf->symbols->value[tag_number];
      if (tmp!=NULL) {
         /* If it is a good symbol (successfully loaded), we add
          * transition(s). The same call is used whatever the type of the
          * .fst2 is. In a text automaton, this adds one transition per
          * element of the symbol list. For instance, if we have:
          *
          * tmp = "{domestique,.N:fs}" => "{domestique,.N:ms}" => NULL
          *
          * then we add two transitions. */
         add_all_outgoing_transitions(state,tmp,state_number);
      }
      while (*p==' ') {
         p++;
      }
   }
}
if (*ustr->str=='\0') {
   fatal_error("load_automaton: unexpected end of file\n");
}
if (A->automaton->number_of_states==0) {
   error("load_automaton: automaton with no state\n");
} else {
   set_initial_state(A->automaton->states[0]);
}
fstf->pos++;
free_Ustring(ustr);
return A;
}