/**
 * Frees all the memory associated to the given rule, except its
 * automaton.
 */
void free_elRule(elRule* rule) {
if (rule==NULL) return;
if (rule->name!=NULL) free(rule->name);
for (int i=0;i<rule->nbContexts;i++) {
   free_SingleGraph(rule->contexts[i].right,free_symbol);
   free_SingleGraph(rule->contexts[i].left,free_symbol);
}
free(rule->contexts);
free_Fst2Automaton(rule->automaton,free_symbol);
free(rule);
}
/**
 * Compiles the given .fst2 grammar into the given .elg file.
 * Returns 0 in case of success; -1 otherwise.
 */
int compile_elag_grammar(char* grammar,char* elg_file, const VersatileEncodingConfig* vec,
                         language_t* language) {
elRule* rule=new_elRule(grammar,vec,language);
if (rule==NULL) {
   error("Unable to read grammar '%s'\n",grammar);
   return -1;
}
Fst2Automaton* A=compile_elag_rule(rule,language);
if (A==NULL) {
   fatal_error("Unable to compile rule '%s'\n",grammar);
}
free_elRule(rule);
save_automaton(A,elg_file,vec,FST_GRAMMAR);
free_Fst2Automaton(A,free_symbol);
return 0;
}
/**
 * This function analyzes the given Elag rule automaton to find
 * where the rule and constraint parts are. As a side effect, it builds
 * a fst2 grammar ("foo.fst2" => "foo-conc.fst2") that can be used by
 * the Locate program to match the <!> .... <!> .... <!> part of the rule.
 */
void split_elag_rule(elRule* rule, const VersatileEncodingConfig* vec,language_t* language) {
int c;
/* This array contains the numbers of the states that are pointed to by
 * middle '<=>' of the constraints */
int constraints[ELAG_MAX_CONSTRAINTS];
int nbConstraints=count_constraints(rule->automaton,constraints);
/* +1 because we have to count the <!> .... <!> .... <!> part of the rule */
rule->nbContexts=nbConstraints+1;
rule->contexts=(elContext*)malloc(rule->nbContexts*sizeof(elContext));
if (rule->contexts==NULL) {
   fatal_alloc_error("split_elag_rule");
}
for (c=0;c<rule->nbContexts;c++) {
   rule->contexts[c].left=NULL;
   rule->contexts[c].right=NULL;
}
int endR1=ELAG_UNDEFINED;
int endR2=ELAG_UNDEFINED;
int endC2=ELAG_UNDEFINED;
for (Transition* t=rule->automaton->automaton->states[0]->outgoing_transitions;t!=NULL;t=t->next) {
   symbol_t* symbol=t->label;
   switch (symbol->type) {
      /* We split the unique <!> .... <!> .... <!> part */
      case S_EXCLAM:
         if (rule->contexts[0].left!=NULL) {
            fatal_error("Too much '<!>' tags\n",rule->name);
         }
         rule->contexts[0].left=new_SingleGraph(PTR_TAGS);
         /* We look for the end of the first part of the rule */
         endR1=get_sub_automaton(rule->automaton->automaton,rule->contexts[0].left,t->state_number,0,S_EXCLAM);
         rule->contexts[0].right=new_SingleGraph(PTR_TAGS);
         endR2=get_sub_automaton(rule->automaton->automaton,rule->contexts[0].right,endR1,0,S_EXCLAM);
         if (endR1==ELAG_UNDEFINED || endR2==ELAG_UNDEFINED
             || !is_final_state(rule->automaton->automaton->states[endR2])) {
            fatal_error("split_elag_rule: %s: parse error in <!> part\n",rule->name);
         }
         break;

      /* We split the nbConstraints <=> .... <=> .... <=> parts */
      case S_EQUAL:
         if (rule->contexts[1].left!=NULL) {
            fatal_error("Non deterministic .fst2 file\n");
         }
         for (c=0;c<nbConstraints;c++) {
            rule->contexts[c+1].left=new_SingleGraph(PTR_TAGS);
            get_sub_automaton(rule->automaton->automaton,rule->contexts[c+1].left,t->state_number,1,constraints[c]);
            rule->contexts[c+1].right=new_SingleGraph(PTR_TAGS);
            endC2=get_sub_automaton(rule->automaton->automaton,rule->contexts[c+1].right,constraints[c],0,S_EQUAL);
            if (endC2==ELAG_UNDEFINED || !is_final_state(rule->automaton->automaton->states[endC2])) {
               fatal_error("split_elag_rule: %s: parse error in <=> part\n",rule->name);
            }
         }
         break;

      default: fatal_error("Left delimitor '<!>' or '<=>' missing\n");
   }
}
if (rule->contexts[0].left==NULL) {
   fatal_error("In grammar '%s': symbol '<!>' not found.\n",rule->name);
}
char buf[FILENAME_MAX];
remove_extension(rule->name,buf);
strcat(buf,"-conc.fst2");

/* We create the.fst2 to be used by Locate */
Fst2Automaton* locate=make_locate_automaton(rule,language);
save_automaton(locate,buf,vec,FST_LOCATE);
free_Fst2Automaton(locate,free_symbol);
}
/**
 * This function reads a file that contains a list of Elag grammar names,
 * and it compiles them into the file 'outname'. However, if the result
 * automaton is too big, it will be saved in several automata inside
 * the output file.
 */
int compile_elag_rules(char* rulesname,char* outname, const VersatileEncodingConfig* vec,language_t* language) {
u_printf("Compilation of %s\n",rulesname);
U_FILE* f=NULL;
U_FILE* frules=u_fopen(ASCII,rulesname,U_READ);
if (frules==NULL) {
   fatal_error("Cannot open file '%s'\n",rulesname);
}
U_FILE* out=u_fopen(ASCII,outname,U_WRITE);
if (out==NULL) {
   fatal_error("cannot open file '%s'\n",outname);
}
/* Name of the file that contains the result automaton */
char fstoutname[FILENAME_MAX];
int nbRules=0;
char buf[FILENAME_MAX];
time_t start_time=time(0);
Fst2Automaton* res=NULL;
Fst2Automaton* A;
int fst_number=0;
Ustring* ustr=new_Ustring();

char buf2[FILENAME_MAX];
char directory[FILENAME_MAX];
get_path(rulesname,directory);

while (af_fgets(buf,FILENAME_MAX,frules->f)) {
   /* We read one by one the Elag grammar names in the .lst file */
   chomp(buf);
   if (*buf=='\0') {
      /* If we have an empty line */
      continue;
   }
   if (!is_absolute_path(buf)) {
      strcpy(buf2,buf);
      sprintf(buf,"%s%s",directory,buf2);
   }

   u_printf("\n%s...\n",buf);
   remove_extension(buf);
   strcat(buf,".elg");
   if ((f=u_fopen(ASCII,buf,U_READ))==NULL) {
      /* If the .elg file doesn't exist, we create one */
      remove_extension(buf);
      u_printf("Precompiling %s.fst2\n",buf);
      strcat(buf,".fst2");
      elRule* rule=new_elRule(buf,vec,language);
      if (rule==NULL) {
         fatal_error("Unable to read grammar '%s'\n",buf);
      }
      if ((A=compile_elag_rule(rule,language))==NULL) {
         fatal_error("Unable to compile rule '%s'\n",buf);
      }
      free_elRule(rule);
   } else {
      /* If there is already .elg, we use it */
      u_fclose(f);
      A=load_elag_grammar_automaton(vec,buf,language);
      if (A==NULL) {
         fatal_error("Unable to load '%s'\n",buf);
      }
   }
   if (A->automaton->number_of_states==0) {
      error("Grammar %s forbids everything!\n",buf);
   }
   if (res!=NULL) {
      /* If there is already an automaton, we intersect it with the new one */
      SingleGraph tmp=res->automaton;
      res->automaton=elag_intersection(language,tmp,A->automaton,GRAMMAR_GRAMMAR);
      free_SingleGraph(tmp,NULL);
      free_Fst2Automaton(A,NULL);
      trim(res->automaton,NULL);
   } else {
      res=A;
   }
   nbRules++;
   if (res->automaton->number_of_states>MAX_GRAM_SIZE) {
      /* If the automaton is too large, we will split the grammar
       * into several automata */
      elag_minimize(res->automaton,1);
      sprintf(fstoutname,"%s-%d.elg",outname,fst_number++);
      u_fprintf(out,"<%s>\n",fstoutname);
      u_printf("Splitting big grammar in '%s' (%d states)\n",fstoutname,res->automaton->number_of_states);
      u_sprintf(ustr,"%s: compiled elag grammar",fstoutname);
      free(res->name);
      res->name=u_strdup(ustr->str);
      save_automaton(res,fstoutname,vec,FST_GRAMMAR);
      free_Fst2Automaton(res,NULL);
      res=NULL;
   }
}
if (res!=NULL) {
   /* We save the last automaton, if any */
   sprintf(fstoutname,"%s-%d.elg",outname,fst_number++);
   u_fprintf(out,"<%s>\n",fstoutname);
   u_printf("Saving grammar in '%s'(%d states)\n",fstoutname,res->automaton->number_of_states);
   elag_minimize(res->automaton,1);
   u_sprintf(ustr,"%s: compiled elag grammar",fstoutname);
   free(res->name);
   res->name=u_strdup(ustr->str);
   save_automaton(res,fstoutname,vec,FST_GRAMMAR);
   free_Fst2Automaton(res,free_symbol);
}
time_t end_time=time(0);
u_fclose(frules);
u_fclose(out);
free_Ustring(ustr);
u_printf("\nDone.\nElapsed time: %.0f s\n",difftime(end_time,start_time));
u_printf("\n%d rule%s from %s compiled in %s (%d automat%s)\n",
         nbRules,(nbRules>1)?"s":"",rulesname,outname,fst_number,
         (fst_number>1)?"a":"on");
return 0;
}
예제 #5
0
void free_Fst2Automaton_including_symbols(Fst2Automaton* A) {
free_Fst2Automaton(A,free_symbol);
}
예제 #6
0
void free_Fst2Automaton_excluding_symbols(Fst2Automaton* A) {
free_Fst2Automaton(A,NULL);
}