/**
 * Opens the .fst2 file 'fname' and returns a newly allocated
 * Elag_fst_file_in describing it, attached to the given language
 * description and typed as FST_GRAMMAR.
 *
 * The first line of the file must begin with a digit; it is parsed as the
 * number of automata of the file. The symbols are then loaded by
 * load_elag_fst2_tags().
 *
 * Returns NULL, after reporting the problem with error() and releasing
 * everything acquired so far, if the file cannot be opened, if it is empty,
 * if its first line does not begin with a digit, or if the symbols cannot
 * be loaded. Allocation failures are reported with fatal_alloc_error().
 */
Elag_fst_file_in* load_elag_fst2_file(const VersatileEncodingConfig* vec,const char* fname,language_t* language) {
Elag_fst_file_in* res=(Elag_fst_file_in*)malloc(sizeof(Elag_fst_file_in));
if (res==NULL) {
   fatal_alloc_error("load_elag_fst2_file");
}
/* We keep a private copy of the file name */
res->name=strdup(fname);
if (res->name==NULL) {
   fatal_alloc_error("load_elag_fst2_file");
}
res->f=u_fopen(vec,fname,U_READ);
if (res->f==NULL) {
   error("load_fst_file: unable to open '%s' for reading\n",fname);
   goto release_name;
}
/* The first line is supposed to contain the number of automata */
unichar header[MAXBUF];
if (EOF==u_fgets(header,MAXBUF,res->f)) {
   error("load_fst_file: '%s' is empty\n",fname);
   goto close_file;
}
if (!u_is_digit(header[0])) {
   error("load_fst_file: %s: bad file format\n",fname);
   goto close_file;
}
res->nb_automata=u_parse_int(header);
res->language=language;
res->type=FST_GRAMMAR;
/* We remember the position that follows the first line */
res->pos0=(int)ftell(res->f);
res->symbols=new_string_hash_ptr(64);
res->renumber=NULL;
if (-1==load_elag_fst2_tags(res)) {
   error("load_fst_file: %s: cannot load symbols\n",res->name);
   goto release_symbols;
}
/* No automaton has been read yet */
res->pos=0;
return res;

/* Error exits: each label releases one resource and falls through to
 * the labels below it, so that resources are released in the reverse
 * order of their acquisition */
release_symbols:
free_string_hash_ptr(res->symbols,(void(*)(void*))free_symbols);

close_file:
u_fclose(res->f);

release_name:
free(res->name);
free(res);
return NULL;
}
/* Example no. 2 */
/**
 * \brief Reads an line of a concord.ind file.
 *
 * \param[in] line the unichar string containing the line
 *
 * The line is expected to be in the the format : n.n.n n.n.n t where n are integers and t is string
 *
 * \return The information read in a locate_pos structure
 */
locate_pos *read_concord_line(const unichar *line) {

	locate_pos *l;
	l = (locate_pos*) malloc(sizeof(locate_pos) * 1);
	if (l == NULL) {
		perror("malloc\n");
		fprintf(stderr, "Impossible to allocate memory\n");
		exit(1);
	}
	l->label = (unichar*) malloc(sizeof(unichar) * (u_strlen(line) + 1));
	if (l->label == NULL) {
		perror("malloc\n");
		fprintf(stderr, "Impossible to allocate memory\n");
		exit(1);
	}

	// format of a line : n.n.n n.n.n t where n are integers and t is string
	const unichar **next;

	const unichar *current = line;
	next = &line; // make next not NULL
	l->token_start_offset = (long)u_parse_int(current, next);

	current = (*next)+1;
	l->character_start_offset = (long)u_parse_int(current, next);

	current = (*next)+1;
	l->logical_start_offset = (long)u_parse_int(current, next);

	current = (*next)+1;
	l->token_end_offset = (long)u_parse_int(current, next);

	current = (*next)+1;
	l-> character_end_offset = (long)u_parse_int(current, next);

	current = (*next)+1;
	l-> logical_end_offset = (long)u_parse_int(current, next);

	current = (*next)+1;
	u_strcpy(l->label,current);

	return l;
}
/**
 * Loads and returns the next automaton from the given .fst2.
 * Returns NULL if there is no more automaton to load, i.e. if
 * fstf->pos has reached fstf->nb_automata.
 *
 * The automaton is read from the current position of fstf->f:
 * - a header line '-N name', where N must be fstf->pos+1;
 * - one line per state, up to a line that starts with 'f'. A state line
 *   that starts with 't' describes a final state. The rest of the line is
 *   a sequence of 'tag_number state_number' pairs, one per transition.
 *
 * If fstf->renumber is not NULL, each tag number read from the file is
 * first translated through this array. The symbol found at that index in
 * fstf->symbols is then used to add the transition(s); a NULL symbol is
 * silently ignored.
 *
 * The first state, if any, is set initial. On success, fstf->pos is
 * incremented. Any format problem is reported with fatal_error().
 */
Fst2Automaton* load_automaton(Elag_fst_file_in* fstf) {
if (fstf->pos>=fstf->nb_automata) {
   return NULL;
}
Ustring* ustr=new_Ustring();
/* The result of this readline is not tested: the content of ustr is
 * checked just below instead */
readline(ustr,fstf->f);
const unichar* p=ustr->str;
if (p[0]!='-') {
   fatal_error("load_automaton: %s: bad file format\n",fstf->name);
}
p++;
int i=u_parse_int(p,&p);
if (i!=fstf->pos+1) {
   /* We make sure that the automaton number is what it should be */
   fatal_error("load_automaton: %s: parsing error with line '%S' ('-%d ...' expected)\n",fstf->name,ustr->str,fstf->pos+1);
}
/* We skip the character that separates the number from the automaton
 * name, but only if there is one: if the line ends right after the
 * number, p must stay on the terminator so that the name is empty
 * instead of being read past the end of the string */
if (*p!='\0') {
   p++;
}
/* Now p points on the automaton name */
Fst2Automaton* A=new_Fst2Automaton(p);
while (readline(ustr,fstf->f) && ustr->str[0]!='f') {
   /* If there is a state to read */
   p=ustr->str;
   SingleGraphState state=add_state(A->automaton);
   if (*p=='t') {
      /* If necessary, we set the state final */
      set_final_state(state);
   }
   /* We put p on the first digit */
   while (*p!='\0' && !u_is_digit(*p)) {
      p++;
   }
   while (*p!='\0') {
      /* If there is a transition to read */
      int tag_number=u_parse_int(p,&p);
      if (fstf->renumber!=NULL) {
         tag_number=fstf->renumber[tag_number];
      }
      while (*p==' ') {
         p++;
      }
      if (!u_is_digit(*p)) {
         fatal_error("load_automaton: %s: bad file format (line='%S')\n",fstf->name,ustr->str);
      }
      int state_number=u_parse_int(p,&p);
      /* NOTE(review): tag_number comes from the file and is used as an
       * index (here and in fstf->renumber above) with no visible range
       * check -- confirm that the tags loader guarantees its validity */
      symbol_t* tmp=(symbol_t*)fstf->symbols->value[tag_number];
      if (tmp!=NULL) {
         /* If it is a good symbol (successfully loaded), we add transition(s).
          * Grammars and text automata (fstf->type==FST_TEXT) are handled by
          * the very same call. For a text automaton, one transition per
          * element of the symbol list is expected. For instance, if we have:
          *
          * tmp = "{domestique,.N:fs}" => "{domestique,.N:ms}" => NULL
          *
          * then two transitions are expected.
          * NOTE(review): presumably add_all_outgoing_transitions walks the
          * whole symbol list -- confirm */
         add_all_outgoing_transitions(state,tmp,state_number);
      }
      while (*p==' ') {
         p++;
      }
   }
}
/* If the last line read is empty, we consider that the closing 'f' line
 * is missing */
if (*ustr->str=='\0') {
   fatal_error("load_automaton: unexpected end of file\n");
}
if (A->automaton->number_of_states==0) {
   error("load_automaton: automaton with no state\n");
} else {
   set_initial_state(A->automaton->states[0]);
}
fstf->pos++;
free_Ustring(ustr);
return A;
}