/**
 * Opens the .fst2 file 'fname' for reading and builds the corresponding
 * Elag_fst_file_in descriptor, attached to the given language description.
 *
 * The first line of the file must start with a digit; it is parsed as the
 * number of automata. The stream position reached after that line is stored
 * in 'pos0', a symbol table is created and filled by load_elag_fst2_tags(),
 * and the descriptor is returned with 'pos' set to 0 and its type set to
 * FST_GRAMMAR.
 *
 * Returns NULL, after reporting the problem with error() and releasing
 * everything acquired so far, if the file cannot be opened, is empty, does
 * not start with a digit, or if its symbols cannot be loaded. An allocation
 * failure is reported through fatal_alloc_error().
 */
Elag_fst_file_in* load_elag_fst2_file(const VersatileEncodingConfig* vec,const char* fname,language_t* language) {
    Elag_fst_file_in* in=(Elag_fst_file_in*)malloc(sizeof(Elag_fst_file_in));
    if (in==NULL) {
        fatal_alloc_error("load_elag_fst2_file");
    }
    in->name=strdup(fname);
    if (in->name==NULL) {
        fatal_alloc_error("load_elag_fst2_file");
    }

    in->f=u_fopen(vec,fname,U_READ);
    if (in->f==NULL) {
        error("load_fst_file: unable to open '%s' for reading\n",fname);
        free(in->name);
        free(in);
        return NULL;
    }

    /* The header line must begin with the number of automata */
    unichar header[MAXBUF];
    int header_is_valid=1;
    if (u_fgets(header,MAXBUF,in->f)==EOF) {
        error("load_fst_file: '%s' is empty\n",fname);
        header_is_valid=0;
    } else if (!u_is_digit(*header)) {
        error("load_fst_file: %s: bad file format\n",fname);
        header_is_valid=0;
    }

    if (header_is_valid) {
        in->nb_automata=u_parse_int(header);
        in->language=language;
        in->type=FST_GRAMMAR;
        in->pos0=(int)ftell(in->f);
        in->symbols=new_string_hash_ptr(64);
        in->renumber=NULL;
        if (load_elag_fst2_tags(in)!=-1) {
            /* Success: no automaton has been read yet */
            in->pos=0;
            return in;
        }
        error("load_fst_file: %s: cannot load symbols\n",in->name);
        free_string_hash_ptr(in->symbols,(void(*)(void*))free_symbols);
    }

    /* Common failure cleanup once the file has been opened */
    u_fclose(in->f);
    free(in->name);
    free(in);
    return NULL;
}
/** * \brief Reads an line of a concord.ind file. * * \param[in] line the unichar string containing the line * * The line is expected to be in the the format : n.n.n n.n.n t where n are integers and t is string * * \return The information read in a locate_pos structure */ locate_pos *read_concord_line(const unichar *line) { locate_pos *l; l = (locate_pos*) malloc(sizeof(locate_pos) * 1); if (l == NULL) { perror("malloc\n"); fprintf(stderr, "Impossible to allocate memory\n"); exit(1); } l->label = (unichar*) malloc(sizeof(unichar) * (u_strlen(line) + 1)); if (l->label == NULL) { perror("malloc\n"); fprintf(stderr, "Impossible to allocate memory\n"); exit(1); } // format of a line : n.n.n n.n.n t where n are integers and t is string const unichar **next; const unichar *current = line; next = &line; // make next not NULL l->token_start_offset = (long)u_parse_int(current, next); current = (*next)+1; l->character_start_offset = (long)u_parse_int(current, next); current = (*next)+1; l->logical_start_offset = (long)u_parse_int(current, next); current = (*next)+1; l->token_end_offset = (long)u_parse_int(current, next); current = (*next)+1; l-> character_end_offset = (long)u_parse_int(current, next); current = (*next)+1; l-> logical_end_offset = (long)u_parse_int(current, next); current = (*next)+1; u_strcpy(l->label,current); return l; }
/**
 * Reads the next automaton from the given .fst2 and returns it.
 * Returns NULL when every automaton announced by the file has already
 * been loaded (fstf->pos>=fstf->nb_automata).
 *
 * The automaton header line must look like '-N name', where N is the
 * expected 1-based automaton number. Each following line describes one
 * state, until a line starting with 'f' is met. A state line starting
 * with 't' denotes a final state; the rest of the line is a sequence of
 * 'tag_number state_number' pairs, one per transition.
 *
 * Any format problem is reported with fatal_error(). On success,
 * fstf->pos is incremented.
 */
Fst2Automaton* load_automaton(Elag_fst_file_in* fstf) {
    if (fstf->pos>=fstf->nb_automata) {
        return NULL;
    }

    Ustring* line=new_Ustring();
    readline(line,fstf->f);

    /* Header: '-' followed by the automaton number and its name */
    const unichar* cursor=line->str;
    if (cursor[0]!='-') {
        fatal_error("load_automaton: %s: bad file format\n",fstf->name);
    }
    cursor++;
    int automaton_number=u_parse_int(cursor,&cursor);
    if (automaton_number!=fstf->pos+1) {
        /* The automaton number must be the one we are waiting for */
        fatal_error("load_automaton: %s: parsing error with line '%S' ('-%d ...' expected)\n",fstf->name,line->str,fstf->pos+1);
    }
    /* Skipping the separator: cursor now points on the automaton name */
    cursor++;
    Fst2Automaton* result=new_Fst2Automaton(cursor);

    for (;;) {
        if (!readline(line,fstf->f) || line->str[0]=='f') {
            /* No more line, or end-of-automaton marker */
            break;
        }
        /* One state per line */
        cursor=line->str;
        SingleGraphState current_state=add_state(result->automaton);
        if (*cursor=='t') {
            /* 't' marks a final state */
            set_final_state(current_state);
        }
        /* Moving to the first digit of the line, if any */
        for (;*cursor!='\0' && !u_is_digit(*cursor);cursor++) {}

        /* Each iteration consumes one 'tag_number state_number' pair */
        while (*cursor!='\0') {
            int tag_number=u_parse_int(cursor,&cursor);
            if (fstf->renumber!=NULL) {
                tag_number=fstf->renumber[tag_number];
            }
            for (;*cursor==' ';cursor++) {}
            if (!u_is_digit(*cursor)) {
                fatal_error("load_automaton: %s: bad file format (line='%S')\n",fstf->name,line->str);
            }
            int destination=u_parse_int(cursor,&cursor);
            symbol_t* symbols=(symbol_t*)fstf->symbols->value[tag_number];
            if (symbols!=NULL) {
                /* Only symbols that were successfully loaded produce
                 * transitions. The same call is used whatever the value of
                 * fstf->type (grammar or text automaton): the original code
                 * tested the type but made this exact call in both cases.
                 * For a text automaton, the original note says one
                 * transition is added per element of the symbol list, e.g.
                 *
                 * "{domestique,.N:fs}" => "{domestique,.N:ms}" => NULL
                 *
                 * gives two transitions. */
                add_all_outgoing_transitions(current_state,symbols,destination);
            }
            for (;*cursor==' ';cursor++) {}
        }
    }

    if (*line->str=='\0') {
        fatal_error("load_automaton: unexpected end of file\n");
    }
    if (result->automaton->number_of_states!=0) {
        /* The first state read is the initial one */
        set_initial_state(result->automaton->states[0]);
    } else {
        error("load_automaton: automaton with no state\n");
    }
    fstf->pos++;
    free_Ustring(line);
    return result;
}