int main_XMLizer(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } int output_style=TEI; char output[FILENAME_MAX]=""; char alphabet[FILENAME_MAX]=""; char normalization[FILENAME_MAX]=""; char segmentation[FILENAME_MAX]=""; VersatileEncodingConfig vec=VEC_DEFAULT; int convLFtoCRLF=1; int val,index=-1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_XMLizer,lopts_XMLizer,&index))) { switch(val) { case 'x': output_style=XML; break; case 't': output_style=TEI; break; case 'n': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty normalization grammar name\n"); return USAGE_ERROR_CODE; } strcpy(normalization,options.vars()->optarg); break; case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output file name\n"); return USAGE_ERROR_CODE; } strcpy(output,options.vars()->optarg); break; case 'a': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty alphabet file name\n"); return USAGE_ERROR_CODE; } strcpy(alphabet,options.vars()->optarg); break; case 's': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty segmentation grammar name\n"); return USAGE_ERROR_CODE; } strcpy(segmentation,options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? 
error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_XMLizer[index].name); return USAGE_ERROR_CODE; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (segmentation[0]=='\0') { error("You must specify the segmentation grammar to use\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } char input[FILENAME_MAX]; strcpy(input,argv[options.vars()->optind]); char snt[FILENAME_MAX]; remove_extension(input,snt); strcat(snt,"_tmp.snt"); char tmp[FILENAME_MAX]; remove_extension(input,tmp); strcat(tmp,".tmp"); normalize(input,snt,&vec,KEEP_CARRIAGE_RETURN,convLFtoCRLF,normalization,NULL,1); struct fst2txt_parameters* p=new_fst2txt_parameters(); p->vec=vec; p->input_text_file=strdup(snt); if (p->input_text_file ==NULL) { alloc_error("main_XMLizer"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } p->output_text_file_is_temp=1; p->output_text_file=strdup(tmp); if (p->output_text_file==NULL) { alloc_error("main_XMLizer"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } p->fst_file=strdup(segmentation); if (p->fst_file==NULL) { alloc_error("main_XMLizer"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } p->alphabet_file=strdup(alphabet); if 
(p->alphabet_file==NULL) { alloc_error("main_XMLizer"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } p->output_policy=MERGE_OUTPUTS; p->tokenization_policy=WORD_BY_WORD_TOKENIZATION; p->space_policy=DONT_START_WITH_SPACE; main_fst2txt(p); free_fst2txt_parameters(p); if (output[0]=='\0') { remove_extension(input,output); strcat(output,".xml"); } int return_value = xmlize(&vec,snt,output,output_style); af_remove(snt); af_remove(tmp); return return_value; }
int main_RebuildTfst(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } VersatileEncodingConfig vec=VEC_DEFAULT; int val, index=-1; bool only_verify_arguments = false; UnitexGetOpt options; int save_statistics=1; while (EOF!=(val=options.parse_long(argc,argv,optstring_RebuildTfst,lopts_RebuildTfst,&index))) { switch (val) { case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'S': save_statistics = 0; break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n", options.vars()->optopt) : error("Missing argument for option --%s\n", lopts_RebuildTfst[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? 
error("Invalid option -%c\n", options.vars()->optopt) : error("Invalid option --%s\n", options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } char input_tfst[FILENAME_MAX]; char input_tind[FILENAME_MAX]; strcpy(input_tfst,argv[options.vars()->optind]); remove_extension(input_tfst,input_tind); strcat(input_tind,".tind"); u_printf("Loading %s...\n",input_tfst); Tfst* tfst = open_text_automaton(&vec,input_tfst); if (tfst==NULL) { error("Unable to load %s automaton\n",input_tfst); return DEFAULT_ERROR_CODE; } char basedir[FILENAME_MAX]; get_path(input_tfst,basedir); char output_tfst[FILENAME_MAX]; sprintf(output_tfst, "%s.new.tfst",input_tfst); char output_tind[FILENAME_MAX]; sprintf(output_tind, "%s.new.tind",input_tfst); U_FILE* f_tfst; if ((f_tfst = u_fopen(&vec,output_tfst,U_WRITE)) == NULL) { error("Unable to open %s for writing\n", output_tfst); close_text_automaton(tfst); return DEFAULT_ERROR_CODE; } U_FILE* f_tind; if ((f_tind = u_fopen(BINARY,output_tind,U_WRITE)) == NULL) { u_fclose(f_tfst); close_text_automaton(tfst); error("Unable to open %s for writing\n", output_tind); return DEFAULT_ERROR_CODE; } /* We use this hash table to rebuild files tfst_tags_by_freq/alph.txt */ struct hash_table* form_frequencies=new_hash_table((HASH_FUNCTION)hash_unichar,(EQUAL_FUNCTION)u_equal, (FREE_FUNCTION)free,NULL,(KEYCOPY_FUNCTION)keycopy); u_fprintf(f_tfst,"%010d\n",tfst->N); for (int i = 1; i <= tfst->N; i++) { if ((i % 100) == 0) { u_printf("%d/%d sentences rebuilt...\n", i, tfst->N); } load_sentence(tfst,i); char grfname[FILENAME_MAX]; sprintf(grfname, "%ssentence%d.grf", basedir, i); unichar** tags=NULL; int n_tags=-1; if (fexists(grfname)) { /* If there is a .grf for the current sentence, then we must * take it into account */ if 
(0==pseudo_main_Grf2Fst2(&vec,grfname,0,NULL,1,1,NULL,NULL,0)) { /* We proceed only if the graph compilation was a success */ char fst2name[FILENAME_MAX]; sprintf(fst2name, "%ssentence%d.fst2", basedir, i); struct FST2_free_info fst2_free; Fst2* fst2=load_abstract_fst2(&vec,fst2name,0,&fst2_free); af_remove(fst2name); free_SingleGraph(tfst->automaton,NULL); tfst->automaton=create_copy_of_fst2_subgraph(fst2,1); tags=create_tfst_tags(fst2,&n_tags); free_abstract_Fst2(fst2,&fst2_free); } else { error("Error: %s is not a valid sentence automaton\n",grfname); } } save_current_sentence(tfst,f_tfst,f_tind,tags,n_tags,form_frequencies); if (tags!=NULL) { /* If necessary, we free the tags we created */ for (int count_tags=0;count_tags<n_tags;count_tags++) { free(tags[count_tags]); } free(tags); } } u_printf("Text automaton rebuilt.\n"); u_fclose(f_tind); u_fclose(f_tfst); close_text_automaton(tfst); /* Finally, we save statistics */ if (save_statistics) { char tfst_tags_by_freq[FILENAME_MAX]; char tfst_tags_by_alph[FILENAME_MAX]; strcpy(tfst_tags_by_freq, basedir); strcat(tfst_tags_by_freq, "tfst_tags_by_freq.txt"); strcpy(tfst_tags_by_alph, basedir); strcat(tfst_tags_by_alph, "tfst_tags_by_alph.txt"); U_FILE* f_tfst_tags_by_freq = u_fopen(&vec, tfst_tags_by_freq, U_WRITE); if (f_tfst_tags_by_freq == NULL) { error("Cannot open %s\n", tfst_tags_by_freq); } U_FILE* f_tfst_tags_by_alph = u_fopen(&vec, tfst_tags_by_alph, U_WRITE); if (f_tfst_tags_by_alph == NULL) { error("Cannot open %s\n", tfst_tags_by_alph); } sort_and_save_tfst_stats(form_frequencies, f_tfst_tags_by_freq, f_tfst_tags_by_alph); u_fclose(f_tfst_tags_by_freq); u_fclose(f_tfst_tags_by_alph); } free_hash_table(form_frequencies); /* make a backup and replace old automaton with new */ char backup_tfst[FILENAME_MAX]; char backup_tind[FILENAME_MAX]; sprintf(backup_tfst,"%s.bck",input_tfst); sprintf(backup_tind,"%s.bck",input_tind); /* We remove the existing backup files, if any */ af_remove(backup_tfst); 
af_remove(backup_tind); af_rename(input_tfst,backup_tfst); af_rename(input_tind,backup_tind); af_rename(output_tfst,input_tfst); af_rename(output_tind,input_tind); u_printf("\nYou can find a backup of the original files in:\n %s\nand %s\n", backup_tfst,backup_tind); return SUCCESS_RETURN_CODE; }
int main_Uncompress(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; char output[FILENAME_MAX]=""; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_Uncompress,lopts_Uncompress,&index))) { switch(val) { case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output file name\n"); return USAGE_ERROR_CODE; } strcpy(output,options.vars()->optarg); break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_Uncompress[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? 
error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } if (output[0]=='\0') { remove_extension(argv[options.vars()->optind],output); strcat(output,".dic"); } U_FILE* f=u_fopen(&vec,output,U_WRITE); if (f==NULL) { error("Cannot open file %s\n",output); return DEFAULT_ERROR_CODE; } char inf_file[FILENAME_MAX]; remove_extension(argv[options.vars()->optind],inf_file); strcat(inf_file,".inf"); u_printf("Uncompressing %s...\n",argv[options.vars()->optind]); Dictionary* d=new_Dictionary(&vec,argv[options.vars()->optind],inf_file); if (d!=NULL) { rebuild_dictionary(d,f); } u_fclose(f); free_Dictionary(d); u_printf("Done.\n"); return SUCCESS_RETURN_CODE; }
int main_TrainingTagger(int argc,char* const argv[]) { if (argc==1) { usage(); return 0; } int val,index=-1,binaries=1,r_forms=1,i_forms=1; int semitic=0; struct OptVars* vars=new_OptVars(); char text[FILENAME_MAX]=""; char raw_forms[FILENAME_MAX]=""; char inflected_forms[FILENAME_MAX]=""; char output[FILENAME_MAX]=""; Encoding encoding_output = DEFAULT_ENCODING_OUTPUT; int bom_output = DEFAULT_BOM_OUTPUT; int mask_encoding_compatibility_input = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT; while (EOF!=(val=getopt_long_TS(argc,argv,optstring_TrainingTagger,lopts_TrainingTagger,&index,vars))) { switch(val) { case 'o': if (vars->optarg[0]=='\0') { fatal_error("You must specify a non empty pattern\n"); } strcpy(output,vars->optarg); break; case 'b': binaries = 1; break; case 'n': binaries = 0; break; case 'a': break; case 'c': i_forms = 0; break; case 'm': r_forms = 0; break; case 'S': semitic=1; break; case 'k': if (vars->optarg[0]=='\0') { fatal_error("Empty input_encoding argument\n"); } decode_reading_encoding_parameter(&mask_encoding_compatibility_input,vars->optarg); break; case 'q': if (vars->optarg[0]=='\0') { fatal_error("Empty output_encoding argument\n"); } decode_writing_encoding_parameter(&encoding_output,&bom_output,vars->optarg); break; case 'h': usage(); return 0; case ':': if (index==-1) fatal_error("Missing argument for option -%c\n",vars->optopt); else fatal_error("Missing argument for option --%s\n",lopts_TrainingTagger[index].name); case '?': if (index==-1) fatal_error("Invalid option -%c\n",vars->optopt); else fatal_error("Invalid option --%s\n",vars->optarg); break; } index=-1; } if (vars->optind!=argc-1) { free_OptVars(vars); error("Invalid arguments: rerun with --help\n"); return 1; } strcpy(text,argv[vars->optind]); U_FILE* input_text=u_fopen_existing_versatile_encoding(mask_encoding_compatibility_input,text,U_READ); if (input_text==NULL) { free_OptVars(vars); fatal_error("cannot open file %s\n",text); return 1; } if(output[0]=='\0'){ 
remove_path_and_extension(text,output); } char path[FILENAME_MAX],filename[FILENAME_MAX]; get_path(text,path); if(strlen(path) == 0){ strcpy(path,"."); } /* we create files which will contain statistics extracted from the tagged corpus */ U_FILE* rforms_file = NULL, *iforms_file = NULL; if(r_forms == 1){ sprintf(filename,"%s_data_cat.dic",output); new_file(path,filename,raw_forms); rforms_file=u_fopen_creating_versatile_encoding(encoding_output,bom_output,raw_forms,U_WRITE); } if(i_forms == 1){ sprintf(filename,"%s_data_morph.dic",output); new_file(path,filename,inflected_forms); iforms_file=u_fopen_creating_versatile_encoding(encoding_output,bom_output,inflected_forms,U_WRITE); } u_printf("Gathering statistics from tagged corpus...\n"); do_training(input_text,rforms_file,iforms_file); /* we close all files and then we sort text dictionaries */ u_fclose(input_text); char disclaimer[FILENAME_MAX]; if(rforms_file != NULL){ u_fclose(rforms_file); pseudo_main_SortTxt(DEFAULT_ENCODING_OUTPUT,DEFAULT_BOM_OUTPUT,ALL_ENCODING_BOM_POSSIBLE,0,0,NULL,NULL,0,raw_forms); strcpy(disclaimer,raw_forms); remove_extension(disclaimer); strcat(disclaimer,".txt"); create_disclaimer(disclaimer); } if(iforms_file != NULL){ u_fclose(iforms_file); pseudo_main_SortTxt(DEFAULT_ENCODING_OUTPUT,DEFAULT_BOM_OUTPUT,ALL_ENCODING_BOM_POSSIBLE,0,0,NULL,NULL,0,inflected_forms); strcpy(disclaimer,inflected_forms); remove_extension(disclaimer); strcat(disclaimer,".txt"); create_disclaimer(disclaimer); } /* we compress dictionaries if option is specified by user (output is ".bin") */ if(binaries == 1){ /* simple forms dictionary */ if(r_forms == 1){ pseudo_main_Compress(DEFAULT_ENCODING_OUTPUT,DEFAULT_BOM_OUTPUT,ALL_ENCODING_BOM_POSSIBLE,0,semitic,raw_forms); } /* compound forms dictionary */ if(i_forms == 1){ pseudo_main_Compress(DEFAULT_ENCODING_OUTPUT,DEFAULT_BOM_OUTPUT,ALL_ENCODING_BOM_POSSIBLE,0,semitic,inflected_forms); } } free_OptVars(vars); u_printf("Done.\n"); return 0; }
int main_Extract(int argc,char* const argv[]) { if (argc==1) { usage(); return 0; } int val,index=-1; char extract_matching_units=1; char text_name[FILENAME_MAX]=""; char concord_ind[FILENAME_MAX]=""; char output[FILENAME_MAX]=""; Encoding encoding_output = DEFAULT_ENCODING_OUTPUT; int bom_output = DEFAULT_BOM_OUTPUT; int mask_encoding_compatibility_input = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT; struct OptVars* vars=new_OptVars(); while (EOF!=(val=getopt_long_TS(argc,argv,optstring_Extract,lopts_Extract,&index,vars))) { switch(val) { case 'y': extract_matching_units=1; break; case 'n': extract_matching_units=0; break; case 'o': if (vars->optarg[0]=='\0') { fatal_error("You must specify a non empty output file name\n"); } strcpy(output,vars->optarg); break; case 'i': if (vars->optarg[0]=='\0') { fatal_error("You must specify a non empty concordance file name\n"); } strcpy(concord_ind,vars->optarg); break; case 'h': usage(); return 0; case ':': if (index==-1) fatal_error("Missing argument for option -%c\n",vars->optopt); else fatal_error("Missing argument for option --%s\n",lopts_Extract[index].name); case '?': if (index==-1) fatal_error("Invalid option -%c\n",vars->optopt); else fatal_error("Invalid option --%s\n",vars->optarg); break; case 'k': if (vars->optarg[0]=='\0') { fatal_error("Empty input_encoding argument\n"); } decode_reading_encoding_parameter(&mask_encoding_compatibility_input,vars->optarg); break; case 'q': if (vars->optarg[0]=='\0') { fatal_error("Empty output_encoding argument\n"); } decode_writing_encoding_parameter(&encoding_output,&bom_output,vars->optarg); break; } index=-1; } if (output[0]=='\0') { fatal_error("You must specify the output text file\n"); } if (vars->optind!=argc-1) { fatal_error("Invalid arguments: rerun with --help\n"); } strcpy(text_name,argv[vars->optind]); struct snt_files* snt_files=new_snt_files(text_name); ABSTRACTMAPFILE* text=af_open_mapfile(snt_files->text_cod,MAPFILE_OPTION_READ,0); if (text==NULL) { error("Cannot 
open %s\n",snt_files->text_cod); return 1; } struct text_tokens* tok=load_text_tokens(snt_files->tokens_txt,mask_encoding_compatibility_input); if (tok==NULL) { error("Cannot load token list %s\n",snt_files->tokens_txt); af_close_mapfile(text); return 1; } if (tok->SENTENCE_MARKER==-1) { error("The text does not contain any sentence marker {S}\n"); af_close_mapfile(text); free_text_tokens(tok); return 1; } if (concord_ind[0]=='\0') { char tmp[FILENAME_MAX]; get_extension(text_name,tmp); if (strcmp(tmp,"snt")) { fatal_error("Unable to find the concord.ind file. Please explicit it\n"); } remove_extension(text_name,concord_ind); strcat(concord_ind,"_snt"); strcat(concord_ind,PATH_SEPARATOR_STRING); strcat(concord_ind,"concord.ind"); } U_FILE* concord=u_fopen_existing_versatile_encoding(mask_encoding_compatibility_input,concord_ind,U_READ); if (concord==NULL) { error("Cannot open concordance %s\n",concord_ind); af_close_mapfile(text); free_text_tokens(tok); return 1; } U_FILE* result=u_fopen_creating_versatile_encoding(encoding_output,bom_output,output,U_WRITE); if (result==NULL) { error("Cannot write output file %s\n",output); af_close_mapfile(text); u_fclose(concord); free_text_tokens(tok); return 1; } free_snt_files(snt_files); extract_units(extract_matching_units,text,tok,concord,result); af_close_mapfile(text); u_fclose(concord); u_fclose(result); free_text_tokens(tok); free_OptVars(vars); u_printf("Done.\n"); return 0; }
int write_vis(std::string &nemI_out_file, std::string &exoII_inp_file, Machine_Description* machine, Problem_Description* prob, Mesh_Description<INT>* mesh, LB_Description<INT>* lb) { int exid_vis, exid_inp; char title[MAX_LINE_LENGTH+1]; const char *coord_names[] = {"X", "Y", "Z"}; /*-----------------------------Execution Begins------------------------------*/ /* Generate the file name for the visualization file */ std::string vis_file_name = remove_extension(nemI_out_file); vis_file_name += "-vis.exoII"; /* Generate the title for the file */ strcpy(title, UTIL_NAME); strcat(title, " "); strcat(title, ELB_VERSION); strcat(title, " load balance visualization file"); /* * If the vis technique is to be by element block then calculate the * number of element blocks. */ int vis_nelem_blks; if(prob->type == ELEMENTAL) vis_nelem_blks = machine->num_procs; else vis_nelem_blks = machine->num_procs + 1; /* Create the ExodusII file */ std::cout << "Outputting load balance visualization file " << vis_file_name.c_str() << "\n"; int cpu_ws = 0; int io_ws = 0; int mode = EX_CLOBBER; if (prob->int64db|prob->int64api) { mode |= EX_NETCDF4|EX_NOCLASSIC|prob->int64db|prob->int64api; } if((exid_vis=ex_create(vis_file_name.c_str(), mode, &cpu_ws, &io_ws)) < 0) { Gen_Error(0, "fatal: unable to create visualization output file"); return 0; } ON_BLOCK_EXIT(ex_close, exid_vis); /* * Open the original input ExodusII file, read the values for the * element blocks and output them to the visualization file. 
*/ int icpu_ws=0; int iio_ws=0; float vers=0.0; mode = EX_READ | prob->int64api; if((exid_inp=ex_open(exoII_inp_file.c_str(), mode, &icpu_ws, &iio_ws, &vers)) < 0) { Gen_Error(0, "fatal: unable to open input ExodusII file"); return 0; } ON_BLOCK_EXIT(ex_close, exid_inp); char **elem_type = (char**)array_alloc(2, mesh->num_el_blks, MAX_STR_LENGTH+1, sizeof(char)); if(!elem_type) { Gen_Error(0, "fatal: insufficient memory"); return 0; } ON_BLOCK_EXIT(free, elem_type); std::vector<INT> el_blk_ids(mesh->num_el_blks); std::vector<INT> el_cnt_blk(mesh->num_el_blks); std::vector<INT> node_pel_blk(mesh->num_el_blks); std::vector<INT> nattr_el_blk(mesh->num_el_blks); if(ex_get_elem_blk_ids(exid_inp, TOPTR(el_blk_ids)) < 0) { Gen_Error(0, "fatal: unable to get element block IDs"); return 0; } int acc_vis = ELB_TRUE; // Output a different element block per processor if (prob->vis_out == 2) acc_vis = ELB_FALSE; // Output a nodal/element variable showing processor size_t nsize = 0; /* * Find out if the mesh consists of mixed elements. If not then * element blocks will be used to visualize the partitioning. Otherwise * nodal/element results will be used. 
*/ for(size_t ecnt=0; ecnt < mesh->num_el_blks; ecnt++) { if(ex_get_elem_block(exid_inp, el_blk_ids[ecnt], elem_type[ecnt], &el_cnt_blk[ecnt], &node_pel_blk[ecnt], &nattr_el_blk[ecnt]) < 0) { Gen_Error(0, "fatal: unable to get element block parameters"); return 0; } nsize += el_cnt_blk[ecnt]*node_pel_blk[ecnt]; if(strcmp(elem_type[0], elem_type[ecnt]) == 0) { if(node_pel_blk[0] != node_pel_blk[ecnt]) acc_vis = ELB_FALSE; } else acc_vis = ELB_FALSE; } if(acc_vis == ELB_TRUE) { /* Output the initial information */ if(ex_put_init(exid_vis, title, mesh->num_dims, mesh->num_nodes, mesh->num_elems, vis_nelem_blks, 0, 0) < 0) { Gen_Error(0, "fatal: unable to output initial params to vis file"); return 0; } /* Output the nodal coordinates */ float *xptr = nullptr; float *yptr = nullptr; float *zptr = nullptr; switch(mesh->num_dims) { case 3: zptr = (mesh->coords) + 2*mesh->num_nodes; /* FALLTHRU */ case 2: yptr = (mesh->coords) + mesh->num_nodes; /* FALLTHRU */ case 1: xptr = mesh->coords; } if(ex_put_coord(exid_vis, xptr, yptr, zptr) < 0) { Gen_Error(0, "fatal: unable to output coords to vis file"); return 0; } if(ex_put_coord_names(exid_vis, (char**)coord_names) < 0) { Gen_Error(0, "fatal: unable to output coordinate names"); return 0; } std::vector<INT> elem_block(mesh->num_elems); std::vector<INT> elem_map(mesh->num_elems); std::vector<INT> tmp_connect(nsize); for(size_t ecnt=0; ecnt < mesh->num_elems; ecnt++) { elem_map[ecnt] = ecnt+1; if(prob->type == ELEMENTAL) elem_block[ecnt] = lb->vertex2proc[ecnt]; else { int proc = lb->vertex2proc[mesh->connect[ecnt][0]]; int nnodes = get_elem_info(NNODES, mesh->elem_type[ecnt]); elem_block[ecnt] = proc; for(int ncnt=1; ncnt < nnodes; ncnt++) { if(lb->vertex2proc[mesh->connect[ecnt][ncnt]] != proc) { elem_block[ecnt] = machine->num_procs; break; } } } } int ccnt = 0; std::vector<INT> vis_el_blk_ptr(vis_nelem_blks+1); for(INT bcnt=0; bcnt < vis_nelem_blks; bcnt++) { vis_el_blk_ptr[bcnt] = ccnt; int pos = 0; int old_pos = 0; INT* 
el_ptr = TOPTR(elem_block); size_t ecnt = mesh->num_elems; while(pos != -1) { pos = in_list(bcnt, ecnt, el_ptr); if(pos != -1) { old_pos += pos + 1; ecnt = mesh->num_elems - old_pos; el_ptr = TOPTR(elem_block) + old_pos; int nnodes = get_elem_info(NNODES, mesh->elem_type[old_pos-1]); for(int ncnt=0; ncnt < nnodes; ncnt++) tmp_connect[ccnt++] = mesh->connect[old_pos-1][ncnt] + 1; } } } vis_el_blk_ptr[vis_nelem_blks] = ccnt; /* Output the element map */ if(ex_put_map(exid_vis, TOPTR(elem_map)) < 0) { Gen_Error(0, "fatal: unable to output element number map"); return 0; } /* Output the visualization element blocks */ for(int bcnt=0; bcnt < vis_nelem_blks; bcnt++) { /* * Note this assumes all the blocks contain the same type * element. */ int ecnt = (vis_el_blk_ptr[bcnt+1]-vis_el_blk_ptr[bcnt])/node_pel_blk[0]; if(ex_put_elem_block(exid_vis, bcnt+1, elem_type[0], ecnt, node_pel_blk[0], 0) < 0) { Gen_Error(0, "fatal: unable to output element block params"); return 0; } /* Output the connectivity */ if(ex_put_elem_conn(exid_vis, bcnt+1, &tmp_connect[vis_el_blk_ptr[bcnt]]) < 0) { Gen_Error(0, "fatal: unable to output element connectivity"); return 0; } } } else { /* For nodal/element results visualization of the partioning. */ // Copy the mesh portion to the vis file. 
ex_copy(exid_inp, exid_vis); /* Set up the file for nodal/element results */ float time_val = 0.0; if(ex_put_time(exid_vis, 1, &time_val) < 0) { Gen_Error(0, "fatal: unable to output time to vis file"); return 0; } const char *var_names[] = {"proc"}; if(prob->type == NODAL) { /* Allocate memory for the nodal values */ std::vector<float> proc_vals(mesh->num_nodes); if(ex_put_variable_param(exid_vis, EX_NODAL, 1) < 0) { Gen_Error(0, "fatal: unable to output var params to vis file"); return 0; } if(ex_put_variable_names(exid_vis, EX_NODAL, 1, (char**)var_names) < 0) { Gen_Error(0, "fatal: unable to output variable name"); return 0; } /* Do some problem specific assignment */ for(size_t ncnt=0; ncnt < mesh->num_nodes; ncnt++) proc_vals[ncnt] = lb->vertex2proc[ncnt]; for(int pcnt=0; pcnt < machine->num_procs; pcnt++) { for(auto & elem : lb->bor_nodes[pcnt]) proc_vals[elem] = machine->num_procs + 1; } /* Output the nodal variables */ if(ex_put_nodal_var(exid_vis, 1, 1, mesh->num_nodes, TOPTR(proc_vals)) < 0) { Gen_Error(0, "fatal: unable to output nodal variables"); return 0; } } else if(prob->type == ELEMENTAL) { /* Allocate memory for the element values */ std::vector<float> proc_vals(mesh->num_elems); if(ex_put_variable_param(exid_vis, EX_ELEM_BLOCK, 1) < 0) { Gen_Error(0, "fatal: unable to output var params to vis file"); return 0; } if(ex_put_variable_names(exid_vis, EX_ELEM_BLOCK, 1, (char**)var_names) < 0) { Gen_Error(0, "fatal: unable to output variable name"); return 0; } /* Do some problem specific assignment */ for(int proc=0; proc < machine->num_procs; proc++) { for (size_t e = 0; e < lb->int_elems[proc].size(); e++) { size_t ecnt = lb->int_elems[proc][e]; proc_vals[ecnt] = proc; } for (size_t e = 0; e < lb->bor_elems[proc].size(); e++) { size_t ecnt = lb->bor_elems[proc][e]; proc_vals[ecnt] = proc; } } /* Output the element variables */ size_t offset = 0; for (size_t i=0; i < mesh->num_el_blks; i++) { if(ex_put_var(exid_vis, 1, EX_ELEM_BLOCK, 1, 
el_blk_ids[i], el_cnt_blk[i], &proc_vals[offset]) < 0) { Gen_Error(0, "fatal: unable to output nodal variables"); return 0; } offset += el_cnt_blk[i]; } } } return 1; } /*---------------------------End write_vis()-------------------------------*/
/**
 * Duplicates 'filename' into 'result' and then strips the extension, if
 * any, from that copy. 'filename' itself is left unchanged; 'result' must
 * be large enough to receive the whole name.
 */
void remove_extension(const char* filename,char* result) {
    /* Copy the name together with its terminating '\0' */
    size_t bytes_to_copy=strlen(filename)+1;
    memcpy(result,filename,bytes_to_copy);
    /* The in-place overload does the actual stripping */
    remove_extension(result);
}
bool file_utils::split_path(const char *p, dynamic_string *pDrive, dynamic_string *pDir, dynamic_string *pFilename, dynamic_string *pExt) { VOGL_ASSERT(p); #if defined(PLATFORM_WINDOWS) char drive_buf[_MAX_DRIVE]; char dir_buf[_MAX_DIR]; char fname_buf[_MAX_FNAME]; char ext_buf[_MAX_EXT]; #if defined(COMPILER_MSVC) // Compiling with MSVC errno_t error = _splitpath_s(p, pDrive ? drive_buf : NULL, pDrive ? _MAX_DRIVE : 0, pDir ? dir_buf : NULL, pDir ? _MAX_DIR : 0, pFilename ? fname_buf : NULL, pFilename ? _MAX_FNAME : 0, pExt ? ext_buf : NULL, pExt ? _MAX_EXT : 0); if (error != 0) return false; #elif defined(COMPILER_MINGW) // Compiling with MinGW _splitpath(p, pDrive ? drive_buf : NULL, pDir ? dir_buf : NULL, pFilename ? fname_buf : NULL, pExt ? ext_buf : NULL); #else #error "Need to provide splitpath functionality for this compiler / platform combo." #endif if (pDrive) *pDrive = drive_buf; if (pDir) *pDir = dir_buf; if (pFilename) *pFilename = fname_buf; if (pExt) *pExt = ext_buf; #else // !PLATFORM_WINDOWS char dirtmp[1024]; char nametmp[1024]; strcpy_safe(dirtmp, sizeof(dirtmp), p); strcpy_safe(nametmp, sizeof(nametmp), p); if (pDrive) pDrive->clear(); const char *pDirName = dirname(dirtmp); if (!pDirName) return false; if (pDir) { pDir->set(pDirName); if ((!pDir->is_empty()) && (pDir->back() != '/')) pDir->append_char('/'); } const char *pBaseName = basename(nametmp); if (!pBaseName) return false; if (pFilename) { pFilename->set(pBaseName); remove_extension(*pFilename); } if (pExt) { pExt->set(pBaseName); get_extension(*pExt); if (pExt->get_len()) *pExt = "." + *pExt; } #endif // #if defined(PLATFORM_WINDOWS) return true; }
/**
 * Builds in 'result' the name of the "_snt" directory associated with
 * 'filename': the file name without its extension, followed by "_snt"
 * and by the path separator.
 *
 * Example: filename="C:\English\novel.txt" => result="C:\English\novel_snt\"
 */
void get_snt_path(const char* filename,char* result) {
    remove_extension(filename,result);
    /* Write both suffixes directly at the end of the extension-less name */
    char* tail=result+strlen(result);
    strcpy(tail,"_snt");
    strcpy(tail+strlen("_snt"),PATH_SEPARATOR_STRING);
}
/**
 * Writes into 'result' the bare name of 'filename': the path is removed
 * first, then the extension, if any. 'filename' is not modified.
 */
void remove_path_and_extension(const char* filename,char* result) {
    /* Intermediate buffer holding the name once its path is removed */
    char name_without_path[FILENAME_MAX];

    remove_path(filename,name_without_path);
    remove_extension(name_without_path,result);
}
int main_ElagComp(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; char compilename[FILENAME_MAX]=""; char directory[FILENAME_MAX]=""; char grammar[FILENAME_MAX]=""; char rule_file[FILENAME_MAX]=""; char lang[FILENAME_MAX]=""; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_ElagComp,lopts_ElagComp,&index))) { switch(val) { case 'l': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty language definition file\n"); return USAGE_ERROR_CODE; } strcpy(lang,options.vars()->optarg); break; case 'r': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty rule file\n"); return USAGE_ERROR_CODE; } strcpy(rule_file,options.vars()->optarg); get_path(rule_file,directory); break; case 'g': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty grammar file name\n"); return USAGE_ERROR_CODE; } strcpy(grammar,options.vars()->optarg); get_path(grammar,directory); break; case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output file\n"); return USAGE_ERROR_CODE; } strcpy(compilename,options.vars()->optarg); break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? 
error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_ElagComp[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (lang[0]=='\0') { error("You must define the language definition file\n"); return USAGE_ERROR_CODE; } if ((rule_file[0]=='\0' && grammar[0]=='\0') || (rule_file[0]!='\0' && grammar[0]!='\0')) { error("You must define a rule list OR a grammar\n"); return USAGE_ERROR_CODE; } if (options.vars()->optind!=argc) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (rule_file[0]=='\0' && grammar[0]=='\0') { error("You must specified a grammar or a rule file name\n"); return USAGE_ERROR_CODE; } if (rule_file[0]!='\0' && grammar[0]!='\0') { error("Cannot handle both a rule file and a grammar\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } language_t* language = load_language_definition(&vec,lang); if (rule_file[0]!='\0') { /* If we work with a rule list */ if (compilename[0]=='\0') { int l=(int)strlen(rule_file); if (strcmp(rule_file+l-4,".lst")==0) { strcpy(compilename,rule_file); strcpy(compilename+l-4,".rul"); } else { sprintf(compilename,"%s.rul",rule_file); } } if (compile_elag_rules(rule_file,compilename,&vec,language)==-1) { error("An error occurred while compiling %s\n",compilename); free_language_t(language); return DEFAULT_ERROR_CODE; } u_printf("\nElag grammars are compiled in %s.\n",compilename); } else { /* If we must compile a single grammar */ char elg_file[FILENAME_MAX]; get_extension(grammar,elg_file); if (strcmp(elg_file,".fst2")) { error("Grammar '%s' should be a .fst2 file\n"); free_language_t(language); return DEFAULT_ERROR_CODE; } remove_extension(grammar,elg_file); strcat(elg_file,".elg"); if 
(compile_elag_grammar(grammar,elg_file,&vec,language)==-1) { error("An error occured while compiling %s\n",grammar); free_language_t(language); return DEFAULT_ERROR_CODE; } u_printf("Elag grammar is compiled into %s.\n",elg_file); } free_language_t(language); return SUCCESS_RETURN_CODE; }
int do_init(int argc, char** argv) { /* setup pre-defined, #define-dependant */ map_cache_file = std::string(db_path) + "/" + std::string(DBPATH) + "map_cache.dat"; // Process the command-line arguments process_args(argc, argv); ShowStatus("Initializing grfio with %s\n", grf_list_file.c_str()); grfio_init(grf_list_file.c_str()); // Attempt to open the map cache file and force rebuild if not found ShowStatus("Opening map cache: %s\n", map_cache_file.c_str()); if(!rebuild) { map_cache_fp = fopen(map_cache_file.c_str(), "rb"); if(map_cache_fp == NULL) { ShowNotice("Existing map cache not found, forcing rebuild mode\n"); rebuild = 1; } else fclose(map_cache_fp); } if(rebuild) map_cache_fp = fopen(map_cache_file.c_str(), "w+b"); else map_cache_fp = fopen(map_cache_file.c_str(), "r+b"); if(map_cache_fp == NULL) { ShowError("Failure when opening map cache file %s\n", map_cache_file.c_str()); exit(EXIT_FAILURE); } // Open the map list FILE *list; std::vector<std::string> directories = { std::string(db_path) + "/", std::string(db_path) + "/" + std::string(DBIMPORT) + "/" }; for (const auto &directory : directories) { std::string filename = directory + map_list_file; ShowStatus("Opening map list: %s\n", filename.c_str()); list = fopen(filename.c_str(), "r"); if (list == NULL) { ShowError("Failure when opening maps list file %s\n", filename.c_str()); exit(EXIT_FAILURE); } // Initialize the main header if (rebuild) { header.file_size = sizeof(struct main_header); header.map_count = 0; } else { if (fread(&header, sizeof(struct main_header), 1, map_cache_fp) != 1) { printf("An error as occured while reading map_cache_fp \n"); } header.file_size = GetULong((unsigned char *)&(header.file_size)); header.map_count = GetUShort((unsigned char *)&(header.map_count)); } // Read and process the map list char line[1024]; while (fgets(line, sizeof(line), list)) { if (line[0] == '/' && line[1] == '/') continue; char name[MAP_NAME_LENGTH_EXT]; if (sscanf(line, "%15s", name) < 1) continue; if 
(strcmp("map:", name) == 0 && sscanf(line, "%*s %15s", name) < 1) continue; struct map_data map; name[MAP_NAME_LENGTH_EXT - 1] = '\0'; remove_extension(name); if (find_map(name)) ShowInfo("Map '" CL_WHITE "%s" CL_RESET "' already in cache.\n", name); else if (read_map(name, &map)) { cache_map(name, &map); ShowInfo("Map '" CL_WHITE "%s" CL_RESET "' successfully cached.\n", name); } else ShowError("Map '" CL_WHITE "%s" CL_RESET "' not found!\n", name); } ShowStatus("Closing map list: %s\n", filename.c_str()); fclose(list); } // Write the main header and close the map cache ShowStatus("Closing map cache: %s\n", map_cache_file.c_str()); fseek(map_cache_fp, 0, SEEK_SET); fwrite(&header, sizeof(struct main_header), 1, map_cache_fp); fclose(map_cache_fp); ShowStatus("Finalizing grfio\n"); grfio_final(); ShowInfo("%d maps now in cache\n", header.map_count); return 0; }
int main_Normalize(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } int mode=KEEP_CARRIAGE_RETURN; int separator_normalization=1; char rules[FILENAME_MAX]=""; char input_offsets[FILENAME_MAX]=""; char output_offsets[FILENAME_MAX]=""; VersatileEncodingConfig vec=VEC_DEFAULT; int convLFtoCRLF=1; int val,index=-1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_Normalize,lopts_Normalize,&index))) { switch(val) { case 'l': convLFtoCRLF=0; break; case 'n': mode=REMOVE_CARRIAGE_RETURN; break; case 'r': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty replacement rule file name\n"); return USAGE_ERROR_CODE; } strcpy(rules,options.vars()->optarg); break; case 1: separator_normalization=0; break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case '$': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty input offset file name\n"); return USAGE_ERROR_CODE; } strcpy(input_offsets,options.vars()->optarg); break; case '@': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output offset file name\n"); return USAGE_ERROR_CODE; } strcpy(output_offsets,options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_Normalize[index].name); return USAGE_ERROR_CODE; break; case '?': index==-1 ? 
error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; break; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } vector_offset* v_input_offsets=NULL; vector_offset* v_output_offsets=NULL; U_FILE* f_output_offsets=NULL; if (output_offsets[0]!='\0') { /* We deal with offsets only if we have to produce output offsets */ if (input_offsets[0]!='\0') { v_input_offsets=load_offsets(&vec,input_offsets); } f_output_offsets=u_fopen(&vec, output_offsets, U_WRITE); if (f_output_offsets==NULL) { error("Cannot create offset file %s\n",output_offsets); return DEFAULT_ERROR_CODE; } v_output_offsets=new_vector_offset(); } char tmp_file[FILENAME_MAX]; get_extension(argv[options.vars()->optind],tmp_file); if (!strcmp(tmp_file, ".snt")) { /* If the file to process has already the .snt extension, we temporary rename it to * .snt.normalizing */ strcpy(tmp_file,argv[options.vars()->optind]); strcat(tmp_file,".normalizing"); af_rename(argv[options.vars()->optind],tmp_file); } else { strcpy(tmp_file,argv[options.vars()->optind]); } /* We set the destination file */ char dest_file[FILENAME_MAX]; remove_extension(argv[options.vars()->optind],dest_file); strcat(dest_file,".snt"); u_printf("Normalizing %s...\n",argv[options.vars()->optind]); int return_value = normalize(tmp_file, dest_file, &vec, mode, convLFtoCRLF, rules, v_output_offsets, separator_normalization); u_printf("\n"); /* If we have used a temporary file, we delete it */ if (strcmp(tmp_file,argv[options.vars()->optind])) { af_remove(tmp_file); } process_offsets(v_input_offsets,v_output_offsets,f_output_offsets); u_fclose(f_output_offsets); free_vector_offset(v_input_offsets); free_vector_offset(v_output_offsets); u_printf((return_value==SUCCESS_RETURN_CODE) ? 
"Done.\n" : "Unsuccessfull.\n"); return return_value; }
/**
 * The main function of the cascade.
 *
 * Tokenizes 'text' and loads its tokens, then consumes 'transducer_list' until
 * it is empty. For each transducer taken from the list, a labeled text is
 * tokenized, the transducer is located on it, a concordance is produced from
 * the concord_ind file of that labeled text, and special characters are
 * protected. The transducer and its transducer_file_name are then freed.
 *
 * When 'in_place' is non-zero, one labeled text is created before the loop
 * (with index 0) and freed after it; when it is zero, a labeled text is
 * created and freed for every transducer, using the running transducer number.
 *
 * After the loop, construct_cascade_concord() is called, a
 * "<text without extension>.csc" name is built and passed together with
 * 'text' to copy_file() (presumably copying 'text' to it - confirm the
 * argument order), and a last concordance is launched on that .csc file.
 *
 * NOTE(review): this copy of the function is flattened onto two physical
 * lines, so where each '//' comment ends cannot be told from here. In
 * particular, verify against the formatted file whether
 * initialize_working_directory() and add_replaced_text() are live calls or
 * commented out.
 *
 * @param text                 name of the text the cascade is applied to
 * @param in_place             non-zero to reuse a single labeled text
 * @param must_create_directory forwarded to the working directory helpers
 * @param transducer_list      fifo of transducer*; emptied by this function
 * @param alphabet             alphabet file name, forwarded to the tools
 * @param negation_operator    forwarded to launch_locate_in_Cassys()
 * @param encoding_output, bom_output, mask_encoding_compatibility_input
 *                             encoding settings forwarded to every tool
 * @return 0
 */
int cascade(const char* text, int in_place, int must_create_directory, fifo* transducer_list, const char *alphabet, const char*negation_operator, Encoding encoding_output,int bom_output,int mask_encoding_compatibility_input) { launch_tokenize_in_Cassys(text,alphabet,NULL,encoding_output,bom_output,mask_encoding_compatibility_input); //if (in_place == 0) initialize_working_directory(text, must_create_directory); struct snt_files *snt_text_files = new_snt_files(text); struct text_tokens *tokens = NULL; cassys_tokens_list *tokens_list = cassys_load_text(snt_text_files->tokens_txt, snt_text_files->text_cod,&tokens); fprintf(stdout,"Cascade begins\n"); int transducer_number = 1; char *labeled_text_name = NULL; if ((in_place != 0)) labeled_text_name = create_labeled_files_and_directory(text, transducer_number*0, must_create_directory,0); while(!is_empty(transducer_list)){ if ((in_place == 0)) labeled_text_name = create_labeled_files_and_directory(text, transducer_number, must_create_directory,1); /* else { labeled_text_name = strdup(text); }*/ launch_tokenize_in_Cassys(labeled_text_name,alphabet,snt_text_files->tokens_txt,encoding_output,bom_output,mask_encoding_compatibility_input); free_snt_files(snt_text_files); // apply transducer transducer *current_transducer = (transducer*)take_ptr(transducer_list); launch_locate_in_Cassys(labeled_text_name, current_transducer, alphabet, negation_operator,encoding_output,bom_output,mask_encoding_compatibility_input); // generate concordance for this transducer snt_text_files = new_snt_files(labeled_text_name); launch_concord_in_Cassys(labeled_text_name, snt_text_files -> concord_ind, alphabet,encoding_output,bom_output,mask_encoding_compatibility_input); // add_replaced_text(labeled_text_name,tokens_list,transducer_number,alphabet,mask_encoding_compatibility_input); // add protection character in braces when needed 
protect_special_characters(labeled_text_name,encoding_output,bom_output,mask_encoding_compatibility_input); transducer_number++; free(current_transducer -> transducer_file_name); free(current_transducer); if ((in_place == 0)) free(labeled_text_name); } if ((in_place != 0)) free(labeled_text_name); free_snt_files(snt_text_files); construct_cascade_concord(tokens_list,text,transducer_number,encoding_output,bom_output,mask_encoding_compatibility_input); struct snt_files *snt_files = new_snt_files(text); char result_file_name[FILENAME_MAX]; char text_name_without_extension[FILENAME_MAX]; remove_extension(text,text_name_without_extension); sprintf(result_file_name,"%s.csc",text_name_without_extension); copy_file(result_file_name,text); launch_concord_in_Cassys(result_file_name,snt_files->concord_ind,alphabet,encoding_output,bom_output,mask_encoding_compatibility_input); free_cassys_tokens_list(tokens_list); free_snt_files(snt_files); free_text_tokens(tokens); return 0; }
/*
 * Writes the recipe line of a rule to 'stream': a tab, "gcc -o ", the name
 * that remove_extension() yields for 'target', a space, and then whatever
 * write_rule_prerequisites() emits for 'target' and 'dependencies'.
 *
 * NOTE(review): the ownership of the string returned by remove_extension()
 * is not visible from here - confirm whether it has to be released.
 */
void write_rule_recipe (FILE* stream, const char* target, list* dependencies)
{
    const char* output_name = remove_extension(target);

    fprintf(stream, "\tgcc -o %s ", output_name);
    write_rule_prerequisites(stream, target, dependencies);
}
int main_Fst2Txt(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } struct fst2txt_parameters* p=new_fst2txt_parameters(); char in_offsets[FILENAME_MAX]=""; char out_offsets[FILENAME_MAX]=""; int val,index=-1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_Fst2Txt,lopts_Fst2Txt,&index))) { switch(val) { case 't': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty text file name\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } p->input_text_file=strdup(options.vars()->optarg); if (p->input_text_file==NULL) { alloc_error("main_Fst2Txt"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } break; case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty text output file name\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } p->output_text_file=strdup(options.vars()->optarg); p->output_text_file_is_temp=0; if (p->output_text_file==NULL) { alloc_error("main_Fst2Txt"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } break; case 'a': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty alphabet file name\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } p->alphabet_file=strdup(options.vars()->optarg); if (p->alphabet_file==NULL) { alloc_error("main_Fst2Txt"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } break; case 'M': p->output_policy=MERGE_OUTPUTS; break; case 'R': p->output_policy=REPLACE_OUTPUTS; break; case 'c': p->tokenization_policy=CHAR_BY_CHAR_TOKENIZATION; break; case 'w': p->tokenization_policy=WORD_BY_WORD_TOKENIZATION; break; case 's': p->space_policy=START_WITH_SPACE; break; case 'x': p->space_policy=DONT_START_WITH_SPACE; break; case 'V': only_verify_arguments = true; break; case 'h': usage(); free_fst2txt_parameters(p); return SUCCESS_RETURN_CODE; case ':': index==-1 ? 
error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_Fst2Txt[index].name); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(p->vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(p->vec.encoding_output),&(p->vec.bom_output),options.vars()->optarg); break; case '$': if (options.vars()->optarg[0]=='\0') { error("Empty input_offsets argument\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } strcpy(in_offsets,options.vars()->optarg); break; case '@': if (options.vars()->optarg[0]=='\0') { error("Empty output_offsets argument\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } strcpy(out_offsets,options.vars()->optarg); break; case 'l': p->convLFtoCRLF=0; break; case 'r': p->keepCR = 1; break; case '?': index==-1 ? 
error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } if (p->input_text_file==NULL) { error("You must specify the text file\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory free_fst2txt_parameters(p); return SUCCESS_RETURN_CODE; } if (out_offsets[0]!='\0') { /* We deal with offsets only if the program is expected to produce some */ if (in_offsets[0]!='\0') { p->v_in_offsets=load_offsets(&(p->vec),in_offsets); if (p->v_in_offsets==NULL) { error("Cannot load offset file %s\n",in_offsets); free_fst2txt_parameters(p); return DEFAULT_ERROR_CODE; } } else { /* If there is no input offset file, we create an empty offset vector * in order to avoid testing whether the vector is NULL or not */ p->v_in_offsets=new_vector_offset(1); } p->f_out_offsets=u_fopen(&(p->vec),out_offsets,U_WRITE); if (p->f_out_offsets==NULL) { error("Cannot create file %s\n",out_offsets); free_fst2txt_parameters(p); return DEFAULT_ERROR_CODE; } } if (p->output_text_file == NULL) { char tmp[FILENAME_MAX]; remove_extension(p->input_text_file, tmp); strcat(tmp, ".tmp"); p->output_text_file_is_temp=1; p->output_text_file = strdup(tmp); if (p->output_text_file == NULL) { alloc_error("main_Fst2Txt"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } } p->fst_file=strdup(argv[options.vars()->optind]); if (p->fst_file==NULL) { alloc_error("main_Fst2Txt"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } int result=main_fst2txt(p); free_fst2txt_parameters(p); return result; }
/** * The same than main, but no call to setBufferMode. */ int main_BuildKrMwuDic(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } int val,index=-1; char output[FILENAME_MAX]=""; char inflection_dir[FILENAME_MAX]=""; char alphabet[FILENAME_MAX]=""; char dic_bin[FILENAME_MAX]=""; char dic_inf[FILENAME_MAX]=""; // default policy is to compile only out of date graphs GraphRecompilationPolicy graph_recompilation_policy = ONLY_OUT_OF_DATE; VersatileEncodingConfig vec=VEC_DEFAULT; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_BuildKrMwuDic,lopts_BuildKrMwuDic,&index))) { switch(val) { case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output file name\n"); return USAGE_ERROR_CODE; } strcpy(output,options.vars()->optarg); break; case 'd': if (options.vars()->optarg[0]=='\0') { error("Empty inflection directory\n"); return USAGE_ERROR_CODE; } strcpy(inflection_dir,options.vars()->optarg); break; case 'a': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty alphabet file name\n"); return USAGE_ERROR_CODE; } strcpy(alphabet,options.vars()->optarg); break; case 'b': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty binary dictionary name\n"); return USAGE_ERROR_CODE; } strcpy(dic_bin,options.vars()->optarg); remove_extension(dic_bin,dic_inf); strcat(dic_inf,".inf"); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case 'f': graph_recompilation_policy = ALWAYS_RECOMPILE; break; case 'n': graph_recompilation_policy = NEVER_RECOMPILE; break; case 't': graph_recompilation_policy = ONLY_OUT_OF_DATE; break; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_BuildKrMwuDic[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? 
error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (output[0]=='\0') { error("Output file must be specified\n"); return USAGE_ERROR_CODE; } if (inflection_dir[0]=='\0') { error("Inflection directory must be specified\n"); return USAGE_ERROR_CODE; } if (alphabet[0]=='\0') { error("Alphabet file must be specified\n"); return USAGE_ERROR_CODE; } if (dic_bin[0]=='\0') { error("Binary dictionary must be specified\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } U_FILE* delas=u_fopen(&vec,argv[options.vars()->optind],U_READ); if (delas==NULL) { error("Cannot open %s\n",argv[options.vars()->optind]); return DEFAULT_ERROR_CODE; } U_FILE* grf=u_fopen(&vec,output,U_WRITE); if (grf==NULL) { error("Cannot open %s\n",output); u_fclose(delas); return DEFAULT_ERROR_CODE; } Alphabet* alph=load_alphabet(&vec,alphabet,1); if (alph==NULL) { u_fclose(grf); u_fclose(delas); error("Cannot open alphabet file %s\n",alphabet); return DEFAULT_ERROR_CODE; } Korean* korean=new Korean(alph); MultiFlex_ctx* multiFlex_ctx=new_MultiFlex_ctx(inflection_dir, NULL, NULL, &vec, korean, NULL, NULL, graph_recompilation_policy); Dictionary* d=new_Dictionary(&vec,dic_bin,dic_inf); create_mwu_dictionary(delas,grf,multiFlex_ctx,d); free_Dictionary(d); u_fclose(delas); 
u_fclose(grf); free_alphabet(alph); delete korean; for (int count_free_fst2=0;count_free_fst2<multiFlex_ctx->n_fst2;count_free_fst2++) { free_abstract_Fst2(multiFlex_ctx->fst2[count_free_fst2],&(multiFlex_ctx->fst2_free[count_free_fst2])); multiFlex_ctx->fst2[count_free_fst2]=NULL; } free_MultiFlex_ctx(multiFlex_ctx); u_printf("Done.\n"); return SUCCESS_RETURN_CODE; }
/* * Process the compiler options into options suitable for passing to the * preprocessor and the real compiler. The preprocessor options don't include * -E; this is added later. Returns true on success, otherwise false. */ bool c166_process_args(struct args *orig_args, struct args **preprocessor_args, struct args **compiler_args) { int i; bool found_c_opt = false; bool found_S_opt = false; bool found_H_opt = false; /* 0: Choose preprocessor type by the file extension. * 1: Use c preprocessor. * 2: Use c++ preprocessor.*/ unsigned force_preprocessor_type = 0; const char *actual_language; /* Language to actually use. */ struct stat st; /* is the dependency makefile name overridden with -MF? */ bool dependency_filename_specified = false; /* is the dependency makefile target name specified with -MT or -MQ? */ bool dependency_target_specified = false; struct args *stripped_args = NULL, *dep_args = NULL, *h_args; int argc = orig_args->argc; char **argv = orig_args->argv; bool result = true; stripped_args = args_init(0, NULL); dep_args = args_init(0, NULL); h_args = args_init(0, NULL); args_add(stripped_args, argv[0]); for (i = 1; i < argc; i++) { /* The user knows best: just swallow the next arg */ if (str_eq(argv[i], "--ccache-skip")) { i++; if (i == argc) { cc_log("--ccache-skip lacks an argument"); result = false; goto out; } args_add(stripped_args, argv[i]); continue; } /* Special case for -E. */ if (str_eq(argv[i], "-E")) { stats_update(STATS_PREPROCESSING); result = false; goto out; } /* These are always too hard. */ if (compopt_too_hard(argv[i])) { cc_log("Compiler option %s is unsupported", argv[i]); stats_update(STATS_UNSUPPORTED); result = false; goto out; } /* These are too hard in direct mode. 
*/ if (enable_direct) { if (compopt_too_hard_for_direct_mode(argv[i])) { cc_log("Unsupported compiler option for direct mode: %s", argv[i]); enable_direct = false; } } /* we must have -c */ if (str_eq(argv[i], "-c")) { args_add(stripped_args, argv[i]); found_c_opt = true; continue; } /* -S changes the default extension */ /* TODO: Check this -S out! if (str_eq(argv[i], "-S")) { args_add(stripped_args, argv[i]); found_S_opt = true; continue; } */ /* we need to work out where the output was meant to go */ if (str_eq(argv[i], "-o")) { if (i == argc-1) { cc_log("Missing argument to %s", argv[i]); stats_update(STATS_ARGS); result = false; goto out; } output_obj = argv[i+1]; i++; continue; } /* alternate form of -o, with no space */ if (str_startswith(argv[i], "-o")) { output_obj = &argv[i][2]; continue; } /* debugging is handled specially, so that we know if we can strip line number info */ if (str_startswith(argv[i], "-g")) { args_add(stripped_args, argv[i]); if (enable_unify) { cc_log("%s used; disabling unify mode", argv[i]); enable_unify = false; } continue; } if (str_startswith(argv[i], "-H")) { cc_log("Detected -H %s", argv[i]); args_add(h_args, argv[i]); found_H_opt = true; continue; } /* * Options taking an argument that that we may want to rewrite * to relative paths to get better hit rate. A secondary effect * is that paths in the standard error output produced by the * compiler will be normalized. */ if (compopt_takes_path(argv[i])) { char *relpath; if (i == argc-1) { cc_log("Missing argument to %s", argv[i]); stats_update(STATS_ARGS); result = false; goto out; } args_add(stripped_args, argv[i]); relpath = make_relative_path(x_strdup(argv[i+1])); args_add(stripped_args, relpath); free(relpath); i++; continue; } /* Same as above but options with concatenated argument. 
*/ if (compopt_short(compopt_takes_path, argv[i])) { char *relpath; char *option; relpath = make_relative_path(x_strdup(argv[i] + 2)); option = format("-%c%s", argv[i][1], relpath); args_add(stripped_args, option); free(relpath); free(option); continue; } /* options that take an argument */ if (compopt_takes_arg(argv[i])) { if (i == argc-1) { cc_log("Missing argument to %s", argv[i]); stats_update(STATS_ARGS); result = false; goto out; } args_add(stripped_args, argv[i]); args_add(stripped_args, argv[i+1]); i++; continue; } if (str_eq(argv[i], "-c++")) { force_preprocessor_type = 2; args_add(stripped_args, argv[i]); continue; } if (str_eq(argv[i], "-noc++")) { force_preprocessor_type = 1; args_add(stripped_args, argv[i]); continue; } /* other options */ if (argv[i][0] == '-') { args_add(stripped_args, argv[i]); continue; } /* if an argument isn't a plain file then assume its an option, not an input file. This allows us to cope better with unusual compiler options */ if (stat(argv[i], &st) != 0 || !S_ISREG(st.st_mode)) { cc_log("%s is not a regular file, not considering as input file", argv[i]); args_add(stripped_args, argv[i]); continue; } if (input_file) { if (language_for_file(argv[i])) { cc_log("Multiple input files: %s and %s", input_file, argv[i]); stats_update(STATS_MULTIPLE); } else if (!found_c_opt) { cc_log("Called for link with %s", argv[i]); if (strstr(argv[i], "conftest.")) { stats_update(STATS_CONFTEST); } else { stats_update(STATS_LINK); } } else { cc_log("Unsupported source extension: %s", argv[i]); stats_update(STATS_SOURCELANG); } result = false; goto out; } /* Rewrite to relative to increase hit rate. 
*/ input_file = make_relative_path(x_strdup(argv[i])); } if (!input_file) { cc_log("No input file found"); stats_update(STATS_NOINPUT); result = false; goto out; } if(force_preprocessor_type == 0) { actual_language = language_for_file(input_file); } else if(force_preprocessor_type == 2) { actual_language = "c++"; } else { actual_language = "c"; } output_is_precompiled_header = actual_language && strstr(actual_language, "-header") != NULL; if (!found_c_opt && !output_is_precompiled_header) { cc_log("No -c option found"); /* I find that having a separate statistic for autoconf tests is useful, as they are the dominant form of "called for link" in many cases */ if (strstr(input_file, "conftest.")) { stats_update(STATS_CONFTEST); } else { stats_update(STATS_LINK); } result = false; goto out; } if (!actual_language) { cc_log("Unsupported source extension: %s", input_file); stats_update(STATS_SOURCELANG); result = false; goto out; } direct_i_file = language_is_preprocessed(actual_language); if (output_is_precompiled_header) { /* It doesn't work to create the .gch from preprocessed source. */ cc_log("Creating precompiled header; not compiling preprocessed code"); compile_preprocessed_source_code = false; } i_extension = getenv("CCACHE_EXTENSION"); if (!i_extension) { const char *p_language = p_language_for_language(actual_language); if(str_eq(p_language, "c++-cpp-output")) { /* Dirty fix for preprocessed file extension for cc166. 
* The cp166 cannot handle cpp files with extension ii.*/ i_extension = "ii.cpp"; } else { i_extension = extension_for_language(p_language) + 1; } } /* don't try to second guess the compilers heuristics for stdout handling */ if (output_obj && str_eq(output_obj, "-")) { stats_update(STATS_OUTSTDOUT); cc_log("Output file is -"); result = false; goto out; } if (!output_obj) { if (output_is_precompiled_header) { output_obj = format("%s.gch", input_file); } else { char *p; output_obj = x_strdup(input_file); if ((p = strrchr(output_obj, '/'))) { output_obj = p+1; } p = strrchr(output_obj, '.'); if (!p || !p[1]) { cc_log("Badly formed object filename"); stats_update(STATS_ARGS); result = false; goto out; } *p = 0; p = output_obj; if(found_S_opt) { output_obj = format("%s.s", p); } else { /*The default extension of object file is obj for c166.*/ output_obj = format("%s.obj", p); } free(p); } } /* cope with -o /dev/null */ if (!str_eq(output_obj,"/dev/null") && stat(output_obj, &st) == 0 && !S_ISREG(st.st_mode)) { cc_log("Not a regular file: %s", output_obj); stats_update(STATS_DEVICE); result = false; goto out; } /* * Some options shouldn't be passed to the real compiler when it compiles * preprocessed code: */ *preprocessor_args = args_copy(stripped_args); /* Args with -H has been already preprocessed. * If it passed to the compiler again, some type redefined error will pop up.*/ if (found_H_opt) { args_extend(*preprocessor_args, h_args); } /* * Add flags for dependency generation only to the preprocessor command line. 
*/ if (generating_dependencies) { if (!dependency_filename_specified) { char *default_depfile_name; char *base_name; base_name = remove_extension(output_obj); default_depfile_name = format("%s.d", base_name); free(base_name); args_add(dep_args, "-MF"); args_add(dep_args, default_depfile_name); output_dep = make_relative_path(x_strdup(default_depfile_name)); } if (!dependency_target_specified) { args_add(dep_args, "-MQ"); args_add(dep_args, output_obj); } } if (compile_preprocessed_source_code) { *compiler_args = args_copy(stripped_args); } else { *compiler_args = args_copy(*preprocessor_args); } /* Due to bugs or cc166 v8.6r3, the behaviours of c/c++ preprocessor * are quite different. * When using cpp preprocessor, the output will be directly send to stdout like gcc. * When using c preprocessor, the output will be written to filename.i even without "-o".*/ if(str_eq(actual_language, "c")) { #ifdef _WIN32 #error Never test this in Windows. #else args_add(*preprocessor_args, "-o/dev/stdout"); #endif } /* * Only pass dependency arguments to the preprocesor since Intel's C++ * compiler doesn't produce a correct .d file when compiling preprocessed * source. */ args_extend(*preprocessor_args, dep_args); out: args_free(stripped_args); args_free(dep_args); args_free(h_args); return result; }
/** * The same than main, but no call to setBufferMode. */ int main_BuildKrMwuDic(int argc,char* const argv[]) { if (argc==1) { usage(); return 0; } int val,index=-1; char output[FILENAME_MAX]=""; char inflection_dir[FILENAME_MAX]=""; char alphabet[FILENAME_MAX]=""; char dic_bin[FILENAME_MAX]=""; char dic_inf[FILENAME_MAX]=""; Encoding encoding_output = DEFAULT_ENCODING_OUTPUT; int bom_output = DEFAULT_BOM_OUTPUT; int mask_encoding_compatibility_input = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT; struct OptVars* vars=new_OptVars(); while (EOF!=(val=getopt_long_TS(argc,argv,optstring_BuildKrMwuDic,lopts_BuildKrMwuDic,&index,vars))) { switch(val) { case 'o': if (vars->optarg[0]=='\0') { fatal_error("You must specify a non empty output file name\n"); } strcpy(output,vars->optarg); break; case 'd': if (vars->optarg[0]=='\0') { fatal_error("Empty inflection directory\n"); } strcpy(inflection_dir,vars->optarg); break; case 'a': if (vars->optarg[0]=='\0') { fatal_error("You must specify a non empty alphabet file name\n"); } strcpy(alphabet,vars->optarg); break; case 'b': if (vars->optarg[0]=='\0') { fatal_error("You must specify a non empty binary dictionary name\n"); } strcpy(dic_bin,vars->optarg); remove_extension(dic_bin,dic_inf); strcat(dic_inf,".inf"); break; case 'h': usage(); return 0; case ':': if (index==-1) fatal_error("Missing argument for option -%c\n",vars->optopt); else fatal_error("Missing argument for option --%s\n",lopts_BuildKrMwuDic[index].name); case '?': if (index==-1) fatal_error("Invalid option -%c\n",vars->optopt); else fatal_error("Invalid option --%s\n",vars->optarg); break; case 'k': if (vars->optarg[0]=='\0') { fatal_error("Empty input_encoding argument\n"); } decode_reading_encoding_parameter(&mask_encoding_compatibility_input,vars->optarg); break; case 'q': if (vars->optarg[0]=='\0') { fatal_error("Empty output_encoding argument\n"); } decode_writing_encoding_parameter(&encoding_output,&bom_output,vars->optarg); break; } index=-1; } if 
(vars->optind!=argc-1) { fatal_error("Invalid arguments: rerun with --help\n"); } if (output[0]=='\0') { fatal_error("Output file must be specified\n"); } if (inflection_dir[0]=='\0') { fatal_error("Inflection directory must be specified\n"); } if (alphabet[0]=='\0') { fatal_error("Alphabet file must be specified\n"); } if (dic_bin[0]=='\0') { fatal_error("Binary dictionary must be specified\n"); } U_FILE* delas=u_fopen_existing_versatile_encoding(mask_encoding_compatibility_input,argv[vars->optind],U_READ); if (delas==NULL) { fatal_error("Cannot open %s\n",argv[vars->optind]); } U_FILE* grf=u_fopen_existing_versatile_encoding(mask_encoding_compatibility_input,output,U_WRITE); if (grf==NULL) { fatal_error("Cannot open %s\n",output); } Alphabet* alph=load_alphabet(alphabet,1); if (alph==NULL) { fatal_error("Cannot open alphabet file %s\n",alphabet); } Korean* korean=new Korean(alph); MultiFlex_ctx* multiFlex_ctx = (MultiFlex_ctx*)malloc(sizeof(MultiFlex_ctx)); if (multiFlex_ctx==NULL) { fatal_alloc_error("main_BuildKrMwuDic"); } strcpy(multiFlex_ctx->inflection_directory,inflection_dir); if (init_transducer_tree(multiFlex_ctx)) { fatal_error("init_transducer_tree error\n"); } struct l_morpho_t* pL_MORPHO=init_langage_morph(); if (pL_MORPHO == NULL) { fatal_error("init_langage_morph error\n"); } unsigned char* bin=load_BIN_file(dic_bin); struct INF_codes* inf=load_INF_file(dic_inf); create_mwu_dictionary(delas,grf,multiFlex_ctx,korean,pL_MORPHO,encoding_output, bom_output,mask_encoding_compatibility_input,bin,inf); free(bin); free_INF_codes(inf); u_fclose(delas); u_fclose(grf); free_alphabet(alph); delete korean; free_transducer_tree(multiFlex_ctx); for (int count_free_fst2=0;count_free_fst2<multiFlex_ctx->n_fst2;count_free_fst2++) { free_abstract_Fst2(multiFlex_ctx->fst2[count_free_fst2],&(multiFlex_ctx->fst2_free[count_free_fst2])); multiFlex_ctx->fst2[count_free_fst2]=NULL; } free_language_morpho(pL_MORPHO); free(multiFlex_ctx); free_OptVars(vars); 
u_printf("Done.\n"); return 0; }
/* * Process the compiler options into options suitable for passing to the * preprocessor and the real compiler. The preprocessor options don't include * -E; this is added later. Returns true on success, otherwise false. */ bool armcc_process_args(struct args *orig_args, struct args **preprocessor_args, struct args **compiler_args) { int i; bool found_c_opt = false; bool found_S_opt = false; bool found_pch = false; bool found_fpch_preprocess = false; const char *actual_language; /* Language to actually use. */ const char *input_charset = NULL; struct stat st; /* is the dependency makefile name overridden with --depend? */ bool dependency_filename_specified = false; /* is the dependency makefile target name specified ? */ bool dependency_target_specified = false; char *dep_file = NULL, *dep_dir = NULL; struct args *stripped_args = NULL, *dep_args = NULL; int argc = orig_args->argc; char **argv = orig_args->argv; bool result = true; /* 0: Choose preprocessor type by the file extension. * 1: Use c preprocessor. * 2: Use c++ preprocessor.*/ unsigned force_preprocessor_type = 0; stripped_args = args_init(0, NULL); dep_args = args_init(0, NULL); args_add(stripped_args, argv[0]); for (i = 1; i < argc; i++) { /* The user knows best: just swallow the next arg */ if (str_eq(argv[i], "--ccache-skip")) { i++; if (i == argc) { cc_log("--ccache-skip lacks an argument"); result = false; goto out; } args_add(stripped_args, argv[i]); continue; } /* Special case for -E. */ if (str_eq(argv[i], "-E")) { stats_update(STATS_PREPROCESSING); result = false; goto out; } /* These are always too hard. */ if (compopt_too_hard(argv[i]) || str_startswith(argv[i], "@") || str_startswith(argv[i], "-fdump-")) { cc_log("Compiler option %s is unsupported", argv[i]); stats_update(STATS_UNSUPPORTED); result = false; goto out; } /* These are too hard in direct mode. 
*/ if (enable_direct) { if (compopt_too_hard_for_direct_mode(argv[i])) { cc_log("Unsupported compiler option for direct mode: %s", argv[i]); enable_direct = false; } } /* we must have -c */ if (str_eq(argv[i], "-c")) { args_add(stripped_args, argv[i]); found_c_opt = true; continue; } /* -S changes the default extension */ if (str_eq(argv[i], "-S")) { args_add(stripped_args, argv[i]); found_S_opt = true; continue; } /* we need to work out where the output was meant to go */ if (str_eq(argv[i], "-o")) { if (i == argc-1) { cc_log("Missing argument to %s", argv[i]); stats_update(STATS_ARGS); result = false; goto out; } output_obj = argv[i+1]; i++; continue; } /* alternate form of -o, with no space */ if (str_startswith(argv[i], "-o")) { output_obj = &argv[i][2]; continue; } /* If multiple source type options are there, the armcc will use the last one. */ if (str_eq(argv[i], "--cpp")) { force_preprocessor_type = 2; continue; } else if (str_eq(argv[i], "--c90") || str_eq(argv[i], "--c99")) { force_preprocessor_type = 1; continue; } if (str_eq(argv[i], "--md")) { generating_dependencies = true; continue; } /* The rvct started supporting --depend_target from 4.0. * And there is a bug when using -E and --depend together with rvct which version is earlier than 4.0_697. * That is too hard to support "--depend" for the earlier version of rvct.*/ if (str_startswith(argv[i], "--depend_dir")) { /* We just concat the dir and the filename and pass the result * to --depend. 
*/ if (i >= argc - 1) { cc_log("Missing argument to %s", argv[i]); stats_update(STATS_ARGS); result = false; goto out; } dep_dir= x_strdup(argv[i+1]); i++; continue; } else if (str_startswith(argv[i], "--depend_target")) { if (i >= argc - 1) { cc_log("Missing argument to %s", argv[i]); stats_update(STATS_ARGS); result = false; goto out; } dependency_target_specified = true; args_add(dep_args, argv[i]); args_add(dep_args, argv[i+1]); i++; continue; } else if (str_startswith(argv[i], "--depend")) { dependency_filename_specified = true; generating_dependencies = true; if (i >= argc - 1) { cc_log("Missing argument to %s", argv[i]); stats_update(STATS_ARGS); result = false; goto out; } dep_file = x_strdup(argv[i + 1]); i++; continue; } /* * Options taking an argument that that we may want to rewrite * to relative paths to get better hit rate. A secondary effect * is that paths in the standard error output produced by the * compiler will be normalized. */ if (compopt_takes_path(argv[i])) { char *relpath; char *pchpath; if (i == argc-1) { cc_log("Missing argument to %s", argv[i]); stats_update(STATS_ARGS); result = false; goto out; } args_add(stripped_args, argv[i]); relpath = make_relative_path(x_strdup(argv[i+1])); args_add(stripped_args, relpath); /* Try to be smart about detecting precompiled headers */ pchpath = format("%s.gch", argv[i+1]); if (stat(pchpath, &st) == 0) { cc_log("Detected use of precompiled header: %s", pchpath); found_pch = true; } free(pchpath); free(relpath); i++; continue; } /* Same as above but options with concatenated argument. 
*/ if (compopt_short(compopt_takes_path, argv[i])) { char *relpath; char *option; relpath = make_relative_path(x_strdup(argv[i] + 2)); option = format("-%c%s", argv[i][1], relpath); args_add(stripped_args, option); free(relpath); free(option); continue; } /* options that take an argument */ if (compopt_takes_arg(argv[i])) { if (i == argc-1) { cc_log("Missing argument to %s", argv[i]); stats_update(STATS_ARGS); result = false; goto out; } args_add(stripped_args, argv[i]); args_add(stripped_args, argv[i+1]); i++; continue; } /* other options */ if (argv[i][0] == '-') { args_add(stripped_args, argv[i]); continue; } /* if an argument isn't a plain file then assume its an option, not an input file. This allows us to cope better with unusual compiler options */ if (stat(argv[i], &st) != 0 || !S_ISREG(st.st_mode)) { cc_log("%s is not a regular file, not considering as input file", argv[i]); args_add(stripped_args, argv[i]); continue; } if (input_file) { if (language_for_file(argv[i])) { cc_log("Multiple input files: %s and %s", input_file, argv[i]); stats_update(STATS_MULTIPLE); } else if (!found_c_opt) { cc_log("Called for link with %s", argv[i]); if (strstr(argv[i], "conftest.")) { stats_update(STATS_CONFTEST); } else { stats_update(STATS_LINK); } } else { cc_log("Unsupported source extension: %s", argv[i]); stats_update(STATS_SOURCELANG); } result = false; goto out; } /* Rewrite to relative to increase hit rate. 
*/ input_file = make_relative_path(x_strdup(argv[i])); } if (!input_file) { cc_log("No input file found"); stats_update(STATS_NOINPUT); result = false; goto out; } if (found_pch || found_fpch_preprocess) { using_precompiled_header = true; if (!(sloppiness & SLOPPY_TIME_MACROS)) { cc_log("You have to specify \"time_macros\" sloppiness when using" " precompiled headers to get direct hits"); cc_log("Disabling direct mode"); stats_update(STATS_CANTUSEPCH); result = false; goto out; } } if(force_preprocessor_type == 0) { actual_language = language_for_file(input_file); } else if(force_preprocessor_type == 2) { actual_language = "c++"; } else { actual_language = "c"; } output_is_precompiled_header = actual_language && strstr(actual_language, "-header") != NULL; if (!found_c_opt && !output_is_precompiled_header) { cc_log("No -c option found"); /* I find that having a separate statistic for autoconf tests is useful, as they are the dominant form of "called for link" in many cases */ if (strstr(input_file, "conftest.")) { stats_update(STATS_CONFTEST); } else { stats_update(STATS_LINK); } result = false; goto out; } if (!actual_language) { cc_log("Unsupported source extension: %s", input_file); stats_update(STATS_SOURCELANG); result = false; goto out; } direct_i_file = language_is_preprocessed(actual_language); if (output_is_precompiled_header) { /* It doesn't work to create the .gch from preprocessed source. 
*/ cc_log("Creating precompiled header; not compiling preprocessed code"); compile_preprocessed_source_code = false; } /* don't try to second guess the compilers heuristics for stdout handling */ if (output_obj && str_eq(output_obj, "-")) { stats_update(STATS_OUTSTDOUT); cc_log("Output file is -"); result = false; goto out; } if (!output_obj) { if (output_is_precompiled_header) { output_obj = format("%s.gch", input_file); } else { char *p; output_obj = x_strdup(input_file); if ((p = strrchr(output_obj, '/'))) { output_obj = p+1; } p = strrchr(output_obj, '.'); if (!p || !p[1]) { cc_log("Badly formed object filename"); stats_update(STATS_ARGS); result = false; goto out; } p[1] = found_S_opt ? 's' : 'o'; p[2] = 0; } } /* cope with -o /dev/null */ if (!str_eq(output_obj,"/dev/null") && stat(output_obj, &st) == 0 && !S_ISREG(st.st_mode)) { cc_log("Not a regular file: %s", output_obj); stats_update(STATS_DEVICE); result = false; goto out; } /* * Some options shouldn't be passed to the real compiler when it compiles * preprocessed code: * * -finput-charset=XXX (otherwise conversion happens twice) * -x XXX (otherwise the wrong language is selected) */ *preprocessor_args = args_copy(stripped_args); if (input_charset) { args_add(*preprocessor_args, input_charset); } if (found_pch) { args_add(*preprocessor_args, "-fpch-preprocess"); } /* * Add flags for dependency generation only to the preprocessor command line. 
*/ if (generating_dependencies) { char *dep_path; if(!dependency_filename_specified) { char *base_name; base_name = remove_extension(output_obj); dep_file = format("%s.d", base_name); free(base_name); } if (!dependency_target_specified) { args_add(dep_args, "--depend_target"); args_add(dep_args, output_obj); } free(output_dep); if(dep_dir) { #ifdef _WIN32 dep_path = make_relative_path(format("%s\\%s", dep_dir, dep_file)); #else dep_path = make_relative_path(format("%s/%s", dep_dir, dep_file)); #endif } else { dep_path = x_strdup(dep_file); } args_add(dep_args, "--depend"); args_add(dep_args, dep_path); /* dep_path will be free in make_relative_path */ output_dep = make_relative_path(x_strdup(dep_path)); } if (compile_preprocessed_source_code) { *compiler_args = args_copy(stripped_args); } else { *compiler_args = args_copy(*preprocessor_args); } i_extension = getenv("CCACHE_EXTENSION"); if (!i_extension) { const char *p_language = p_language_for_language(actual_language); i_extension = extension_for_language(p_language) + 1; } /* Patch for preprocessed file extension for armcc 3.1. * armcc 3.1 cannot recognize "i" or "ii" as the preprocessed source file * without --compile_all_input. */ args_add(*compiler_args, "--compile_all_input"); if (str_eq(i_extension, "ii")) { args_add(*compiler_args, "--cpp"); } /* * Only pass dependency arguments to the preprocesor since Intel's C++ * compiler doesn't produce a correct .d file when compiling preprocessed * source. */ args_extend(*preprocessor_args, dep_args); out: free(dep_file); free(dep_dir); args_free(stripped_args); args_free(dep_args); return result; }
int main_TEI2Txt(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } char output[FILENAME_MAX]=""; VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_TEI2Txt,lopts_TEI2Txt,&index))) { switch(val) { case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output file name\n"); return USAGE_ERROR_CODE; } strcpy(output,options.vars()->optarg); break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_TEI2Txt[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } if(output[0]=='\0') { remove_extension(argv[options.vars()->optind],output); strcat(output,".txt"); } int return_value = tei2txt(argv[options.vars()->optind],output,&vec); return return_value; }
int do_init(int argc, char** argv) { FILE *list; char line[1024]; struct map_data map; char name[MAP_NAME_LENGTH_EXT]; grf_list_file = aStrdup("conf/grf-files.txt"); map_list_file = aStrdup("db/map_index.txt"); /* setup pre-defined, #define-dependant */ map_cache_file = aStrdup("db/map_cache.dat"); cmdline->exec(argc, argv, CMDLINE_OPT_PREINIT); cmdline->exec(argc, argv, CMDLINE_OPT_NORMAL); ShowStatus("Initializing grfio with %s\n", grf_list_file); grfio_init(grf_list_file); // Attempt to open the map cache file and force rebuild if not found ShowStatus("Opening map cache: %s\n", map_cache_file); if(!rebuild) { map_cache_fp = fopen(map_cache_file, "rb"); if(map_cache_fp == NULL) { ShowNotice("Existing map cache not found, forcing rebuild mode\n"); rebuild = 1; } else fclose(map_cache_fp); } if(rebuild) map_cache_fp = fopen(map_cache_file, "w+b"); else map_cache_fp = fopen(map_cache_file, "r+b"); if(map_cache_fp == NULL) { ShowError("Failure when opening map cache file %s\n", map_cache_file); exit(EXIT_FAILURE); } // Open the map list ShowStatus("Opening map list: %s\n", map_list_file); list = fopen(map_list_file, "r"); if(list == NULL) { ShowError("Failure when opening maps list file %s\n", map_list_file); exit(EXIT_FAILURE); } // Initialize the main header if(rebuild) { header.file_size = sizeof(struct main_header); header.map_count = 0; } else { if(fread(&header, sizeof(struct main_header), 1, map_cache_fp) != 1){ printf("An error as occured while reading map_cache_fp \n"); } header.file_size = GetULong((unsigned char *)&(header.file_size)); header.map_count = GetUShort((unsigned char *)&(header.map_count)); } // Read and process the map list while(fgets(line, sizeof(line), list)) { if(line[0] == '/' && line[1] == '/') continue; if(sscanf(line, "%15s", name) < 1) continue; if(strcmp("map:", name) == 0 && sscanf(line, "%*s %15s", name) < 1) continue; name[MAP_NAME_LENGTH_EXT-1] = '\0'; remove_extension(name); if (find_map(name)) { ShowInfo("Map 
'"CL_WHITE"%s"CL_RESET"' already in cache.\n", name); } else if(!read_map(name, &map)) { ShowError("Map '"CL_WHITE"%s"CL_RESET"' not found!\n", name); } else if (!cache_map(name, &map)) { ShowError("Map '"CL_WHITE"%s"CL_RESET"' failed to cache (write error).\n", name); } else { ShowInfo("Map '"CL_WHITE"%s"CL_RESET"' successfully cached.\n", name); } } ShowStatus("Closing map list: %s\n", map_list_file); fclose(list); // Write the main header and close the map cache ShowStatus("Closing map cache: %s\n", map_cache_file); fseek(map_cache_fp, 0, SEEK_SET); fwrite(&header, sizeof(struct main_header), 1, map_cache_fp); fclose(map_cache_fp); ShowStatus("Finalizing grfio\n"); grfio_final(); ShowInfo("%d maps now in cache\n", header.map_count); aFree(grf_list_file); aFree(map_list_file); aFree(map_cache_file); return 0; }
void options_screen() { /* options are */ /* 17 Lines -7 -1025 0 New Progs ON OFF SAME 1 New Accs ON OFF SAME 2 Auto Window PAGING SCROLLING 3 Accs Window PAGING SCROLLING 4 Sets Window PAGING SCROLLING 5 Other Window PAGING SCROLLING 6 Auto Path -------------------------------------- 7 Accs Path -------------------------------------- 8 <Set Keys> 9 <Resolution Info> 10 [ OK ] 11[ CANCEL ] 12[ SAVE ] */ #define MAX_OPTIONS 12 int options_y, options_x, curr_option, old_option, exit_options; long key; int upk,lowk; /*char temp[60]; */ /* local defaults */ int lnew_progs_flag; /* 1 = ON, 2 = OFF, 3 = SAME */ int lnew_accs_flag; int lauto_paging; /* 1 = paging, 2 = scrolling */ int laccs_paging; int lsets_paging; int lother_paging; char lauto_path[FILENAME_MAX]; char laccs_path[FILENAME_MAX]; curr_option=0; old_option=11; lnew_progs_flag=prog_defaults.new_progs_flag; lnew_accs_flag=prog_defaults.new_accs_flag; lauto_paging=prog_defaults.auto_paging; laccs_paging=prog_defaults.accs_paging; lsets_paging=prog_defaults.sets_paging; lother_paging=prog_defaults.other_paging; strcpy(lauto_path,prog_defaults.auto_path); strcpy(laccs_path,prog_defaults.accs_path); options_y=prog_defaults.rows-3-17+1; if(options_y>0) options_y=options_y/2; else options_y=1; options_x=prog_defaults.columns-40; if(options_x>0) options_x=options_x/2; else options_x=0; CLEAR_SCREEN centre_text("OPTIONS",line_buffer,prog_defaults.columns,' '); DISCARD_EOL AT(title_row,0) REVERSE_VIDEO printf("%s",line_buffer); NORMAL_VIDEO options_bottom_title(); AT(options_y,options_x+1) printf("New Progs"); print_on_off_same(options_y,options_x+1,lnew_progs_flag); AT(options_y+1,options_x+1) printf("New Accs"); print_on_off_same(options_y+1,options_x+1,lnew_accs_flag); AT(options_y+3,options_x+1) printf("Auto Window"); print_paging_scrolling(options_y+3,options_x+1,lauto_paging); AT(options_y+4,options_x+1) printf("Accs Window"); print_paging_scrolling(options_y+4,options_x+1,laccs_paging); 
AT(options_y+5,options_x+1) printf("Sets Window"); print_paging_scrolling(options_y+5,options_x+1,lsets_paging); AT(options_y+6,options_x+1) printf("Other Window"); print_paging_scrolling(options_y+6,options_x+1,lother_paging); remove_extension(lauto_path); AT(options_y+8,options_x+1) printf("Auto Path"); /* AT(options_y+9,options_x+1) printf("--------------------------------------"); */ AT(options_y+9,options_x+1) printf("%s",lauto_path); remove_extension(laccs_path); AT(options_y+11,options_x+1) printf("Accs Path"); /* AT(options_y+12,options_x+1) printf("--------------------------------------"); */ AT(options_y+12,options_x+1) printf("%s",laccs_path); AT(options_y+14,options_x+1) printf("<Set Keys>"); AT(options_y+14,options_x+22) printf("<Resolution Info>"); AT(options_y+16,options_x+3) printf("[ OK ]"); AT(options_y+16,options_x+15) printf("[ CANCEL ]"); AT(options_y+16,options_x+27) printf("[ SAVE ]"); upk=0;lowk=0;exit_options=0; print_option(options_y,options_x,old_option, curr_option); while(exit_options==0) { while(!Bconstat(2)) ; key=Bconin(2); upk=(int)(key>>16); lowk=(int)(key%256); if(upk==0x48 ) /* up arrow */ { old_option=curr_option; switch(curr_option) { case 11: case 10: curr_option=8; break; case 12: curr_option=9; break; case 9: curr_option=7; break; default: curr_option=curr_option-1; break; } if(curr_option<0) curr_option=MAX_OPTIONS; print_option(options_y,options_x,old_option, curr_option); } else { if(upk==0x50) /* down arrow */ { old_option=curr_option; switch(curr_option) { case 8: curr_option=10; break; case 9: curr_option =12; break; case 10: case 11: case 12: curr_option=0; break; default: curr_option=curr_option+1; break; } if(curr_option>MAX_OPTIONS) curr_option=0; print_option(options_y,options_x,old_option, curr_option); } else { if(upk==0x4D) /* right arrow */ { old_option=curr_option; switch(curr_option) { case 0: lnew_progs_flag++; if(lnew_progs_flag>3) lnew_progs_flag=1; 
print_on_off_same(options_y,options_x+1,lnew_progs_flag); break; case 1: lnew_accs_flag++; if(lnew_accs_flag>3) lnew_accs_flag=1; print_on_off_same(options_y+1,options_x+1,lnew_accs_flag); break; case 2: lauto_paging++; if(lauto_paging>SCROLLING) lauto_paging=PAGING; print_paging_scrolling(options_y+3,options_x+1,lauto_paging); break; case 3: laccs_paging++; if(laccs_paging>SCROLLING) laccs_paging=PAGING; print_paging_scrolling(options_y+4,options_x+1,laccs_paging); break; case 4: lsets_paging++; if(lsets_paging>SCROLLING) lsets_paging=PAGING; print_paging_scrolling(options_y+5,options_x+1,lsets_paging); break; case 5: lother_paging++; if(lother_paging>SCROLLING) lother_paging=PAGING; print_paging_scrolling(options_y+6,options_x+1,lother_paging); break; case 8: case 10: case 11: curr_option++; break; } if(curr_option>MAX_OPTIONS) curr_option=0; print_option(options_y,options_x,old_option, curr_option); } else { if(upk==0x4B) /* left arrow */ { old_option=curr_option; switch(curr_option) { case 0: lnew_progs_flag--; if(lnew_progs_flag<1) lnew_progs_flag=3; print_on_off_same(options_y,options_x+1,lnew_progs_flag); break; case 1: lnew_accs_flag--; if(lnew_accs_flag<1) lnew_accs_flag=3; print_on_off_same(options_y+1,options_x+1,lnew_accs_flag); break; case 2: lauto_paging--; if(lauto_paging<PAGING) lauto_paging=SCROLLING; print_paging_scrolling(options_y+3,options_x+1,lauto_paging); break; case 3: laccs_paging--; if(laccs_paging<PAGING) laccs_paging=SCROLLING; print_paging_scrolling(options_y+4,options_x+1,laccs_paging); break; case 4: lsets_paging--; if(lsets_paging<PAGING) lsets_paging=SCROLLING; print_paging_scrolling(options_y+5,options_x+1,lsets_paging); break; case 5: lother_paging--; if(lother_paging<PAGING) lother_paging=SCROLLING; print_paging_scrolling(options_y+6,options_x+1,lother_paging); break; case 9: case 11: case 12: curr_option--; break; } if(curr_option<0) curr_option=MAX_OPTIONS; print_option(options_y,options_x,old_option, curr_option); } else { 
if(upk==0x1C) /* return */ { switch(curr_option) { case 6: edit_at(options_y+9,options_x+1,FILENAME_MAX,37,lauto_path,'-',0,valid_filename_char,1); options_bottom_title(); break; case 7: edit_at(options_y+12,options_x+1,FILENAME_MAX,37,laccs_path,'-',0,valid_filename_char,1); options_bottom_title(); break; case 8: /* set keys */ break; case 9: /* set res stuff */ break; case 10: /* set defaults */ prog_defaults.new_progs_flag=lnew_progs_flag; prog_defaults.new_accs_flag=lnew_accs_flag; prog_defaults.auto_paging=lauto_paging; prog_defaults.accs_paging=laccs_paging; prog_defaults.sets_paging=lsets_paging; prog_defaults.other_paging=lother_paging; strcpy(prog_defaults.auto_path,lauto_path); strcpy(prog_defaults.accs_path,laccs_path); exit_options=1; break; case 11: exit_options=1; break; case 12: /* save defaults */ break; } } else { if(upk==prog_defaults.toggle_key_high) { switch(curr_option) { case 0: lnew_progs_flag++; if(lnew_progs_flag>3) lnew_progs_flag=1; print_on_off_same(options_y,options_x+1,lnew_progs_flag); break; case 1: lnew_accs_flag++; if(lnew_accs_flag>3) lnew_accs_flag=1; print_on_off_same(options_y+1,options_x+1,lnew_accs_flag); break; case 2: lauto_paging++; if(lauto_paging>SCROLLING) lauto_paging=PAGING; print_paging_scrolling(options_y+3,options_x+1,lauto_paging); break; case 3: laccs_paging++; if(laccs_paging>SCROLLING) laccs_paging=PAGING; print_paging_scrolling(options_y+4,options_x+1,laccs_paging); break; case 4: lsets_paging++; if(lsets_paging>SCROLLING) lsets_paging=PAGING; print_paging_scrolling(options_y+5,options_x+1,lsets_paging); break; case 5: lother_paging++; if(lother_paging>SCROLLING) lother_paging=PAGING; print_paging_scrolling(options_y+6,options_x+1,lother_paging); break; } } } } } } } } }
/** * This function reads a file that contains a list of Elag grammar names, * and it compiles them into the file 'outname'. However, if the result * automaton is too big, it will be saved in several automata inside * the output file. */ int compile_elag_rules(char* rulesname,char* outname, const VersatileEncodingConfig* vec,language_t* language) { u_printf("Compilation of %s\n",rulesname); U_FILE* f=NULL; U_FILE* frules=u_fopen(ASCII,rulesname,U_READ); if (frules==NULL) { fatal_error("Cannot open file '%s'\n",rulesname); } U_FILE* out=u_fopen(ASCII,outname,U_WRITE); if (out==NULL) { fatal_error("cannot open file '%s'\n",outname); } /* Name of the file that contains the result automaton */ char fstoutname[FILENAME_MAX]; int nbRules=0; char buf[FILENAME_MAX]; time_t start_time=time(0); Fst2Automaton* res=NULL; Fst2Automaton* A; int fst_number=0; Ustring* ustr=new_Ustring(); char buf2[FILENAME_MAX]; char directory[FILENAME_MAX]; get_path(rulesname,directory); while (af_fgets(buf,FILENAME_MAX,frules->f)) { /* We read one by one the Elag grammar names in the .lst file */ chomp(buf); if (*buf=='\0') { /* If we have an empty line */ continue; } if (!is_absolute_path(buf)) { strcpy(buf2,buf); sprintf(buf,"%s%s",directory,buf2); } u_printf("\n%s...\n",buf); remove_extension(buf); strcat(buf,".elg"); if ((f=u_fopen(ASCII,buf,U_READ))==NULL) { /* If the .elg file doesn't exist, we create one */ remove_extension(buf); u_printf("Precompiling %s.fst2\n",buf); strcat(buf,".fst2"); elRule* rule=new_elRule(buf,vec,language); if (rule==NULL) { fatal_error("Unable to read grammar '%s'\n",buf); } if ((A=compile_elag_rule(rule,language))==NULL) { fatal_error("Unable to compile rule '%s'\n",buf); } free_elRule(rule); } else { /* If there is already .elg, we use it */ u_fclose(f); A=load_elag_grammar_automaton(vec,buf,language); if (A==NULL) { fatal_error("Unable to load '%s'\n",buf); } } if (A->automaton->number_of_states==0) { error("Grammar %s forbids everything!\n",buf); } if 
(res!=NULL) { /* If there is already an automaton, we intersect it with the new one */ SingleGraph tmp=res->automaton; res->automaton=elag_intersection(language,tmp,A->automaton,GRAMMAR_GRAMMAR); free_SingleGraph(tmp,NULL); free_Fst2Automaton(A,NULL); trim(res->automaton,NULL); } else { res=A; } nbRules++; if (res->automaton->number_of_states>MAX_GRAM_SIZE) { /* If the automaton is too large, we will split the grammar * into several automata */ elag_minimize(res->automaton,1); sprintf(fstoutname,"%s-%d.elg",outname,fst_number++); u_fprintf(out,"<%s>\n",fstoutname); u_printf("Splitting big grammar in '%s' (%d states)\n",fstoutname,res->automaton->number_of_states); u_sprintf(ustr,"%s: compiled elag grammar",fstoutname); free(res->name); res->name=u_strdup(ustr->str); save_automaton(res,fstoutname,vec,FST_GRAMMAR); free_Fst2Automaton(res,NULL); res=NULL; } } if (res!=NULL) { /* We save the last automaton, if any */ sprintf(fstoutname,"%s-%d.elg",outname,fst_number++); u_fprintf(out,"<%s>\n",fstoutname); u_printf("Saving grammar in '%s'(%d states)\n",fstoutname,res->automaton->number_of_states); elag_minimize(res->automaton,1); u_sprintf(ustr,"%s: compiled elag grammar",fstoutname); free(res->name); res->name=u_strdup(ustr->str); save_automaton(res,fstoutname,vec,FST_GRAMMAR); free_Fst2Automaton(res,free_symbol); } time_t end_time=time(0); u_fclose(frules); u_fclose(out); free_Ustring(ustr); u_printf("\nDone.\nElapsed time: %.0f s\n",difftime(end_time,start_time)); u_printf("\n%d rule%s from %s compiled in %s (%d automat%s)\n", nbRules,(nbRules>1)?"s":"",rulesname,outname,fst_number, (fst_number>1)?"a":"on"); return 0; }
/** * The same than main, but no call to setBufferMode. */ int main_Fst2Check(int argc,char* const argv[]) { if (argc==1) { usage(); return 0; } int check_recursion=0,tfst_check=0; int append_output=0; int display_statistics=0; char no_empty_graph_warning=0; char output[FILENAME_MAX]=""; Encoding encoding_output = DEFAULT_ENCODING_OUTPUT; int bom_output = DEFAULT_BOM_OUTPUT; int mask_encoding_compatibility_input = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT; int val,index=-1; struct OptVars* vars=new_OptVars(); while (EOF!=(val=getopt_long_TS(argc,argv,optstring_Fst2Check,lopts_Fst2Check,&index,vars))) { switch(val) { case 'a': append_output=1; break; case 'y': check_recursion=1; break; case 'n': check_recursion=0; break; case 's': display_statistics=1; break; case 't': tfst_check=1; /* If we have a tfst sentence graph, we must not report * compilation failure in the case of an empty graph. It * may be because of a sentence graph previously emptied by ELAG */ no_empty_graph_warning=1; break; case 'e': no_empty_graph_warning=1; break; case 'h': usage(); free_OptVars(vars); return 0; case ':': if (index==-1) fatal_error("Missing argument for option -%c\n",vars->optopt); else fatal_error("Missing argument for option --%s\n",lopts_Fst2Check[index].name); case 'k': if (vars->optarg[0]=='\0') { fatal_error("Empty input_encoding argument\n"); } decode_reading_encoding_parameter(&mask_encoding_compatibility_input,vars->optarg); break; case 'q': if (vars->optarg[0]=='\0') { fatal_error("Empty output_encoding argument\n"); } decode_writing_encoding_parameter(&encoding_output,&bom_output,vars->optarg); break; case 'o': if (vars->optarg[0]=='\0') { fatal_error("You must specify a non empty output file name\n"); } strcpy(output,vars->optarg); break; case '?': if (index==-1) fatal_error("Invalid option -%c\n",vars->optopt); else fatal_error("Invalid option --%s\n",vars->optarg); break; } index=-1; } if (vars->optind!=argc-1) { free_OptVars(vars); error("Invalid arguments: rerun 
with --help\n"); return 1; } char fst2_file_name[FILENAME_MAX]; remove_extension(argv[vars->optind],fst2_file_name); strcpy(fst2_file_name,argv[vars->optind]); U_FILE* ferr=NULL; free_OptVars(vars); if (output[0]!=0) { if (append_output == 0) { ferr=u_fopen_creating_versatile_encoding(encoding_output,bom_output, output, U_WRITE); } else { ferr = u_fopen_versatile_encoding(encoding_output,bom_output,mask_encoding_compatibility_input | ALL_ENCODING_BOM_POSSIBLE,output,U_APPEND); } } if (display_statistics) { display_fst2_file_stat(fst2_file_name,ferr); } if (check_recursion) { if (!OK_for_Locate_write_error(fst2_file_name,no_empty_graph_warning,ferr)) { if (ferr != NULL) { u_fclose(ferr); } return 1; } } if (tfst_check) { if (!valid_sentence_automaton_write_error(fst2_file_name,ferr)) { if (ferr != NULL) { u_fclose(ferr); } return 1; } } if (ferr != NULL) { u_fclose(ferr); } if ((check_recursion) || (tfst_check)) { u_printf("%s fst2 check has succeeded\n",fst2_file_name); } return 0; }