int main_Reg2Grf(int argc,char* const argv[]) { if (argc==1) { usage(); return 0; } Encoding encoding_output = DEFAULT_ENCODING_OUTPUT; int bom_output = DEFAULT_BOM_OUTPUT; int mask_encoding_compatibility_input = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT; int val,index=-1; struct OptVars* vars=new_OptVars(); while (EOF!=(val=getopt_long_TS(argc,argv,optstring_Reg2Grf,lopts_Reg2Grf,&index,vars))) { switch(val) { case 'k': if (vars->optarg[0]=='\0') { fatal_error("Empty input_encoding argument\n"); } decode_reading_encoding_parameter(&mask_encoding_compatibility_input,vars->optarg); break; case 'q': if (vars->optarg[0]=='\0') { fatal_error("Empty output_encoding argument\n"); } decode_writing_encoding_parameter(&encoding_output,&bom_output,vars->optarg); break; case 'h': usage(); return 0; case ':': if (index==-1) fatal_error("Missing argument for option -%c\n",vars->optopt); else fatal_error("Missing argument for option --%s\n",lopts_Reg2Grf[index].name); case '?': if (index==-1) fatal_error("Invalid option -%c\n",vars->optopt); else fatal_error("Invalid option --%s\n",vars->optarg); break; } index=-1; } if (vars->optind!=argc-1) { fatal_error("Invalid arguments: rerun with --help\n"); } U_FILE* f=u_fopen_existing_versatile_encoding(mask_encoding_compatibility_input,argv[vars->optind],U_READ); if (f==NULL) { fatal_error("Cannot open file %s\n",argv[vars->optind]); } /* We read the regular expression in the file */ unichar exp[REG_EXP_MAX_LENGTH]; if ((REG_EXP_MAX_LENGTH-1)==u_fgets(exp,REG_EXP_MAX_LENGTH,f)) { fatal_error("Too long regular expression\n"); } u_fclose(f); char grf_name[FILENAME_MAX]; get_path(argv[vars->optind],grf_name); strcat(grf_name,"regexp.grf"); if (!reg2grf(exp,grf_name,encoding_output,bom_output)) { return 1; } free_OptVars(vars); u_printf("Expression converted.\n"); return 0; }
int main_TEI2Txt(int argc,char* const argv[]) { if (argc==1) { usage(); return 0; } char output[FILENAME_MAX]=""; Encoding encoding_output = DEFAULT_ENCODING_OUTPUT; int bom_output = DEFAULT_BOM_OUTPUT; int mask_encoding_compatibility_input = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT; int val,index=-1; struct OptVars* vars=new_OptVars(); while (EOF!=(val=getopt_long_TS(argc,argv,optstring_TEI2Txt,lopts_TEI2Txt,&index,vars))) { switch(val) { case 'o': if (vars->optarg[0]=='\0') { fatal_error("You must specify a non empty output file name\n"); } strcpy(output,vars->optarg); break; case 'k': if (vars->optarg[0]=='\0') { fatal_error("Empty input_encoding argument\n"); } decode_reading_encoding_parameter(&mask_encoding_compatibility_input,vars->optarg); break; case 'q': if (vars->optarg[0]=='\0') { fatal_error("Empty output_encoding argument\n"); } decode_writing_encoding_parameter(&encoding_output,&bom_output,vars->optarg); break; case 'h': usage(); return 0; case ':': if (index==-1) fatal_error("Missing argument for option -%c\n",vars->optopt); else fatal_error("Missing argument for option --%s\n",lopts_TEI2Txt[index].name); case '?': if (index==-1) fatal_error("Invalid option -%c\n",vars->optopt); else fatal_error("Invalid option --%s\n",vars->optarg); break; } index=-1; } if (vars->optind!=argc-1) { fatal_error("Invalid arguments: rerun with --help\n"); } if(output[0]=='\0') { remove_extension(argv[vars->optind],output); strcat(output,".txt"); } tei2txt(argv[vars->optind],output,encoding_output,bom_output); free_OptVars(vars); return 0; }
int main_Normalize(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } int mode=KEEP_CARRIAGE_RETURN; int separator_normalization=1; char rules[FILENAME_MAX]=""; char input_offsets[FILENAME_MAX]=""; char output_offsets[FILENAME_MAX]=""; VersatileEncodingConfig vec=VEC_DEFAULT; int convLFtoCRLF=1; int val,index=-1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_Normalize,lopts_Normalize,&index))) { switch(val) { case 'l': convLFtoCRLF=0; break; case 'n': mode=REMOVE_CARRIAGE_RETURN; break; case 'r': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty replacement rule file name\n"); return USAGE_ERROR_CODE; } strcpy(rules,options.vars()->optarg); break; case 1: separator_normalization=0; break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case '$': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty input offset file name\n"); return USAGE_ERROR_CODE; } strcpy(input_offsets,options.vars()->optarg); break; case '@': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output offset file name\n"); return USAGE_ERROR_CODE; } strcpy(output_offsets,options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_Normalize[index].name); return USAGE_ERROR_CODE; break; case '?': index==-1 ? 
error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; break; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } vector_offset* v_input_offsets=NULL; vector_offset* v_output_offsets=NULL; U_FILE* f_output_offsets=NULL; if (output_offsets[0]!='\0') { /* We deal with offsets only if we have to produce output offsets */ if (input_offsets[0]!='\0') { v_input_offsets=load_offsets(&vec,input_offsets); } f_output_offsets=u_fopen(&vec, output_offsets, U_WRITE); if (f_output_offsets==NULL) { error("Cannot create offset file %s\n",output_offsets); return DEFAULT_ERROR_CODE; } v_output_offsets=new_vector_offset(); } char tmp_file[FILENAME_MAX]; get_extension(argv[options.vars()->optind],tmp_file); if (!strcmp(tmp_file, ".snt")) { /* If the file to process has already the .snt extension, we temporary rename it to * .snt.normalizing */ strcpy(tmp_file,argv[options.vars()->optind]); strcat(tmp_file,".normalizing"); af_rename(argv[options.vars()->optind],tmp_file); } else { strcpy(tmp_file,argv[options.vars()->optind]); } /* We set the destination file */ char dest_file[FILENAME_MAX]; remove_extension(argv[options.vars()->optind],dest_file); strcat(dest_file,".snt"); u_printf("Normalizing %s...\n",argv[options.vars()->optind]); int return_value = normalize(tmp_file, dest_file, &vec, mode, convLFtoCRLF, rules, v_output_offsets, separator_normalization); u_printf("\n"); /* If we have used a temporary file, we delete it */ if (strcmp(tmp_file,argv[options.vars()->optind])) { af_remove(tmp_file); } process_offsets(v_input_offsets,v_output_offsets,f_output_offsets); u_fclose(f_output_offsets); free_vector_offset(v_input_offsets); free_vector_offset(v_output_offsets); u_printf((return_value==SUCCESS_RETURN_CODE) ? 
"Done.\n" : "Unsuccessfull.\n"); return return_value; }
/**
 * Builds a logger from CreateLog-style command line arguments.
 *
 * The member ule starts from ule_default_init and init_done from 0. Options
 * toggle the store_* flags of ule or replace its log name / log directory.
 * init_done is set to 1 only when AddActivityLogger() returns a non-zero
 * value; otherwise ClearUniLoggerSpaceStruct(1) is called.
 *
 * Every early "return" below leaves the object with init_done == 0.
 *
 * @param argc number of entries in argv
 * @param argv command line arguments
 */
InstallLogger::InstallLogger(int argc,char* const argv[]) :
  ule(ule_default_init), init_done(0) {
  ClearUniLoggerSpaceStruct(0);

  if (argc==1) {
    usage();
    return;
  }

  Encoding encoding_output = DEFAULT_ENCODING_OUTPUT;
  int bom_output = DEFAULT_BOM_OUTPUT;
  int mask_encoding_compatibility_input = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT;
  int val;
  int index=-1;
  bool only_verify_arguments = false;
  UnitexGetOpt options;

  for (;;) {
    val=options.parse_long(argc,argv,optstring_CreateLog,lopts_CreateLog,&index);
    if (val==EOF) {
      break;
    }

    switch(val) {
      /* --- flags without argument --- */
      case 'V': only_verify_arguments = true; break;
      case 'n': ule.store_file_in_content = 0; break;
      case 'i': ule.store_file_in_content = 1; break;
      case 'u': ule.store_file_out_content = 0; break;
      case 'o': ule.store_file_out_content = 1; break;
      case 't': ule.store_list_file_in_content = 0; break;
      case 's': ule.store_list_file_in_content = 1; break;
      case 'f': ule.store_list_file_out_content = 0; break;
      case 'r': ule.store_list_file_out_content = 1; break;

      /* --- options that end the construction immediately --- */
      case 'h':
        usage();
        return;
      case 'g':
        ClearUniLoggerSpaceStruct(1);
        return;
      case 'p':
        if (options.vars()->optarg[0]=='\0') {
          error("You must specify a non empty param file\n");
          return;
        }
        ClearUniLoggerSpaceStruct(1);
        LoadParamFile(options.vars()->optarg);
        return;

      /* --- encodings --- */
      case 'k':
        if (options.vars()->optarg[0]=='\0') {
          error("Empty input_encoding argument\n");
          return;
        }
        decode_reading_encoding_parameter(&mask_encoding_compatibility_input,options.vars()->optarg);
        break;
      case 'q':
        if (options.vars()->optarg[0]=='\0') {
          error("Empty output_encoding argument\n");
          return;
        }
        decode_writing_encoding_parameter(&encoding_output,&bom_output,options.vars()->optarg);
        break;

      /* --- log file name and directory: the previous value is released --- */
      case 'l':
        if (options.vars()->optarg[0]=='\0') {
          error("You must specify a non empty log filename\n");
          return;
        }
        if (ule.szNameLog != NULL) {
          free((void*)ule.szNameLog);
        }
        ule.szNameLog = strdup(options.vars()->optarg);
        break;
      case 'd':
        if (options.vars()->optarg[0]=='\0') {
          error("You must specify a non empty directory\n");
          return;
        }
        if (ule.szPathLog != NULL) {
          free((void*)ule.szPathLog);
        }
        ule.szPathLog = strdup(options.vars()->optarg);
        break;

      /* --- getopt error reports --- */
      case ':':
        if (index==-1) {
          error("Missing argument for option -%c\n",options.vars()->optopt);
        } else {
          error("Missing argument for option --%s\n",lopts_CreateLog[index].name);
        }
        return;
      case '?':
        if (index==-1) {
          error("Invalid option -%c\n",options.vars()->optopt);
        } else {
          error("Invalid option --%s\n",options.vars()->optarg);
        }
        return;
    }
    index=-1;
  }

  /* NOTE(review): the number of positional arguments is tested but nothing
   * is done with the result - confirm whether this is intentional */
  if (options.vars()->optind!=argc-1) {
  }

  if (only_verify_arguments) {
    // freeing all allocated memory
    return;
  }

  if (AddActivityLogger(&ule) == 0) {
    ClearUniLoggerSpaceStruct(1);
  } else {
    init_done = 1;
  }
}
int main_ConcorDiff(int argc,char* const argv[]) { if (argc==1) { usage(); return 0; } int val,index=-1; char* out=NULL; char* font=NULL; int size=0; char foo; Encoding encoding_output = DEFAULT_ENCODING_OUTPUT; int bom_output = DEFAULT_BOM_OUTPUT; int mask_encoding_compatibility_input = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT; struct OptVars* vars=new_OptVars(); while (EOF!=(val=getopt_long_TS(argc,argv,optstring_ConcorDiff,lopts_ConcorDiff,&index,vars))) { switch(val) { case 'o': if (vars->optarg[0]=='\0') { fatal_error("You must specify a non empty output file\n"); } out=strdup(vars->optarg); if (out==NULL) { fatal_alloc_error("main_ConcorDiff"); } break; case 'f': if (vars->optarg[0]=='\0') { fatal_error("You must specify a non empty font name\n"); } font=strdup(vars->optarg); if (font==NULL) { fatal_alloc_error("main_ConcorDiff"); } break; case 's': if (1!=sscanf(vars->optarg,"%d%c",&size,&foo) || size<=0) { /* foo is used to check that the font size is not like "45gjh" */ fatal_error("Invalid font size argument: %s\n",vars->optarg); } break; case 'h': usage(); return 0; case ':': if (index==-1) fatal_error("Missing argument for option -%c\n",vars->optopt); else fatal_error("Missing argument for option --%s\n",lopts_ConcorDiff[index].name); case '?': if (index==-1) fatal_error("Invalid option -%c\n",vars->optopt); else fatal_error("Invalid option --%s\n",vars->optarg); break; case 'k': if (vars->optarg[0]=='\0') { fatal_error("Empty input_encoding argument\n"); } decode_reading_encoding_parameter(&mask_encoding_compatibility_input,vars->optarg); break; case 'q': if (vars->optarg[0]=='\0') { fatal_error("Empty output_encoding argument\n"); } decode_writing_encoding_parameter(&encoding_output,&bom_output,vars->optarg); break; } index=-1; } if (out==NULL) { fatal_error("You must specify the output file\n"); } if (font==NULL) { fatal_error("You must specify the font to use\n"); } if (size==0) { fatal_error("You must specify the font size to use\n"); } if 
(vars->optind!=argc-2) { error("Invalid arguments: rerun with --help\n"); return 1; } diff(encoding_output,bom_output,mask_encoding_compatibility_input,argv[vars->optind],argv[vars->optind+1],out,font,size); free(out); free(font); free_OptVars(vars); return 0; }
int main_Flatten(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } int RTN=1; int depth=10; VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; char foo; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_Flatten,lopts_Flatten,&index))) { switch(val) { case 'f': RTN=0; break; case 'r': RTN=1; break; case 'd': if (1!=sscanf(options.vars()->optarg,"%d%c",&depth,&foo) || depth<=0) { /* foo is used to check that the depth is not like "45gjh" */ error("Invalid depth argument: %s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_Flatten[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? 
error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; break; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } u_printf("Loading %s...\n",argv[options.vars()->optind]); struct FST2_free_info fst2_free; Fst2* origin=load_abstract_fst2(&vec,argv[options.vars()->optind],1,&fst2_free); if (origin==NULL) { error("Cannot load %s\n",argv[options.vars()->optind]); return DEFAULT_ERROR_CODE; } char temp[FILENAME_MAX]; strcpy(temp,argv[options.vars()->optind]); strcat(temp,".tmp.fst2"); switch (flatten_fst2(origin,depth,temp,&vec,RTN)) { case EQUIVALENT_FST: u_printf("The resulting grammar is an equivalent finite-state transducer.\n"); break; case APPROXIMATIVE_FST: u_printf("The resulting grammar is a finite-state approximation.\n"); break; case EQUIVALENT_RTN: u_printf("The resulting grammar is an equivalent FST2 (RTN).\n"); break; default: error("Internal state error in Flatten's main\n"); free_abstract_Fst2(origin,&fst2_free); return DEFAULT_ERROR_CODE; } free_abstract_Fst2(origin,&fst2_free); af_remove(argv[options.vars()->optind]); af_rename(temp,argv[options.vars()->optind]); return SUCCESS_RETURN_CODE; }
int main_MultiFlex(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } char output[FILENAME_MAX]=""; char config_dir[FILENAME_MAX]=""; char alphabet[FILENAME_MAX]=""; char pkgdir[FILENAME_MAX]=""; char* named=NULL; int is_korean=0; // default policy is to compile only out of date graphs GraphRecompilationPolicy graph_recompilation_policy = ONLY_OUT_OF_DATE; //Current language's alphabet int error_check_status=SIMPLE_AND_COMPOUND_WORDS; VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_MultiFlex,lopts_MultiFlex,&index))) { switch(val) { case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty DELAF file name\n"); free(named); return USAGE_ERROR_CODE; } strcpy(output,options.vars()->optarg); break; case 'a': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty alphabet file name\n"); free(named); return USAGE_ERROR_CODE; } strcpy(alphabet,options.vars()->optarg); break; case 'd': strcpy(config_dir,options.vars()->optarg); break; case 'K': is_korean=1; break; case 's': error_check_status=ONLY_SIMPLE_WORDS; break; case 'c': error_check_status=ONLY_COMPOUND_WORDS; break; case 'f': graph_recompilation_policy = ALWAYS_RECOMPILE; break; case 'n': graph_recompilation_policy = NEVER_RECOMPILE; break; case 't': graph_recompilation_policy = ONLY_OUT_OF_DATE; break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); free(named); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); free(named); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'p': if 
(options.vars()->optarg[0]=='\0') { error("You must specify a non empty package directory name\n"); free(named); return USAGE_ERROR_CODE; } strcpy(pkgdir,options.vars()->optarg); break; case 'r': if (named==NULL) { named=strdup(options.vars()->optarg); if (named==NULL) { alloc_error("main_Grf2Fst2"); return ALLOC_ERROR_CODE; } } else { char* more_names = (char*)realloc((void*)named,strlen(named)+strlen(options.vars()->optarg)+2); if (more_names) { named = more_names; } else { alloc_error("main_MultiFlex"); free(named); return ALLOC_ERROR_CODE; } strcat(named,";"); strcat(named,options.vars()->optarg); } break; case 'V': only_verify_arguments = true; break; case 'h': usage(); free(named); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_MultiFlex[index].name); free(named); return USAGE_ERROR_CODE; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); free(named); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); free(named); return USAGE_ERROR_CODE; } if (output[0]=='\0') { error("You must specify the output DELAF name\n"); free(named); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory free(named); return SUCCESS_RETURN_CODE; } //Load morphology description char morphology[FILENAME_MAX]; new_file(config_dir,"Morphology.txt",morphology); //int config_files_status=CONFIG_FILES_OK; Alphabet* alph=NULL; if (alphabet[0]!='\0') { //Load alphabet alph=load_alphabet(&vec,alphabet,1); //To be done once at the beginning of the inflection if (alph==NULL) { error("Cannot open alphabet file %s\n",alphabet); free(named); return DEFAULT_ERROR_CODE; } } //Init equivalence files char equivalences[FILENAME_MAX]; new_file(config_dir,"Equivalences.txt",equivalences); /* Korean */ 
Korean* korean=NULL; if (is_korean) { if (alph==NULL) { error("Cannot initialize Korean data with a NULL alphabet\n"); free(named); return DEFAULT_ERROR_CODE; } korean=new Korean(alph); } MultiFlex_ctx* p_multiFlex_ctx=new_MultiFlex_ctx(config_dir, morphology, equivalences, &vec, korean, pkgdir, named, graph_recompilation_policy); //DELAC inflection int return_value = inflect(argv[options.vars()->optind],output,p_multiFlex_ctx,alph,error_check_status); free(named); for (int count_free_fst2=0;count_free_fst2<p_multiFlex_ctx->n_fst2;count_free_fst2++) { free_abstract_Fst2(p_multiFlex_ctx->fst2[count_free_fst2],&(p_multiFlex_ctx->fst2_free[count_free_fst2])); p_multiFlex_ctx->fst2[count_free_fst2] = NULL; } free_alphabet(alph); free_MultiFlex_ctx(p_multiFlex_ctx); if (korean!=NULL) { delete korean; } u_printf("Done.\n"); return return_value; }
int main_Untokenize(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } char alphabet[FILENAME_MAX]=""; char token_file[FILENAME_MAX]=""; char dynamicSntDir[FILENAME_MAX]=""; VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; int range_start,range_stop,use_range; int token_step_number=0; range_start=range_stop=use_range=0; char foo=0; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_Untokenize,lopts_Untokenize,&index))) { switch(val) { case 'a': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty alphabet file name\n"); return USAGE_ERROR_CODE; } strcpy(alphabet,options.vars()->optarg); break; case 'd': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty snt dir name\n"); return USAGE_ERROR_CODE; } strcpy(dynamicSntDir,options.vars()->optarg); break; case 't': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty token file name\n"); return USAGE_ERROR_CODE; } strcpy(token_file,options.vars()->optarg); break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'n': if (1!=sscanf(options.vars()->optarg,"%d%c",&token_step_number,&foo) || token_step_number<=0) { /* foo is used to check that the search limit is not like "45gjh" */ error("Invalid token numbering argument: %s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; case 'r': { int param1 = 0; int param2 = 0; int ret_scan = sscanf(options.vars()->optarg,"%d,%d%c",¶m1,¶m2,&foo); if (ret_scan == 2) { range_start = param1; range_stop = 
param2; use_range=1; if (((range_start < -1)) || (range_stop < -1)) { /* foo is used to check that the search limit is not like "45gjh" */ error("Invalid stop count argument: %s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } } else if (1!=sscanf(options.vars()->optarg,"%d%c",&range_start,&foo) || (range_start < -1)) { /* foo is used to check that the search limit is not like "45gjh" */ error("Invalid stop count argument: %s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } use_range=1; } break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_Untokenize[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } char tokens_txt[FILENAME_MAX]; char text_cod[FILENAME_MAX]; char enter_pos[FILENAME_MAX]; if (dynamicSntDir[0]=='\0') { get_snt_path(argv[options.vars()->optind],dynamicSntDir); } strcpy(text_cod,dynamicSntDir); strcat(text_cod,"text.cod"); strcpy(enter_pos,dynamicSntDir); strcat(enter_pos,"enter.pos"); strcpy(tokens_txt,dynamicSntDir); strcat(tokens_txt,"tokens.txt"); Alphabet* alph=NULL; if (alphabet[0]!='\0') { alph=load_alphabet(&vec,alphabet); if (alph==NULL) { error("Cannot load alphabet file %s\n",alphabet); return DEFAULT_ERROR_CODE; } } ABSTRACTMAPFILE* af_text_cod=af_open_mapfile(text_cod,MAPFILE_OPTION_READ,0); if (af_text_cod==NULL) { error("Cannot open file %s\n",text_cod); free_alphabet(alph); return DEFAULT_ERROR_CODE; } ABSTRACTMAPFILE* af_enter_pos=af_open_mapfile(enter_pos,MAPFILE_OPTION_READ,0); 
if (af_enter_pos==NULL) { error("Cannot open file %s\n",enter_pos); af_close_mapfile(af_text_cod); free_alphabet(alph); return DEFAULT_ERROR_CODE; } U_FILE* text = u_fopen(&vec,argv[options.vars()->optind],U_WRITE); if (text==NULL) { error("Cannot create text file %s\n",argv[options.vars()->optind]); af_close_mapfile(af_enter_pos); af_close_mapfile(af_text_cod); free_alphabet(alph); return DEFAULT_ERROR_CODE; } struct text_tokens* tok=load_text_tokens(&vec,tokens_txt); u_printf("Untokenizing text...\n"); size_t nb_item = af_get_mapfile_size(af_text_cod)/sizeof(int); const int* buf=(const int*)af_get_mapfile_pointer(af_text_cod); size_t nb_item_enter_pos=0; const int* buf_enter=NULL; if (af_enter_pos!=NULL) { buf_enter=(const int*)af_get_mapfile_pointer(af_enter_pos); if (buf_enter!=NULL) { nb_item_enter_pos=af_get_mapfile_size(af_enter_pos)/sizeof(int); } } size_t count_pos=0; for (size_t i=0;i<nb_item;i++) { int is_in_range=1; if ((use_range!=0) && (i<(size_t)range_start)) { is_in_range=0; } if ((use_range!=0) && (range_stop!=0) && (i>(size_t)range_stop)) { is_in_range=0; } int is_newline=0; if (count_pos<nb_item_enter_pos) { if (i==(size_t)(*(buf_enter+count_pos))) { is_newline = 1; count_pos++; } } if (is_in_range!=0) { if (token_step_number != 0) if ((i%token_step_number)==0) u_fprintf(text,"\n\nToken %d : ", (int)i); if (is_newline!=0) { u_fprintf(text,"\n", tok->token[*(buf+i)]); } else { u_fputs(tok->token[*(buf+i)], text); } } } af_release_mapfile_pointer(af_text_cod,buf); af_release_mapfile_pointer(af_enter_pos,buf_enter); af_close_mapfile(af_enter_pos); af_close_mapfile(af_text_cod); free_text_tokens(tok); u_fclose(text); free_alphabet(alph); u_printf("\nDone.\n"); return SUCCESS_RETURN_CODE; }
int main_Extract(int argc,char* const argv[]) { if (argc==1) { usage(); return 0; } int val,index=-1; char extract_matching_units=1; char text_name[FILENAME_MAX]=""; char concord_ind[FILENAME_MAX]=""; char output[FILENAME_MAX]=""; Encoding encoding_output = DEFAULT_ENCODING_OUTPUT; int bom_output = DEFAULT_BOM_OUTPUT; int mask_encoding_compatibility_input = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT; struct OptVars* vars=new_OptVars(); while (EOF!=(val=getopt_long_TS(argc,argv,optstring_Extract,lopts_Extract,&index,vars))) { switch(val) { case 'y': extract_matching_units=1; break; case 'n': extract_matching_units=0; break; case 'o': if (vars->optarg[0]=='\0') { fatal_error("You must specify a non empty output file name\n"); } strcpy(output,vars->optarg); break; case 'i': if (vars->optarg[0]=='\0') { fatal_error("You must specify a non empty concordance file name\n"); } strcpy(concord_ind,vars->optarg); break; case 'h': usage(); return 0; case ':': if (index==-1) fatal_error("Missing argument for option -%c\n",vars->optopt); else fatal_error("Missing argument for option --%s\n",lopts_Extract[index].name); case '?': if (index==-1) fatal_error("Invalid option -%c\n",vars->optopt); else fatal_error("Invalid option --%s\n",vars->optarg); break; case 'k': if (vars->optarg[0]=='\0') { fatal_error("Empty input_encoding argument\n"); } decode_reading_encoding_parameter(&mask_encoding_compatibility_input,vars->optarg); break; case 'q': if (vars->optarg[0]=='\0') { fatal_error("Empty output_encoding argument\n"); } decode_writing_encoding_parameter(&encoding_output,&bom_output,vars->optarg); break; } index=-1; } if (output[0]=='\0') { fatal_error("You must specify the output text file\n"); } if (vars->optind!=argc-1) { fatal_error("Invalid arguments: rerun with --help\n"); } strcpy(text_name,argv[vars->optind]); struct snt_files* snt_files=new_snt_files(text_name); ABSTRACTMAPFILE* text=af_open_mapfile(snt_files->text_cod,MAPFILE_OPTION_READ,0); if (text==NULL) { error("Cannot 
open %s\n",snt_files->text_cod); return 1; } struct text_tokens* tok=load_text_tokens(snt_files->tokens_txt,mask_encoding_compatibility_input); if (tok==NULL) { error("Cannot load token list %s\n",snt_files->tokens_txt); af_close_mapfile(text); return 1; } if (tok->SENTENCE_MARKER==-1) { error("The text does not contain any sentence marker {S}\n"); af_close_mapfile(text); free_text_tokens(tok); return 1; } if (concord_ind[0]=='\0') { char tmp[FILENAME_MAX]; get_extension(text_name,tmp); if (strcmp(tmp,"snt")) { fatal_error("Unable to find the concord.ind file. Please explicit it\n"); } remove_extension(text_name,concord_ind); strcat(concord_ind,"_snt"); strcat(concord_ind,PATH_SEPARATOR_STRING); strcat(concord_ind,"concord.ind"); } U_FILE* concord=u_fopen_existing_versatile_encoding(mask_encoding_compatibility_input,concord_ind,U_READ); if (concord==NULL) { error("Cannot open concordance %s\n",concord_ind); af_close_mapfile(text); free_text_tokens(tok); return 1; } U_FILE* result=u_fopen_creating_versatile_encoding(encoding_output,bom_output,output,U_WRITE); if (result==NULL) { error("Cannot write output file %s\n",output); af_close_mapfile(text); u_fclose(concord); free_text_tokens(tok); return 1; } free_snt_files(snt_files); extract_units(extract_matching_units,text,tok,concord,result); af_close_mapfile(text); u_fclose(concord); u_fclose(result); free_text_tokens(tok); free_OptVars(vars); u_printf("Done.\n"); return 0; }
int main_Tokenize(int argc,char* const argv[]) { if (argc==1) { usage(); return 0; } char alphabet[FILENAME_MAX]=""; char token_file[FILENAME_MAX]=""; Encoding encoding_output = DEFAULT_ENCODING_OUTPUT; int bom_output = DEFAULT_BOM_OUTPUT; int mask_encoding_compatibility_input = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT; int val,index=-1; int mode=NORMAL; struct OptVars* vars=new_OptVars(); while (EOF!=(val=getopt_long_TS(argc,argv,optstring_Tokenize,lopts_Tokenize,&index,vars))) { switch(val) { case 'a': if (vars->optarg[0]=='\0') { fatal_error("You must specify a non empty alphabet file name\n"); } strcpy(alphabet,vars->optarg); break; case 'c': mode=CHAR_BY_CHAR; break; case 'w': mode=NORMAL; break; case 't': if (vars->optarg[0]=='\0') { fatal_error("You must specify a non empty token file name\n"); } strcpy(token_file,vars->optarg); break; case 'k': if (vars->optarg[0]=='\0') { fatal_error("Empty input_encoding argument\n"); } decode_reading_encoding_parameter(&mask_encoding_compatibility_input,vars->optarg); break; case 'q': if (vars->optarg[0]=='\0') { fatal_error("Empty output_encoding argument\n"); } decode_writing_encoding_parameter(&encoding_output,&bom_output,vars->optarg); break; case 'h': usage(); return 0; case ':': if (index==-1) fatal_error("Missing argument for option -%c\n",vars->optopt); else fatal_error("Missing argument for option --%s\n",lopts_Tokenize[index].name); case '?': if (index==-1) fatal_error("Invalid option -%c\n",vars->optopt); else fatal_error("Invalid option --%s\n",vars->optarg); break; } index=-1; } if (vars->optind!=argc-1) { fatal_error("Invalid arguments: rerun with --help\n"); } U_FILE* text; U_FILE* out; U_FILE* output; U_FILE* enter; char tokens_txt[FILENAME_MAX]; char text_cod[FILENAME_MAX]; char enter_pos[FILENAME_MAX]; Alphabet* alph=NULL; get_snt_path(argv[vars->optind],text_cod); strcat(text_cod,"text.cod"); get_snt_path(argv[vars->optind],tokens_txt); strcat(tokens_txt,"tokens.txt"); 
get_snt_path(argv[vars->optind],enter_pos); strcat(enter_pos,"enter.pos"); text=u_fopen_existing_versatile_encoding(mask_encoding_compatibility_input,argv[vars->optind],U_READ); if (text==NULL) { fatal_error("Cannot open text file %s\n",argv[vars->optind]); } if (alphabet[0]!='\0') { alph=load_alphabet(alphabet); if (alph==NULL) { error("Cannot load alphabet file %s\n",alphabet); u_fclose(text); return 1; } } out=u_fopen(BINARY,text_cod,U_WRITE); if (out==NULL) { error("Cannot create file %s\n",text_cod); u_fclose(text); if (alph!=NULL) { free_alphabet(alph); } return 1; } enter=u_fopen(BINARY,enter_pos,U_WRITE); if (enter==NULL) { error("Cannot create file %s\n",enter_pos); u_fclose(text); u_fclose(out); if (alph!=NULL) { free_alphabet(alph); } return 1; } vector_ptr* tokens=new_vector_ptr(4096); vector_int* n_occur=new_vector_int(4096); vector_int* n_enter_pos=new_vector_int(4096); struct hash_table* hashtable=new_hash_table((HASH_FUNCTION)hash_unichar,(EQUAL_FUNCTION)u_equal, (FREE_FUNCTION)free,NULL,(KEYCOPY_FUNCTION)keycopy); if (token_file[0]!='\0') { load_token_file(token_file,mask_encoding_compatibility_input,tokens,hashtable,n_occur); } output=u_fopen_creating_versatile_encoding(encoding_output,bom_output,tokens_txt,U_WRITE); if (output==NULL) { error("Cannot create file %s\n",tokens_txt); u_fclose(text); u_fclose(out); u_fclose(enter); if (alph!=NULL) { free_alphabet(alph); } free_hash_table(hashtable); free_vector_ptr(tokens,free); free_vector_int(n_occur); free_vector_int(n_enter_pos); return 1; } u_fprintf(output,"0000000000\n"); int SENTENCES=0; int TOKENS_TOTAL=0; int WORDS_TOTAL=0; int DIGITS_TOTAL=0; u_printf("Tokenizing text...\n"); if (mode==NORMAL) { normal_tokenization(text,out,output,alph,tokens,hashtable,n_occur,n_enter_pos, &SENTENCES,&TOKENS_TOTAL,&WORDS_TOTAL,&DIGITS_TOTAL); } else { char_by_char_tokenization(text,out,output,alph,tokens,hashtable,n_occur,n_enter_pos, &SENTENCES,&TOKENS_TOTAL,&WORDS_TOTAL,&DIGITS_TOTAL); } 
u_printf("\nDone.\n"); save_new_line_positions(enter,n_enter_pos); u_fclose(enter); u_fclose(text); u_fclose(out); u_fclose(output); write_number_of_tokens(tokens_txt,encoding_output,bom_output,tokens->nbelems); // we compute some statistics get_snt_path(argv[vars->optind],tokens_txt); strcat(tokens_txt,"stats.n"); output=u_fopen_creating_versatile_encoding(encoding_output,bom_output,tokens_txt,U_WRITE); if (output==NULL) { error("Cannot write %s\n",tokens_txt); } else { compute_statistics(output,tokens,alph,SENTENCES,TOKENS_TOTAL,WORDS_TOTAL,DIGITS_TOTAL); u_fclose(output); } // we save the tokens by frequence get_snt_path(argv[vars->optind],tokens_txt); strcat(tokens_txt,"tok_by_freq.txt"); output=u_fopen_creating_versatile_encoding(encoding_output,bom_output,tokens_txt,U_WRITE); if (output==NULL) { error("Cannot write %s\n",tokens_txt); } else { sort_and_save_by_frequence(output,tokens,n_occur); u_fclose(output); } // we save the tokens by alphabetical order get_snt_path(argv[vars->optind],tokens_txt); strcat(tokens_txt,"tok_by_alph.txt"); output=u_fopen_creating_versatile_encoding(encoding_output,bom_output,tokens_txt,U_WRITE); if (output==NULL) { error("Cannot write %s\n",tokens_txt); } else { sort_and_save_by_alph_order(output,tokens,n_occur); u_fclose(output); } free_hash_table(hashtable); free_vector_ptr(tokens,free); free_vector_int(n_occur); free_vector_int(n_enter_pos); if (alph!=NULL) { free_alphabet(alph); } free_OptVars(vars); return 0; }
int main_XMLizer(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } int output_style=TEI; char output[FILENAME_MAX]=""; char alphabet[FILENAME_MAX]=""; char normalization[FILENAME_MAX]=""; char segmentation[FILENAME_MAX]=""; VersatileEncodingConfig vec=VEC_DEFAULT; int convLFtoCRLF=1; int val,index=-1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_XMLizer,lopts_XMLizer,&index))) { switch(val) { case 'x': output_style=XML; break; case 't': output_style=TEI; break; case 'n': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty normalization grammar name\n"); return USAGE_ERROR_CODE; } strcpy(normalization,options.vars()->optarg); break; case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output file name\n"); return USAGE_ERROR_CODE; } strcpy(output,options.vars()->optarg); break; case 'a': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty alphabet file name\n"); return USAGE_ERROR_CODE; } strcpy(alphabet,options.vars()->optarg); break; case 's': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty segmentation grammar name\n"); return USAGE_ERROR_CODE; } strcpy(segmentation,options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? 
error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_XMLizer[index].name); return USAGE_ERROR_CODE; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (segmentation[0]=='\0') { error("You must specify the segmentation grammar to use\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } char input[FILENAME_MAX]; strcpy(input,argv[options.vars()->optind]); char snt[FILENAME_MAX]; remove_extension(input,snt); strcat(snt,"_tmp.snt"); char tmp[FILENAME_MAX]; remove_extension(input,tmp); strcat(tmp,".tmp"); normalize(input,snt,&vec,KEEP_CARRIAGE_RETURN,convLFtoCRLF,normalization,NULL,1); struct fst2txt_parameters* p=new_fst2txt_parameters(); p->vec=vec; p->input_text_file=strdup(snt); if (p->input_text_file ==NULL) { alloc_error("main_XMLizer"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } p->output_text_file_is_temp=1; p->output_text_file=strdup(tmp); if (p->output_text_file==NULL) { alloc_error("main_XMLizer"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } p->fst_file=strdup(segmentation); if (p->fst_file==NULL) { alloc_error("main_XMLizer"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } p->alphabet_file=strdup(alphabet); if 
(p->alphabet_file==NULL) { alloc_error("main_XMLizer"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } p->output_policy=MERGE_OUTPUTS; p->tokenization_policy=WORD_BY_WORD_TOKENIZATION; p->space_policy=DONT_START_WITH_SPACE; main_fst2txt(p); free_fst2txt_parameters(p); if (output[0]=='\0') { remove_extension(input,output); strcat(output,".xml"); } int return_value = xmlize(&vec,snt,output,output_style); af_remove(snt); af_remove(tmp); return return_value; }
int main_TEI2Txt(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } char output[FILENAME_MAX]=""; VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_TEI2Txt,lopts_TEI2Txt,&index))) { switch(val) { case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output file name\n"); return USAGE_ERROR_CODE; } strcpy(output,options.vars()->optarg); break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_TEI2Txt[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } if(output[0]=='\0') { remove_extension(argv[options.vars()->optind],output); strcat(output,".txt"); } int return_value = tei2txt(argv[options.vars()->optind],output,&vec); return return_value; }
/** * The same than main, but no call to setBufferMode. */ int main_BuildKrMwuDic(int argc,char* const argv[]) { if (argc==1) { usage(); return 0; } int val,index=-1; char output[FILENAME_MAX]=""; char inflection_dir[FILENAME_MAX]=""; char alphabet[FILENAME_MAX]=""; char dic_bin[FILENAME_MAX]=""; char dic_inf[FILENAME_MAX]=""; Encoding encoding_output = DEFAULT_ENCODING_OUTPUT; int bom_output = DEFAULT_BOM_OUTPUT; int mask_encoding_compatibility_input = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT; struct OptVars* vars=new_OptVars(); while (EOF!=(val=getopt_long_TS(argc,argv,optstring_BuildKrMwuDic,lopts_BuildKrMwuDic,&index,vars))) { switch(val) { case 'o': if (vars->optarg[0]=='\0') { fatal_error("You must specify a non empty output file name\n"); } strcpy(output,vars->optarg); break; case 'd': if (vars->optarg[0]=='\0') { fatal_error("Empty inflection directory\n"); } strcpy(inflection_dir,vars->optarg); break; case 'a': if (vars->optarg[0]=='\0') { fatal_error("You must specify a non empty alphabet file name\n"); } strcpy(alphabet,vars->optarg); break; case 'b': if (vars->optarg[0]=='\0') { fatal_error("You must specify a non empty binary dictionary name\n"); } strcpy(dic_bin,vars->optarg); remove_extension(dic_bin,dic_inf); strcat(dic_inf,".inf"); break; case 'h': usage(); return 0; case ':': if (index==-1) fatal_error("Missing argument for option -%c\n",vars->optopt); else fatal_error("Missing argument for option --%s\n",lopts_BuildKrMwuDic[index].name); case '?': if (index==-1) fatal_error("Invalid option -%c\n",vars->optopt); else fatal_error("Invalid option --%s\n",vars->optarg); break; case 'k': if (vars->optarg[0]=='\0') { fatal_error("Empty input_encoding argument\n"); } decode_reading_encoding_parameter(&mask_encoding_compatibility_input,vars->optarg); break; case 'q': if (vars->optarg[0]=='\0') { fatal_error("Empty output_encoding argument\n"); } decode_writing_encoding_parameter(&encoding_output,&bom_output,vars->optarg); break; } index=-1; } if 
(vars->optind!=argc-1) { fatal_error("Invalid arguments: rerun with --help\n"); } if (output[0]=='\0') { fatal_error("Output file must be specified\n"); } if (inflection_dir[0]=='\0') { fatal_error("Inflection directory must be specified\n"); } if (alphabet[0]=='\0') { fatal_error("Alphabet file must be specified\n"); } if (dic_bin[0]=='\0') { fatal_error("Binary dictionary must be specified\n"); } U_FILE* delas=u_fopen_existing_versatile_encoding(mask_encoding_compatibility_input,argv[vars->optind],U_READ); if (delas==NULL) { fatal_error("Cannot open %s\n",argv[vars->optind]); } U_FILE* grf=u_fopen_existing_versatile_encoding(mask_encoding_compatibility_input,output,U_WRITE); if (grf==NULL) { fatal_error("Cannot open %s\n",output); } Alphabet* alph=load_alphabet(alphabet,1); if (alph==NULL) { fatal_error("Cannot open alphabet file %s\n",alphabet); } Korean* korean=new Korean(alph); MultiFlex_ctx* multiFlex_ctx = (MultiFlex_ctx*)malloc(sizeof(MultiFlex_ctx)); if (multiFlex_ctx==NULL) { fatal_alloc_error("main_BuildKrMwuDic"); } strcpy(multiFlex_ctx->inflection_directory,inflection_dir); if (init_transducer_tree(multiFlex_ctx)) { fatal_error("init_transducer_tree error\n"); } struct l_morpho_t* pL_MORPHO=init_langage_morph(); if (pL_MORPHO == NULL) { fatal_error("init_langage_morph error\n"); } unsigned char* bin=load_BIN_file(dic_bin); struct INF_codes* inf=load_INF_file(dic_inf); create_mwu_dictionary(delas,grf,multiFlex_ctx,korean,pL_MORPHO,encoding_output, bom_output,mask_encoding_compatibility_input,bin,inf); free(bin); free_INF_codes(inf); u_fclose(delas); u_fclose(grf); free_alphabet(alph); delete korean; free_transducer_tree(multiFlex_ctx); for (int count_free_fst2=0;count_free_fst2<multiFlex_ctx->n_fst2;count_free_fst2++) { free_abstract_Fst2(multiFlex_ctx->fst2[count_free_fst2],&(multiFlex_ctx->fst2_free[count_free_fst2])); multiFlex_ctx->fst2[count_free_fst2]=NULL; } free_language_morpho(pL_MORPHO); free(multiFlex_ctx); free_OptVars(vars); 
u_printf("Done.\n"); return 0; }
/** * The same than main, but no call to setBufferMode. */ int main_BuildKrMwuDic(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } int val,index=-1; char output[FILENAME_MAX]=""; char inflection_dir[FILENAME_MAX]=""; char alphabet[FILENAME_MAX]=""; char dic_bin[FILENAME_MAX]=""; char dic_inf[FILENAME_MAX]=""; // default policy is to compile only out of date graphs GraphRecompilationPolicy graph_recompilation_policy = ONLY_OUT_OF_DATE; VersatileEncodingConfig vec=VEC_DEFAULT; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_BuildKrMwuDic,lopts_BuildKrMwuDic,&index))) { switch(val) { case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output file name\n"); return USAGE_ERROR_CODE; } strcpy(output,options.vars()->optarg); break; case 'd': if (options.vars()->optarg[0]=='\0') { error("Empty inflection directory\n"); return USAGE_ERROR_CODE; } strcpy(inflection_dir,options.vars()->optarg); break; case 'a': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty alphabet file name\n"); return USAGE_ERROR_CODE; } strcpy(alphabet,options.vars()->optarg); break; case 'b': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty binary dictionary name\n"); return USAGE_ERROR_CODE; } strcpy(dic_bin,options.vars()->optarg); remove_extension(dic_bin,dic_inf); strcat(dic_inf,".inf"); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case 'f': graph_recompilation_policy = ALWAYS_RECOMPILE; break; case 'n': graph_recompilation_policy = NEVER_RECOMPILE; break; case 't': graph_recompilation_policy = ONLY_OUT_OF_DATE; break; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_BuildKrMwuDic[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? 
error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (output[0]=='\0') { error("Output file must be specified\n"); return USAGE_ERROR_CODE; } if (inflection_dir[0]=='\0') { error("Inflection directory must be specified\n"); return USAGE_ERROR_CODE; } if (alphabet[0]=='\0') { error("Alphabet file must be specified\n"); return USAGE_ERROR_CODE; } if (dic_bin[0]=='\0') { error("Binary dictionary must be specified\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } U_FILE* delas=u_fopen(&vec,argv[options.vars()->optind],U_READ); if (delas==NULL) { error("Cannot open %s\n",argv[options.vars()->optind]); return DEFAULT_ERROR_CODE; } U_FILE* grf=u_fopen(&vec,output,U_WRITE); if (grf==NULL) { error("Cannot open %s\n",output); u_fclose(delas); return DEFAULT_ERROR_CODE; } Alphabet* alph=load_alphabet(&vec,alphabet,1); if (alph==NULL) { u_fclose(grf); u_fclose(delas); error("Cannot open alphabet file %s\n",alphabet); return DEFAULT_ERROR_CODE; } Korean* korean=new Korean(alph); MultiFlex_ctx* multiFlex_ctx=new_MultiFlex_ctx(inflection_dir, NULL, NULL, &vec, korean, NULL, NULL, graph_recompilation_policy); Dictionary* d=new_Dictionary(&vec,dic_bin,dic_inf); create_mwu_dictionary(delas,grf,multiFlex_ctx,d); free_Dictionary(d); u_fclose(delas); 
u_fclose(grf); free_alphabet(alph); delete korean; for (int count_free_fst2=0;count_free_fst2<multiFlex_ctx->n_fst2;count_free_fst2++) { free_abstract_Fst2(multiFlex_ctx->fst2[count_free_fst2],&(multiFlex_ctx->fst2_free[count_free_fst2])); multiFlex_ctx->fst2[count_free_fst2]=NULL; } free_MultiFlex_ctx(multiFlex_ctx); u_printf("Done.\n"); return SUCCESS_RETURN_CODE; }
int main_ElagComp(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; char compilename[FILENAME_MAX]=""; char directory[FILENAME_MAX]=""; char grammar[FILENAME_MAX]=""; char rule_file[FILENAME_MAX]=""; char lang[FILENAME_MAX]=""; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_ElagComp,lopts_ElagComp,&index))) { switch(val) { case 'l': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty language definition file\n"); return USAGE_ERROR_CODE; } strcpy(lang,options.vars()->optarg); break; case 'r': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty rule file\n"); return USAGE_ERROR_CODE; } strcpy(rule_file,options.vars()->optarg); get_path(rule_file,directory); break; case 'g': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty grammar file name\n"); return USAGE_ERROR_CODE; } strcpy(grammar,options.vars()->optarg); get_path(grammar,directory); break; case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output file\n"); return USAGE_ERROR_CODE; } strcpy(compilename,options.vars()->optarg); break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? 
error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_ElagComp[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (lang[0]=='\0') { error("You must define the language definition file\n"); return USAGE_ERROR_CODE; } if ((rule_file[0]=='\0' && grammar[0]=='\0') || (rule_file[0]!='\0' && grammar[0]!='\0')) { error("You must define a rule list OR a grammar\n"); return USAGE_ERROR_CODE; } if (options.vars()->optind!=argc) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (rule_file[0]=='\0' && grammar[0]=='\0') { error("You must specified a grammar or a rule file name\n"); return USAGE_ERROR_CODE; } if (rule_file[0]!='\0' && grammar[0]!='\0') { error("Cannot handle both a rule file and a grammar\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } language_t* language = load_language_definition(&vec,lang); if (rule_file[0]!='\0') { /* If we work with a rule list */ if (compilename[0]=='\0') { int l=(int)strlen(rule_file); if (strcmp(rule_file+l-4,".lst")==0) { strcpy(compilename,rule_file); strcpy(compilename+l-4,".rul"); } else { sprintf(compilename,"%s.rul",rule_file); } } if (compile_elag_rules(rule_file,compilename,&vec,language)==-1) { error("An error occurred while compiling %s\n",compilename); free_language_t(language); return DEFAULT_ERROR_CODE; } u_printf("\nElag grammars are compiled in %s.\n",compilename); } else { /* If we must compile a single grammar */ char elg_file[FILENAME_MAX]; get_extension(grammar,elg_file); if (strcmp(elg_file,".fst2")) { error("Grammar '%s' should be a .fst2 file\n"); free_language_t(language); return DEFAULT_ERROR_CODE; } remove_extension(grammar,elg_file); strcat(elg_file,".elg"); if 
(compile_elag_grammar(grammar,elg_file,&vec,language)==-1) { error("An error occured while compiling %s\n",grammar); free_language_t(language); return DEFAULT_ERROR_CODE; } u_printf("Elag grammar is compiled into %s.\n",elg_file); } free_language_t(language); return SUCCESS_RETURN_CODE; }
int main_Cassys(int argc,char* const argv[]) { if (argc==1) { usage(); return 0; } char transducer_list_file_name[FILENAME_MAX]; bool has_transducer_list = false; char text_file_name[FILENAME_MAX]; bool has_text_file_name = false; char alphabet_file_name[FILENAME_MAX]; char transducer_filename_prefix[FILENAME_MAX]; bool has_alphabet = false; char negation_operator[0x20]; Encoding encoding_output = DEFAULT_ENCODING_OUTPUT; int bom_output = DEFAULT_BOM_OUTPUT; int mask_encoding_compatibility_input = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT; int must_create_directory = 1; int in_place = 0; struct transducer_name_and_mode_linked_list* transducer_name_and_mode_linked_list_arg=NULL; // decode the command line int val; int index = 1; negation_operator[0]='\0'; transducer_filename_prefix[0]='\0'; struct OptVars* vars=new_OptVars(); while (EOF != (val = getopt_long_TS(argc, argv, optstring_Cassys, lopts_Cassys, &index, vars))) { switch (val) { case 'h': usage(); free_OptVars(vars); free_transducer_name_and_mode_linked_list(transducer_name_and_mode_linked_list_arg); return 0; case 'k': if (vars->optarg[0]=='\0') { fatal_error("Empty input_encoding argument\n"); } decode_reading_encoding_parameter(&mask_encoding_compatibility_input,vars->optarg); break; case 'q': if (vars->optarg[0]=='\0') { fatal_error("Empty output_encoding argument\n"); } decode_writing_encoding_parameter(&encoding_output,&bom_output,vars->optarg); break; case 't': { if (vars -> optarg[0] == '\0') { fatal_error("Command line error : Empty file name argument\n"); } char extension_text_name[FILENAME_MAX]; get_extension(vars -> optarg, extension_text_name); if (strcmp(extension_text_name, ".snt") != 0) { fatal_error( "Command line error : File name argument %s must be a preprocessed snt file\n", vars -> optarg); } strcpy(text_file_name, vars -> optarg); has_text_file_name = true; break; } case 'l': { if(vars -> optarg[0] == '\0'){ fatal_error("Command line error : Empty transducer list argument\n"); } else { 
strcpy(transducer_list_file_name, vars -> optarg); has_transducer_list = true; } break; } case 'r': { if(vars -> optarg[0] == '\0'){ fatal_error("Command line error : Empty transducer directory argument\n"); } else { strcpy(transducer_filename_prefix, vars -> optarg); has_transducer_list = true; } break; } case 's': { if(vars -> optarg[0] == '\0'){ fatal_error("Command line error : Empty transducer filename argument\n"); } else { transducer_name_and_mode_linked_list_arg=add_transducer_linked_list_new_name(transducer_name_and_mode_linked_list_arg,vars -> optarg); } break; } case 'm': { if(vars -> optarg[0] == '\0'){ fatal_error("Command line error : Empty transducer mode argument\n"); } else { set_last_transducer_linked_list_mode_by_string(transducer_name_and_mode_linked_list_arg,vars -> optarg); } break; } case 'a':{ if (vars -> optarg[0] == '\0') { fatal_error("Command line error : Empty alphabet argument\n"); } else { strcpy(alphabet_file_name, vars -> optarg); has_alphabet = true; } break; } case 'g': if (vars->optarg[0]=='\0') { fatal_error("You must specify an argument for negation operator\n"); } if ((strcmp(vars->optarg,"minus")!=0) && (strcmp(vars->optarg,"-")!=0) && (strcmp(vars->optarg,"tilde")!=0) && (strcmp(vars->optarg,"~")!=0)) { fatal_error("You must specify a valid argument for negation operator\n"); } strcpy(negation_operator,vars->optarg); break; case 'i': { in_place = 1; break; } case 'd': { must_create_directory = 0; break; } default :{ fatal_error("Unknown option : %c\n",val); break; } } } index = -1; if(has_alphabet == false){ fatal_error("Command line error : no alphabet provided\nRerun with --help\n"); } if(has_text_file_name == false){ fatal_error("Command line error : no text file provided\nRerun with --help\n"); } if((has_transducer_list == false) && (transducer_name_and_mode_linked_list_arg == NULL)){ fatal_error("Command line error : no transducer list provided\nRerun with --help\n"); } // Load the list of transducers from the file 
transducer list and stores it in a list //struct fifo *transducer_list = load_transducer(transducer_list_file_name); if ((transducer_name_and_mode_linked_list_arg == NULL) && has_transducer_list) transducer_name_and_mode_linked_list_arg = load_transducer_list_file(transducer_list_file_name); struct fifo *transducer_list=load_transducer_from_linked_list(transducer_name_and_mode_linked_list_arg,transducer_filename_prefix); cascade(text_file_name, in_place, must_create_directory, transducer_list, alphabet_file_name,negation_operator,encoding_output,bom_output,mask_encoding_compatibility_input); free_fifo(transducer_list); free_OptVars(vars); free_transducer_name_and_mode_linked_list(transducer_name_and_mode_linked_list_arg); return 0; }
int main_Fst2Txt(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } struct fst2txt_parameters* p=new_fst2txt_parameters(); char in_offsets[FILENAME_MAX]=""; char out_offsets[FILENAME_MAX]=""; int val,index=-1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_Fst2Txt,lopts_Fst2Txt,&index))) { switch(val) { case 't': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty text file name\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } p->input_text_file=strdup(options.vars()->optarg); if (p->input_text_file==NULL) { alloc_error("main_Fst2Txt"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } break; case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty text output file name\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } p->output_text_file=strdup(options.vars()->optarg); p->output_text_file_is_temp=0; if (p->output_text_file==NULL) { alloc_error("main_Fst2Txt"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } break; case 'a': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty alphabet file name\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } p->alphabet_file=strdup(options.vars()->optarg); if (p->alphabet_file==NULL) { alloc_error("main_Fst2Txt"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } break; case 'M': p->output_policy=MERGE_OUTPUTS; break; case 'R': p->output_policy=REPLACE_OUTPUTS; break; case 'c': p->tokenization_policy=CHAR_BY_CHAR_TOKENIZATION; break; case 'w': p->tokenization_policy=WORD_BY_WORD_TOKENIZATION; break; case 's': p->space_policy=START_WITH_SPACE; break; case 'x': p->space_policy=DONT_START_WITH_SPACE; break; case 'V': only_verify_arguments = true; break; case 'h': usage(); free_fst2txt_parameters(p); return SUCCESS_RETURN_CODE; case ':': index==-1 ? 
error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_Fst2Txt[index].name); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(p->vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(p->vec.encoding_output),&(p->vec.bom_output),options.vars()->optarg); break; case '$': if (options.vars()->optarg[0]=='\0') { error("Empty input_offsets argument\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } strcpy(in_offsets,options.vars()->optarg); break; case '@': if (options.vars()->optarg[0]=='\0') { error("Empty output_offsets argument\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } strcpy(out_offsets,options.vars()->optarg); break; case 'l': p->convLFtoCRLF=0; break; case 'r': p->keepCR = 1; break; case '?': index==-1 ? 
error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } if (p->input_text_file==NULL) { error("You must specify the text file\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory free_fst2txt_parameters(p); return SUCCESS_RETURN_CODE; } if (out_offsets[0]!='\0') { /* We deal with offsets only if the program is expected to produce some */ if (in_offsets[0]!='\0') { p->v_in_offsets=load_offsets(&(p->vec),in_offsets); if (p->v_in_offsets==NULL) { error("Cannot load offset file %s\n",in_offsets); free_fst2txt_parameters(p); return DEFAULT_ERROR_CODE; } } else { /* If there is no input offset file, we create an empty offset vector * in order to avoid testing whether the vector is NULL or not */ p->v_in_offsets=new_vector_offset(1); } p->f_out_offsets=u_fopen(&(p->vec),out_offsets,U_WRITE); if (p->f_out_offsets==NULL) { error("Cannot create file %s\n",out_offsets); free_fst2txt_parameters(p); return DEFAULT_ERROR_CODE; } } if (p->output_text_file == NULL) { char tmp[FILENAME_MAX]; remove_extension(p->input_text_file, tmp); strcat(tmp, ".tmp"); p->output_text_file_is_temp=1; p->output_text_file = strdup(tmp); if (p->output_text_file == NULL) { alloc_error("main_Fst2Txt"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } } p->fst_file=strdup(argv[options.vars()->optind]); if (p->fst_file==NULL) { alloc_error("main_Fst2Txt"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } int result=main_fst2txt(p); free_fst2txt_parameters(p); return result; }
int main_PolyLex(int argc,char* const argv[]) { if (argc==1) { usage(); return 0; } int language=-1; char alphabet[FILENAME_MAX]=""; char dictionary[FILENAME_MAX]=""; char output[FILENAME_MAX]=""; char info[FILENAME_MAX]=""; Encoding encoding_output = DEFAULT_ENCODING_OUTPUT; int bom_output = DEFAULT_BOM_OUTPUT; int mask_encoding_compatibility_input = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT; int val,index=-1; struct OptVars* vars=new_OptVars(); while (EOF!=(val=getopt_long_TS(argc,argv,optstring_PolyLex,lopts_PolyLex,&index,vars))) { switch(val) { case 'D': language=DUTCH; break; case 'G': language=GERMAN; break; case 'N': language=NORWEGIAN; break; case 'R': language=RUSSIAN; break; case 'a': if (vars->optarg[0]=='\0') { fatal_error("You must specify a non empty alphabet file name\n"); } strcpy(alphabet,vars->optarg); break; case 'd': if (vars->optarg[0]=='\0') { fatal_error("You must specify a non empty dictionary file name\n"); } strcpy(dictionary,vars->optarg); break; case 'o': if (vars->optarg[0]=='\0') { fatal_error("You must specify a non empty output file name\n"); } strcpy(output,vars->optarg); break; case 'i': if (vars->optarg[0]=='\0') { fatal_error("You must specify a non empty information file name\n"); } strcpy(info,vars->optarg); break; case 'k': if (vars->optarg[0]=='\0') { fatal_error("Empty input_encoding argument\n"); } decode_reading_encoding_parameter(&mask_encoding_compatibility_input,vars->optarg); break; case 'q': if (vars->optarg[0]=='\0') { fatal_error("Empty output_encoding argument\n"); } decode_writing_encoding_parameter(&encoding_output,&bom_output,vars->optarg); break; case 'h': usage(); return 0; case ':': if (index==-1) fatal_error("Missing argument for option -%c\n",vars->optopt); else fatal_error("Missing argument for option --%s\n",lopts_PolyLex[index].name); case '?': if (index==-1) fatal_error("Invalid option -%c\n",vars->optopt); else fatal_error("Invalid option --%s\n",vars->optarg); break; } index=-1; } if 
(vars->optind!=argc-1) { fatal_error("Invalid arguments: rerun with --help\n"); } if (dictionary[0]=='\0') { fatal_error("You must specify the .bin dictionary to use\n"); } if (output[0]=='\0') { fatal_error("You must specify the output dictionary file name\n"); } if (language==-1) { fatal_error("You must specify the language\n"); } Alphabet* alph=NULL; if (alphabet[0]!='\0') { u_printf("Loading alphabet...\n"); alph=load_alphabet(alphabet); if (alph==NULL) { fatal_error("Cannot load alphabet file %s\n",alphabet); } } char temp[FILENAME_MAX]; struct string_hash* forbiddenWords=NULL; if (language==DUTCH || language==NORWEGIAN) { get_path(dictionary,temp); strcat(temp,"ForbiddenWords.txt"); forbiddenWords=load_key_list(temp,mask_encoding_compatibility_input); } u_printf("Loading BIN file...\n"); struct BIN_free_info bin_free; const unsigned char* bin=load_abstract_BIN_file(dictionary,&bin_free); if (bin==NULL) { error("Cannot load bin file %s\n",dictionary); free_alphabet(alph); free_string_hash(forbiddenWords); return 1; } strcpy(temp,dictionary); temp[strlen(dictionary)-3]='\0'; strcat(temp,"inf"); u_printf("Loading INF file...\n"); struct INF_free_info inf_free; const struct INF_codes* inf=load_abstract_INF_file(temp,&inf_free); if (inf==NULL) { error("Cannot load inf file %s\n",temp); free_alphabet(alph); free_abstract_BIN(bin,&bin_free); free_string_hash(forbiddenWords); return 1; } char tmp[FILENAME_MAX]; strcpy(tmp,argv[vars->optind]); strcat(tmp,".tmp"); U_FILE* words=u_fopen_existing_versatile_encoding(mask_encoding_compatibility_input,argv[vars->optind],U_READ); if (words==NULL) { error("Cannot open word list file %s\n",argv[vars->optind]); free_alphabet(alph); free_abstract_BIN(bin,&bin_free); free_abstract_INF(inf,&inf_free); free_string_hash(forbiddenWords); // here we return 0 in order to do not block the preprocessing // in the Unitex Java interface, if no dictionary was applied // so that there is no "err" file return 0; } U_FILE* 
new_unknown_words=u_fopen_existing_versatile_encoding(mask_encoding_compatibility_input,tmp,U_WRITE); if (new_unknown_words==NULL) { error("Cannot open temporary word list file %s\n",tmp); free_alphabet(alph); free_abstract_BIN(bin,&bin_free); free_abstract_INF(inf,&inf_free); u_fclose(words); free_string_hash(forbiddenWords); return 1; } U_FILE* res=u_fopen_versatile_encoding(encoding_output,bom_output,mask_encoding_compatibility_input,output,U_APPEND); if (res==NULL) { error("Cannot open result file %s\n",output); free_alphabet(alph); free_abstract_BIN(bin,&bin_free); free_abstract_INF(inf,&inf_free); u_fclose(words); u_fclose(new_unknown_words); free_string_hash(forbiddenWords); return 1; } U_FILE* debug=NULL; if (info!=NULL) { debug=u_fopen_versatile_encoding(encoding_output,bom_output,mask_encoding_compatibility_input,info,U_WRITE); if (debug==NULL) { error("Cannot open debug file %s\n",info); } } struct utags UTAG; switch(language) { case DUTCH: analyse_dutch_unknown_words(alph,bin,inf,words,res,debug,new_unknown_words,forbiddenWords); break; case GERMAN: analyse_german_compounds(alph,bin,inf,words,res,debug,new_unknown_words); break; case NORWEGIAN: analyse_norwegian_unknown_words(alph,bin,inf,words,res,debug,new_unknown_words,forbiddenWords); break; case RUSSIAN: init_russian(&UTAG); analyse_compounds(alph,bin,inf,words,res,debug,new_unknown_words,UTAG); break; } free_alphabet(alph); free_abstract_BIN(bin,&bin_free); free_abstract_INF(inf,&inf_free); u_fclose(words); u_fclose(new_unknown_words); free_string_hash(forbiddenWords); af_remove(argv[vars->optind]); af_rename(tmp,argv[vars->optind]); u_fclose(res); if (debug!=NULL) { u_fclose(debug); } free_OptVars(vars); return 0; }
int main_SpellCheck(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; char mode=0; char snt[FILENAME_MAX]=""; char txt[FILENAME_MAX]=""; char output[FILENAME_MAX]=""; char output_set=0; char output_op='A'; SpellCheckConfig config; config.max_errors=1; config.max_SP_INSERT=1; config.max_SP_SUPPR=1; config.max_SP_SWAP=1; config.max_SP_CHANGE=1; for (int i=0;i<N_SPSubOp;i++) { config.score[i]=default_scores[i]; } config.min_length1=4; config.min_length2=6; config.min_length3=12; config.input_op='D'; config.keyboard=NULL; config.allow_uppercase_initial=0; char foo; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_SpellCheck,lopts_SpellCheck,&index))) { switch(val) { case 's': { strcpy(snt,options.vars()->optarg); mode='s'; break; } case 'f': { strcpy(txt,options.vars()->optarg); mode='f'; break; } case 'o': { if (options.vars()->optarg!=NULL) { strcpy(output,options.vars()->optarg); } output_set=1; break; } case 'I': { if (!strcmp(options.vars()->optarg,"D") || !strcmp(options.vars()->optarg,"M") || !strcmp(options.vars()->optarg,"U")) { config.input_op=options.vars()->optarg[0]; } else { error("Invalid argument %s for option --input-op: should in [DMU]\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 'O': { if (!strcmp(options.vars()->optarg,"O") || !strcmp(options.vars()->optarg,"A")) { output_op=options.vars()->optarg[0]; } else { error("Invalid argument %s for option --output-op: should in [OA]\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 1: { config.keyboard=get_Keyboard(options.vars()->optarg); if (config.keyboard==NULL) { error("Invalid argument %s for option --keyboard:\nUse --show-keyboards to see possible values\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 2: { print_available_keyboards(U_STDOUT); return SUCCESS_RETURN_CODE; } case 10: 
{ if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_errors,&foo)) { error("Invalid argument %s for --max-errors: should be an integer >=0\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 11: { if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_INSERT,&foo)) { error("Invalid argument %s for --max-insert: should be an integer >=0\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 12: { if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_SUPPR,&foo)) { error("Invalid argument %s for --max-suppr: should be an integer >=0\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 13: { if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_CHANGE,&foo)) { error("Invalid argument %s for --max-change: should be an integer >=0\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 14: { if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_SWAP,&foo)) { error("Invalid argument %s for --max-swap: should be an integer >=0\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 20: { int* scores=config.score; if (N_SPSubOp!=sscanf(options.vars()->optarg,"%d,%d,%d,%d,%d,%d,%d,%d,%d%c", scores,scores+1,scores+2,scores+3,scores+4,scores+5, scores+6,scores+7,scores+8,&foo)) { error("Invalid argument %s for option --scores. 
See --help-scores\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 21: { usage_scores(); return SUCCESS_RETURN_CODE; } case 22: { if (3!=sscanf(options.vars()->optarg,"%u,%u,%u%c", &config.min_length1,&config.min_length2,&config.min_length3,&foo)) { error("Invalid argument %s for option --min-lengths\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 23: { if (!strcmp(options.vars()->optarg,"yes")) { config.allow_uppercase_initial=1; } else if (!strcmp(options.vars()->optarg,"no")) { config.allow_uppercase_initial=0; } else { error("Invalid argument %s for option --upper-initial\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_SpellCheck[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? 
error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind==argc) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (mode==0) { error("You must use either --snt or --file\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } config.n_dics=argc-options.vars()->optind; config.dics=(Dictionary**)malloc(config.n_dics*sizeof(Dictionary*)); if (config.dics==NULL) { alloc_error("main_SpellCheck"); return ALLOC_ERROR_CODE; } for (int i=0;i<config.n_dics;i++) { config.dics[i]=new_Dictionary(&vec,argv[i+options.vars()->optind]); if (config.dics[i]==NULL) { error("Cannot load dictionary %s\n",argv[i+options.vars()->optind]); } } config.out=U_STDOUT; config.n_input_lines=0; config.n_output_lines=0; if (mode=='s') { /* When working with a .snt, we actually want to work on its err file */ get_snt_path(snt,txt); strcat(txt,"err"); /* the output must be dlf, and we note the number of lines in the existing * dlf file, if any */ get_snt_path(snt,output); strcat(output,"dlf.n"); U_FILE* f=u_fopen(&vec,output,U_READ); if (f!=NULL) { u_fscanf(f,"%d",&(config.n_output_lines)); u_fclose(f); } get_snt_path(snt,output); strcat(output,"dlf"); output_set=1; /* and we force the values for -I and -O */ config.input_op='U'; output_op='A'; } else { /* If mode=='f', we don't have anything to do since we already * defined the default output to stdout */ } if (output_set) { if (output_op=='O') { config.out=u_fopen(&vec,output,U_WRITE); } else { config.out=u_fopen(&vec,output,U_APPEND); } if (config.out==NULL) { error("Cannot open output file %s\n",output); for (int i=0;i<config.n_dics;i++) { free_Dictionary(config.dics[i]); } free(config.dics); return DEFAULT_ERROR_CODE; } } config.modified_input=NULL; char modified_input[FILENAME_MAX]=""; if (config.input_op!='D') { 
strcpy(modified_input,txt); strcat(modified_input,".tmp"); config.modified_input=u_fopen(&vec,modified_input,U_WRITE); if (config.modified_input==NULL) { error("Cannot open tmp file %s\n",modified_input); if (config.out!=U_STDOUT) { u_fclose(config.out); } for (int i=0;i<config.n_dics;i++) { free_Dictionary(config.dics[i]); } free(config.dics); return DEFAULT_ERROR_CODE; } } config.in=u_fopen(&vec,txt,U_READ); if (config.in==NULL) { error("Cannot open file %s\n",txt); u_fclose(config.modified_input); if (config.out!=U_STDOUT) { u_fclose(config.out); } for (int i=0;i<config.n_dics;i++) { free_Dictionary(config.dics[i]); } free(config.dics); return DEFAULT_ERROR_CODE; } /* We perform spellchecking */ spellcheck(&config); /* And we clean */ u_fclose(config.in); if (config.modified_input!=NULL) { /* If we used a tmp file because the input file has to be modified, * it's now time to actually modify it */ u_fclose(config.modified_input); af_remove(txt); af_rename(modified_input,txt); } if (config.out!=U_STDOUT) { u_fclose(config.out); } for (int i=0;i<config.n_dics;i++) { free_Dictionary(config.dics[i]); } free(config.dics); /* Finally, we update the dlf.n and err.n files if mode=='s' */ if (mode=='s') { get_snt_path(snt,output); strcat(output,"err.n"); U_FILE* f=u_fopen(&vec,output,U_WRITE); if (f!=NULL) { u_fprintf(f,"%d",config.n_input_lines); u_fclose(f); } if (config.input_op!='D') { get_snt_path(snt,output); strcat(output,"dlf.n"); U_FILE* fw=u_fopen(&vec,output,U_WRITE); if (fw!=NULL) { u_fprintf(fw,"%d",config.n_output_lines); u_fclose(fw); } } } return SUCCESS_RETURN_CODE; }
/** * The same than main, but no call to setBufferMode. */ int main_KeyWords(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } VersatileEncodingConfig vec=VEC_DEFAULT; char tokens[FILENAME_MAX]; char output[FILENAME_MAX]=""; char alph[FILENAME_MAX]=""; char cdic[FILENAME_MAX]=""; unichar* code=u_strdup("XXX"); int val,index=-1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_KeyWords,lopts_KeyWords,&index))) { switch(val) { case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output\n"); free(code); return USAGE_ERROR_CODE; } strcpy(output,options.vars()->optarg); break; case 'a': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty alphabet file name\n"); free(code); return USAGE_ERROR_CODE; } strcpy(alph,options.vars()->optarg); break; case 'f': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty forbidden code\n"); free(code); return USAGE_ERROR_CODE; } free(code); code=u_strdup(options.vars()->optarg); break; case 'c': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty file name\n"); free(code); return USAGE_ERROR_CODE; } strcpy(cdic,options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); free(code); return SUCCESS_RETURN_CODE; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); free(code); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); free(code); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case ':': index==-1 ? 
error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_KeyWords[index].name); free(code); return USAGE_ERROR_CODE; break; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); free(code); return USAGE_ERROR_CODE; break; } index=-1; } if (options.vars()->optind==argc || options.vars()->optind==argc-1) { error("Invalid arguments: rerun with --help\n"); free(code); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory free(code); return SUCCESS_RETURN_CODE; } Alphabet* alphabet=NULL; if (alph[0]!='\0') { alphabet=load_alphabet(&vec,alph); if (alphabet==NULL) { error("Cannot load alphabet file %s\n",alph); free(code); return DEFAULT_ERROR_CODE; } } strcpy(tokens,argv[(options.vars()->optind++)]); if (output[0]=='\0') { get_path(tokens,output); strcat(output,"keywords.txt"); } struct string_hash_ptr* keywords=load_tokens_by_freq(tokens,&vec); filter_non_letter_keywords(keywords,alphabet); if (cdic[0]!='\0') { load_compound_words(cdic,&vec,keywords); } for (;options.vars()->optind!=argc;(options.vars()->optind)++) { filter_keywords_with_dic(keywords,argv[options.vars()->optind],&vec,alphabet); } merge_case_equivalent_unknown_words(keywords,alphabet); struct string_hash* forbidden_lemmas=compute_forbidden_lemmas(keywords,code); remove_keywords_with_forbidden_lemma(keywords,forbidden_lemmas); free_string_hash(forbidden_lemmas); vector_ptr* sorted=sort_keywords(keywords); U_FILE* f_output=u_fopen(&vec,output,U_WRITE); if (f_output==NULL) { error("Cannot write in file %s\n",output); free_vector_ptr(sorted,(void(*)(void*))free_KeyWord_list); free_string_hash_ptr(keywords,(void(*)(void*))free_KeyWord_list); free_alphabet(alphabet); free(code); return DEFAULT_ERROR_CODE; } dump_keywords(sorted,f_output); u_fclose(f_output); free_vector_ptr(sorted,(void(*)(void*))free_KeyWord_list); 
free_string_hash_ptr(keywords,(void(*)(void*))free_KeyWord_list); free_alphabet(alphabet); free(code); return SUCCESS_RETURN_CODE; }
int main_SortTxt(int argc, char* const argv[]) { if (argc == 1) { usage(); return SUCCESS_RETURN_CODE; } struct sort_infos* inf = new_sort_infos(); if(!inf) { return ALLOC_ERROR_CODE; } int mode = DEFAULT; char line_info[FILENAME_MAX] = ""; char sort_order[FILENAME_MAX] = ""; VersatileEncodingConfig vec = { DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT, DEFAULT_ENCODING_OUTPUT, DEFAULT_BOM_OUTPUT }; int val, index = -1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF != (val = options.parse_long(argc, argv, optstring_SortTxt, lopts_SortTxt, &index))) { switch (val) { case 'n': inf->REMOVE_DUPLICATES = 1; break; case 'd': inf->REMOVE_DUPLICATES = 0; break; case 'r': inf->REVERSE = -1; break; case 'o': if (options.vars()->optarg[0] == '\0') { error("You must specify a non empty sort order file name\n"); free_sort_infos(inf); return USAGE_ERROR_CODE; } strcpy(sort_order, options.vars()->optarg); break; case 'l': if (options.vars()->optarg[0] == '\0') { error("You must specify a non empty information file name\n"); free_sort_infos(inf); return USAGE_ERROR_CODE; } strcpy(line_info, options.vars()->optarg); break; case 't': mode = THAI; break; case 'f': inf->factorize_inflectional_codes = 1; break; case 'V': only_verify_arguments = true; break; case 'h': usage(); free_sort_infos(inf); return SUCCESS_RETURN_CODE; case 'k': if (options.vars()->optarg[0] == '\0') { error("Empty input_encoding argument\n"); free_sort_infos(inf); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter( &(vec.mask_encoding_compatibility_input), options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0] == '\0') { error("Empty output_encoding argument\n"); free_sort_infos(inf); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output), &(vec.bom_output), options.vars()->optarg); break; case ':': index == -1 ? 
error("Missing argument for option -%c\n", options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_SortTxt[index].name); free_sort_infos(inf); return USAGE_ERROR_CODE; case '?': index == -1 ? error("Invalid option -%c\n", options.vars()->optopt) : error("Invalid option --%s\n", options.vars()->optarg); free_sort_infos(inf); return USAGE_ERROR_CODE; } index = -1; } if (options.vars()->optind != argc - 1) { error("Invalid arguments: rerun with --help\n"); free_sort_infos(inf); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory free_sort_infos(inf); return SUCCESS_RETURN_CODE; } if (sort_order[0] != '\0') { read_char_order(&vec, sort_order, inf); } char new_name[FILENAME_MAX]; strcpy(new_name, argv[options.vars()->optind]); strcat(new_name, ".new"); inf->f = u_fopen(&vec, argv[options.vars()->optind], U_READ); if (inf->f == NULL) { error("Cannot open file %s\n", argv[options.vars()->optind]); free_sort_infos(inf); return DEFAULT_ERROR_CODE; } inf->f_out = u_fopen(&vec, new_name, U_WRITE); if (inf->f_out == NULL) { error("Cannot open temporary file %s\n", new_name); u_fclose(inf->f); free_sort_infos(inf); return DEFAULT_ERROR_CODE; } switch (mode) { case DEFAULT: sort(inf); break; case THAI: sort_thai(inf); break; } if (line_info[0] != '\0') { U_FILE* F = u_fopen(&vec, line_info, U_WRITE); if (F == NULL) { error("Cannot write %s\n", line_info); } else { u_fprintf(F, "%d\n", inf->resulting_line_number); u_fclose(F); } } u_fclose(inf->f_out); u_fclose(inf->f); af_remove(argv[options.vars()->optind]); af_rename(new_name, argv[options.vars()->optind]); free_sort_infos(inf); u_printf("Done.\n"); return SUCCESS_RETURN_CODE; }
/** * This is the customized diff program designed to compare grf files. */ int main_SelectOutput(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_SelectOutput,lopts_SelectOutput,&index))) { switch(val) { case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case 'e': case 'o': { enum stdwrite_kind swk = (val == 'o') ? stdwrite_kind_out : stdwrite_kind_err; if (strcmp(options.vars()->optarg,"on") == 0) { SetStdWriteCB(swk,0,NULL,NULL); } else if (strcmp(options.vars()->optarg,"off") == 0) { SetStdWriteCB(swk,1,NULL,NULL); } else { error("Invalid option --%s, must be 'on' or 'off'\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_SelectOutput[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } // keep this for further modifications if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } return SUCCESS_RETURN_CODE; }
int main_Evamb(int argc,char* const argv[]) { if (argc==1) { usage(); return 0; } int val,index=-1; int sentence_number=-1; const char* outfilename=NULL; char output_name_buffer[FILENAME_MAX]=""; Encoding encoding_output = DEFAULT_ENCODING_OUTPUT; int bom_output = DEFAULT_BOM_OUTPUT; int mask_encoding_compatibility_input = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT; struct OptVars* vars=new_OptVars(); while (EOF!=(val=getopt_long_TS(argc,argv,optstring_Evamb,lopts_Evamb,&index,vars))) { switch(val) { case 's': { char c_foo; if (1!=sscanf(vars->optarg,"%d%c",&sentence_number,&c_foo) || sentence_number<=0) { /* foo is used to check that the sentence number is not like "45gjh" */ fatal_error("Invalid sentence number: %s\n",vars->optarg); } } break; case 'o': if (vars->optarg[0]=='\0') { fatal_error("You must specify a non empty output file name\n"); } strcpy(output_name_buffer,vars->optarg); outfilename=output_name_buffer; break; case 'k': if (vars->optarg[0]=='\0') { fatal_error("Empty input_encoding argument\n"); } decode_reading_encoding_parameter(&mask_encoding_compatibility_input,vars->optarg); break; case 'q': if (vars->optarg[0]=='\0') { fatal_error("Empty output_encoding argument\n"); } decode_writing_encoding_parameter(&encoding_output,&bom_output,vars->optarg); break; case 'h': usage(); return 0; case ':': if (index==-1) fatal_error("Missing argument for option -%c\n",vars->optopt); else fatal_error("Missing argument for option --%s\n",lopts_Evamb[index].name); case '?': if (index==-1) fatal_error("Invalid option -%c\n",vars->optopt); else fatal_error("Invalid option --%s\n",vars->optarg); break; } index=-1; } if (vars->optind!=argc-1) { fatal_error("Invalid arguments: rerun with --help\n"); } u_printf("Loading '%s'...\n",argv[vars->optind]); Tfst* tfst=open_text_automaton(argv[vars->optind]); if (tfst==NULL) { fatal_error("Unable to load '%s'\n",argv[vars->optind]); } if (sentence_number>tfst->N) { fatal_error("Invalid sentence number %d: should be in 
[1;%d]\n",sentence_number,tfst->N); } U_FILE* outfile = (outfilename == NULL) ? U_STDOUT : u_fopen_creating_versatile_encoding(encoding_output,bom_output, outfilename, U_WRITE); if (outfile==NULL) { close_text_automaton(tfst); free_OptVars(vars); error("Cannot create file %s\n",outfilename); return 1; } if (sentence_number==-1) { /* If we have to evaluate the ambiguity rate of the whole automaton */ double lognp_total=0.0; double lmoy_total=0.0; double maxlogamb=0.0; double minlogamb=(double)INT_MAX; /* This is the number of bad automata in the text .fst2 */ int n_bad_automata=0; int maxambno=-1; int minambno=-1; for (sentence_number=1;sentence_number<=tfst->N;sentence_number++) { load_sentence(tfst,sentence_number); SingleGraph graph=tfst->automaton; if (graph->number_of_states==0 || graph->states[0]->outgoing_transitions==NULL) { n_bad_automata++; error("Sentence %d: empty automaton\n",sentence_number); } else { /* log(number of paths) */ double lognp; /* minimum/maximum path length */ int lmin,lmax; /* Approximation of the sentence length */ double lmoy; /* log(ambiguity rate) */ double logamb; lognp=evaluate_ambiguity(graph,&lmin,&lmax); lmoy=(double)(lmin+lmax)/2.0; logamb=lognp/lmoy; if (maxlogamb<logamb) { maxlogamb=logamb; maxambno=sentence_number; } if (minlogamb>logamb) { minlogamb=logamb; minambno=sentence_number; } u_printf("Sentence %d \r",sentence_number); lognp_total=lognp_total+lognp; lmoy_total=lmoy_total+lmoy; } } if (n_bad_automata>=tfst->N) { error("No stats to print because no non-empty sentence automata were found.\n"); } else { u_fprintf(outfile,"%d/%d sentence%s taken into account\n",tfst->N-n_bad_automata,tfst->N,(tfst->N>1)?"s":""); u_fprintf(outfile,"Average ambiguity rate=%.3f\n",exp(lognp_total/lmoy_total)); u_fprintf(outfile,"Minimum ambiguity rate=%.3f (sentence %d)\n",exp(minlogamb),minambno); u_fprintf(outfile,"Maximum ambiguity rate=%.3f (sentence %d)\n",exp(maxlogamb),maxambno); } } else { /* If we have to evaluate the ambiguity 
rate of a single sentence automaton */ load_sentence(tfst,sentence_number); SingleGraph graph=tfst->automaton; if (graph->number_of_states==0) { error("Sentence %d: empty automaton\n",sentence_number); } else { int min; int max; double lognp=evaluate_ambiguity(graph,&min,&max); double lmoy=(double)(min+max)/2.0; u_fprintf(outfile,"Sentence %d: ambiguity rate=%.3f\n",sentence_number,exp(lognp/lmoy)); } } if (outfile!=U_STDOUT) u_fclose(outfile); close_text_automaton(tfst); free_OptVars(vars); return 0; }
/** * This is the customized diff program designed to compare grf files. */ int main_GrfDiff(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; char output[FILENAME_MAX]=""; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_GrfDiff,lopts_GrfDiff,&index))) { switch(val) { case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case 1: { strcpy(output,options.vars()->optarg); break; } case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_GrfDiff[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? 
error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-2) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } U_FILE* f=U_STDOUT; if (output[0]!='\0') { /* Since the output is supposed to be a diff-like one, there is no point * in outputing in a variable encoding, so we force UTF8 */ f=u_fopen(UTF8,output,U_WRITE); if (f==NULL) { error("Cannot create file %s\n",output); return DEFAULT_ERROR_CODE; } } Grf* a=load_Grf(&vec,argv[options.vars()->optind]); if (a==NULL) { if (f!=U_STDOUT) { u_fclose(f); } return DEFAULT_ERROR_CODE; } Grf* b=load_Grf(&vec,argv[options.vars()->optind+1]); if (b==NULL) { free_Grf(a); if (f!=U_STDOUT) { u_fclose(f); } return DEFAULT_ERROR_CODE; } GrfDiff* diff=grf_diff(a,b); free_Grf(a); free_Grf(b); print_diff(f,diff); if (f!=U_STDOUT) { u_fclose(f); } int different=diff->diff_ops->nbelems!=0; free_GrfDiff(diff); return different; }
int main_PolyLex(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } int language=-1; char alphabet[FILENAME_MAX]=""; char name_bin[FILENAME_MAX]=""; char output[FILENAME_MAX]=""; char info[FILENAME_MAX]=""; VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_PolyLex,lopts_PolyLex,&index))) { switch(val) { case 'D': language=DUTCH; break; case 'G': language=GERMAN; break; case 'N': language=NORWEGIAN; break; case 'R': language=RUSSIAN; break; case 'a': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty alphabet file name\n"); return USAGE_ERROR_CODE; } strcpy(alphabet,options.vars()->optarg); break; case 'd': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty dictionary file name\n"); return USAGE_ERROR_CODE; } strcpy(name_bin,options.vars()->optarg); break; case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output file name\n"); return USAGE_ERROR_CODE; } strcpy(output,options.vars()->optarg); break; case 'i': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty information file name\n"); return USAGE_ERROR_CODE; } strcpy(info,options.vars()->optarg); break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? 
error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_PolyLex[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (name_bin[0]=='\0') { error("You must specify the .bin dictionary to use\n"); return USAGE_ERROR_CODE; } if (output[0]=='\0') { error("You must specify the output dictionary file name\n"); return USAGE_ERROR_CODE; } if (language==-1) { error("You must specify the language\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } Alphabet* alph=NULL; if (alphabet[0]!='\0') { u_printf("Loading alphabet...\n"); alph=load_alphabet(&vec,alphabet); if (alph==NULL) { error("Cannot load alphabet file %s\n",alphabet); return USAGE_ERROR_CODE; } } char name_inf[FILENAME_MAX]; struct string_hash* forbiddenWords=NULL; if (language==DUTCH || language==NORWEGIAN) { get_path(name_bin,name_inf); strcat(name_inf,"ForbiddenWords.txt"); forbiddenWords=load_key_list(&vec,name_inf); if (forbiddenWords==NULL) { /* If there was no file, we don't want to block the process */ forbiddenWords=new_string_hash(DONT_USE_VALUES); } } strcpy(name_inf,name_bin); name_inf[strlen(name_bin)-3]='\0'; strcat(name_inf,"inf"); Dictionary* d=new_Dictionary(&vec,name_bin,name_inf); if (d==NULL) { error("Cannot load dictionary %s\n",name_bin); free_string_hash(forbiddenWords); free_alphabet(alph); return DEFAULT_ERROR_CODE; } char tmp[FILENAME_MAX]; strcpy(tmp,argv[options.vars()->optind]); strcat(tmp,".tmp"); U_FILE* words=u_fopen(&vec,argv[options.vars()->optind],U_READ); if (words==NULL) { error("Cannot open word list file %s\n",argv[options.vars()->optind]); free_Dictionary(d); 
free_string_hash(forbiddenWords); free_alphabet(alph); // here we return 0 in order to do not block the preprocessing // in the Unitex/GramLab IDE interface, if no dictionary was applied // so that there is no "err" file return SUCCESS_RETURN_CODE; } U_FILE* new_unknown_words=u_fopen(&vec,tmp,U_WRITE); if (new_unknown_words==NULL) { error("Cannot open temporary word list file %s\n",tmp); u_fclose(words); free_Dictionary(d); free_string_hash(forbiddenWords); free_alphabet(alph); return DEFAULT_ERROR_CODE; } U_FILE* res=u_fopen(&vec,output,U_APPEND); if (res==NULL) { error("Cannot open result file %s\n",output); u_fclose(new_unknown_words); u_fclose(words); free_Dictionary(d); free_string_hash(forbiddenWords); free_alphabet(alph); u_fclose(words); return DEFAULT_ERROR_CODE; } U_FILE* debug=NULL; if ((*info)!='\0') { debug=u_fopen(&vec,info,U_WRITE); if (debug==NULL) { error("Cannot open debug file %s\n",info); } } struct utags UTAG; switch(language) { case DUTCH: analyse_dutch_unknown_words(alph, d, words, res, debug, new_unknown_words, forbiddenWords); break; case GERMAN: analyse_german_compounds(alph, d, words, res, debug, new_unknown_words); break; case NORWEGIAN: analyse_norwegian_unknown_words(alph, d, words, res, debug, new_unknown_words, forbiddenWords); break; case RUSSIAN: init_russian(&UTAG); analyse_compounds(alph, d, words, res, debug, new_unknown_words, UTAG); break; } free_alphabet(alph); free_Dictionary(d); u_fclose(words); u_fclose(new_unknown_words); free_string_hash(forbiddenWords); af_remove(argv[options.vars()->optind]); af_rename(tmp,argv[options.vars()->optind]); u_fclose(res); if (debug!=NULL) { u_fclose(debug); } return SUCCESS_RETURN_CODE; }
/** * The same than main, but no call to setBufferMode. */ int main_Concord(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } int val,index=-1; struct conc_opt* concord_options = new_conc_opt(); char foo; VersatileEncodingConfig vec=VEC_DEFAULT; int ret; char offset_file[FILENAME_MAX]=""; char PRLG[FILENAME_MAX]=""; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_Concord,lopts_Concord,&index))) { switch(val) { case 'f': if (options.vars()->optarg[0]=='\0') { error("Empty font name argument\n"); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } concord_options->fontname=strdup(options.vars()->optarg); if (concord_options->fontname==NULL) { alloc_error("main_Concord"); free_conc_opt(concord_options); return ALLOC_ERROR_CODE; } break; case 's': if (1!=sscanf(options.vars()->optarg,"%d%c",&(concord_options->fontsize),&foo)) { /* foo is used to check that the font size is not like "45gjh" */ error("Invalid font size argument: %s\n",options.vars()->optarg); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } break; case 'l': ret=sscanf(options.vars()->optarg,"%d%c%c",&(concord_options->left_context),&foo,&foo); if (ret==0 || ret==3 || (ret==2 && foo!='s') || concord_options->left_context<0) { error("Invalid left context argument: %s\n",options.vars()->optarg); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } if (ret==2) { concord_options->left_context_until_eos=1; } break; case 'r': ret=sscanf(options.vars()->optarg,"%d%c%c",&(concord_options->right_context),&foo,&foo); if (ret==0 || ret==3 || (ret==2 && foo!='s') || concord_options->right_context<0) { error("Invalid right context argument: %s\n",options.vars()->optarg); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } if (ret==2) { concord_options->right_context_until_eos=1; } break; case 'L': concord_options->convLFtoCRLF=0; break; case 0: concord_options->sort_mode=TEXT_ORDER; break; 
case 1: concord_options->sort_mode=LEFT_CENTER; break; case 2: concord_options->sort_mode=LEFT_RIGHT; break; case 3: concord_options->sort_mode=CENTER_LEFT; break; case 4: concord_options->sort_mode=CENTER_RIGHT; break; case 5: concord_options->sort_mode=RIGHT_LEFT; break; case 6: concord_options->sort_mode=RIGHT_CENTER; break; case 7: concord_options->result_mode=DIFF_; break; case 8: concord_options->only_ambiguous=1; break; case 9: { strcpy(PRLG,options.vars()->optarg); char* pos=strchr(PRLG,','); if (pos==NULL || pos==PRLG || *(pos+1)=='\0') { error("Invalid argument for option --PRLG: %s\n",options.vars()->optarg); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } *pos='\0'; strcpy(offset_file,pos+1); break; } case 10: concord_options->only_matches=1; break; case 11: concord_options->result_mode=LEMMATIZE_; break; case 12: concord_options->result_mode=CSV_; break; case 'H': concord_options->result_mode=HTML_; break; case 't': { concord_options->result_mode=TEXT_; if (options.vars()->optarg!=NULL) { strcpy(concord_options->output,options.vars()->optarg); } break; } case 'g': concord_options->result_mode=GLOSSANET_; if (options.vars()->optarg[0]=='\0') { error("Empty glossanet script argument\n"); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } concord_options->script=strdup(options.vars()->optarg); if (concord_options->script==NULL) { alloc_error("main_Concord"); free_conc_opt(concord_options); return ALLOC_ERROR_CODE; } break; case 'p': concord_options->result_mode=SCRIPT_; if (options.vars()->optarg[0]=='\0') { error("Empty script argument\n"); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } concord_options->script=strdup(options.vars()->optarg); if (concord_options->script==NULL) { alloc_error("main_Concord"); free_conc_opt(concord_options); return ALLOC_ERROR_CODE; } break; case 'i': concord_options->result_mode=INDEX_; break; case 'u': concord_options->result_mode=UIMA_; if (options.vars()->optarg!=NULL) { 
strcpy(offset_file,options.vars()->optarg); } concord_options->original_file_offsets=1; break; case 'e': concord_options->result_mode=XML_; if (options.vars()->optarg!=NULL) { strcpy(offset_file, options.vars()->optarg); concord_options->original_file_offsets=1; } break; case 'w': concord_options->result_mode=XML_WITH_HEADER_; if (options.vars()->optarg!=NULL) { strcpy(offset_file, options.vars()->optarg); concord_options->original_file_offsets = 1; } break; case '$': if (options.vars()->optarg[0]=='\0') { error("Empty input_offsets argument\n"); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } strcpy(concord_options->input_offsets,options.vars()->optarg); break; case '@': if (options.vars()->optarg[0]=='\0') { error("Empty output_offsets argument\n"); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } strcpy(concord_options->output_offsets,options.vars()->optarg); break; case 'A': concord_options->result_mode=AXIS_; break; case 'x': concord_options->result_mode=XALIGN_; break; case 'm': concord_options->result_mode=MERGE_; if (options.vars()->optarg[0]=='\0') { error("Empty output file name argument\n"); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } strcpy(concord_options->output,options.vars()->optarg); break; case 'a': if (options.vars()->optarg[0]=='\0') { error("Empty alphabet argument\n"); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } concord_options->sort_alphabet=strdup(options.vars()->optarg); if (concord_options->sort_alphabet==NULL) { alloc_error("main_Concord"); free_conc_opt(concord_options); return ALLOC_ERROR_CODE; } break; case 'T': concord_options->thai_mode=1; break; case 'd': if (options.vars()->optarg[0]=='\0') { error("Empty snt directory argument\n"); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } strcpy(concord_options->working_directory,options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); free_conc_opt(concord_options); return 
SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_Concord[index].name); free_conc_opt(concord_options); return USAGE_ERROR_CODE; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); free_conc_opt(concord_options); return USAGE_ERROR_CODE; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } if (concord_options->fontname==NULL || concord_options->fontsize<=0) { if (concord_options->result_mode==HTML_ || concord_options->result_mode==GLOSSANET_) { error("The specified output mode is an HTML file: you must specify font parameters\n"); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } } if (only_verify_arguments) { // freeing all allocated memory free_conc_opt(concord_options); return SUCCESS_RETURN_CODE; } U_FILE* concor=u_fopen(&vec,argv[options.vars()->optind],U_READ); if (concor==NULL) { error("Cannot open concordance index file %s\n",argv[options.vars()->optind]); free_conc_opt(concord_options); return DEFAULT_ERROR_CODE; } if (concord_options->working_directory[0]=='\0') { get_path(argv[options.vars()->optind],concord_options->working_directory); } if (concord_options->only_matches) { concord_options->left_context=0; concord_options->right_context=0; } /* We 
compute the name of the files associated to the text */ struct snt_files* snt_files=new_snt_files_from_path(concord_options->working_directory); ABSTRACTMAPFILE* text=af_open_mapfile(snt_files->text_cod,MAPFILE_OPTION_READ,0); if (text==NULL) { error("Cannot open file %s\n",snt_files->text_cod); free_snt_files(snt_files); u_fclose(concor); free_conc_opt(concord_options); return DEFAULT_ERROR_CODE; } struct text_tokens* tok=load_text_tokens(&vec,snt_files->tokens_txt); if (tok==NULL) { error("Cannot load text token file %s\n",snt_files->tokens_txt); af_close_mapfile(text); free_snt_files(snt_files); u_fclose(concor); free_conc_opt(concord_options); return DEFAULT_ERROR_CODE; } U_FILE* f_enter=u_fopen(BINARY,snt_files->enter_pos,U_READ); int n_enter_char=0; int* enter_pos=NULL; /* New lines are encoded in 'enter.pos' files. Those files will disappear in the future */ if (f_enter==NULL) { error("Cannot open file %s\n",snt_files->enter_pos); } else { long size=get_file_size(f_enter); enter_pos=(int*)malloc(size); if (enter_pos==NULL) { alloc_error("main_Concord"); u_fclose(f_enter); free_text_tokens(tok); af_close_mapfile(text); free_snt_files(snt_files); u_fclose(concor); free_conc_opt(concord_options); return ALLOC_ERROR_CODE; } n_enter_char=(int)fread(enter_pos,sizeof(int),size/sizeof(int),f_enter); if (n_enter_char!=(int)(size/sizeof(int))) { error("Read error on enter.pos file in main_Concord\n"); u_fclose(f_enter); free(enter_pos); free_text_tokens(tok); af_close_mapfile(text); free_snt_files(snt_files); u_fclose(concor); free_conc_opt(concord_options); return DEFAULT_ERROR_CODE; } u_fclose(f_enter); } if (concord_options->result_mode==INDEX_ || concord_options->result_mode==UIMA_ || concord_options->result_mode==XML_ || concord_options->result_mode==XML_WITH_HEADER_ || concord_options->result_mode==AXIS_) { /* We force some options for index, uima and axis files */ concord_options->left_context=0; concord_options->right_context=0; 
concord_options->sort_mode=TEXT_ORDER; } if (concord_options->only_ambiguous && concord_options->result_mode!=LEMMATIZE_) { /* We force text order when displaying only ambiguous outputs */ concord_options->sort_mode=TEXT_ORDER; } if (concord_options->result_mode==HTML_ || concord_options->result_mode==DIFF_ || concord_options->result_mode==LEMMATIZE_) { /* We need the offset file if and only if we have to produce * an html concordance with positions in .snt file */ concord_options->snt_offsets=load_snt_offsets(snt_files->snt_offsets_pos); if (concord_options->snt_offsets==NULL) { error("Cannot read snt offset file %s\n",snt_files->snt_offsets_pos); free(enter_pos); free_text_tokens(tok); af_close_mapfile(text); free_snt_files(snt_files); u_fclose(concor); free_conc_opt(concord_options); return DEFAULT_ERROR_CODE; } } if (offset_file[0]!='\0') { concord_options->uima_offsets=load_uima_offsets(&vec,offset_file); if (concord_options->uima_offsets==NULL) { error("Cannot read offset file %s\n",offset_file); free(enter_pos); free_text_tokens(tok); af_close_mapfile(text); free_snt_files(snt_files); u_fclose(concor); free_conc_opt(concord_options); return DEFAULT_ERROR_CODE; } } if (PRLG[0]!='\0') { concord_options->PRLG_data=load_PRLG_data(&vec,PRLG); if (concord_options->PRLG_data==NULL) { error("Cannot read PRLG file %s\n",PRLG); free(enter_pos); free_text_tokens(tok); af_close_mapfile(text); free_snt_files(snt_files); u_fclose(concor); free_conc_opt(concord_options); return DEFAULT_ERROR_CODE; } } if (concord_options->result_mode==CSV_) { concord_options->sort_mode=TEXT_ORDER; concord_options->only_matches=1; } /* Once we have set all parameters, we call the function that * will actually create the concordance. 
*/ create_concordance(&vec,concor,text,tok,n_enter_char,enter_pos,concord_options); free(enter_pos); free_text_tokens(tok); af_close_mapfile(text); free_snt_files(snt_files); u_fclose(concor); free_conc_opt(concord_options); u_printf("Done.\n"); return SUCCESS_RETURN_CODE; }
int main_Uncompress(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; char output[FILENAME_MAX]=""; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_Uncompress,lopts_Uncompress,&index))) { switch(val) { case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output file name\n"); return USAGE_ERROR_CODE; } strcpy(output,options.vars()->optarg); break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_Uncompress[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? 
error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } if (output[0]=='\0') { remove_extension(argv[options.vars()->optind],output); strcat(output,".dic"); } U_FILE* f=u_fopen(&vec,output,U_WRITE); if (f==NULL) { error("Cannot open file %s\n",output); return DEFAULT_ERROR_CODE; } char inf_file[FILENAME_MAX]; remove_extension(argv[options.vars()->optind],inf_file); strcat(inf_file,".inf"); u_printf("Uncompressing %s...\n",argv[options.vars()->optind]); Dictionary* d=new_Dictionary(&vec,argv[options.vars()->optind],inf_file); if (d!=NULL) { rebuild_dictionary(d,f); } u_fclose(f); free_Dictionary(d); u_printf("Done.\n"); return SUCCESS_RETURN_CODE; }
int main_RebuildTfst(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } VersatileEncodingConfig vec=VEC_DEFAULT; int val, index=-1; bool only_verify_arguments = false; UnitexGetOpt options; int save_statistics=1; while (EOF!=(val=options.parse_long(argc,argv,optstring_RebuildTfst,lopts_RebuildTfst,&index))) { switch (val) { case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'S': save_statistics = 0; break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n", options.vars()->optopt) : error("Missing argument for option --%s\n", lopts_RebuildTfst[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? 
error("Invalid option -%c\n", options.vars()->optopt) : error("Invalid option --%s\n", options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } char input_tfst[FILENAME_MAX]; char input_tind[FILENAME_MAX]; strcpy(input_tfst,argv[options.vars()->optind]); remove_extension(input_tfst,input_tind); strcat(input_tind,".tind"); u_printf("Loading %s...\n",input_tfst); Tfst* tfst = open_text_automaton(&vec,input_tfst); if (tfst==NULL) { error("Unable to load %s automaton\n",input_tfst); return DEFAULT_ERROR_CODE; } char basedir[FILENAME_MAX]; get_path(input_tfst,basedir); char output_tfst[FILENAME_MAX]; sprintf(output_tfst, "%s.new.tfst",input_tfst); char output_tind[FILENAME_MAX]; sprintf(output_tind, "%s.new.tind",input_tfst); U_FILE* f_tfst; if ((f_tfst = u_fopen(&vec,output_tfst,U_WRITE)) == NULL) { error("Unable to open %s for writing\n", output_tfst); close_text_automaton(tfst); return DEFAULT_ERROR_CODE; } U_FILE* f_tind; if ((f_tind = u_fopen(BINARY,output_tind,U_WRITE)) == NULL) { u_fclose(f_tfst); close_text_automaton(tfst); error("Unable to open %s for writing\n", output_tind); return DEFAULT_ERROR_CODE; } /* We use this hash table to rebuild files tfst_tags_by_freq/alph.txt */ struct hash_table* form_frequencies=new_hash_table((HASH_FUNCTION)hash_unichar,(EQUAL_FUNCTION)u_equal, (FREE_FUNCTION)free,NULL,(KEYCOPY_FUNCTION)keycopy); u_fprintf(f_tfst,"%010d\n",tfst->N); for (int i = 1; i <= tfst->N; i++) { if ((i % 100) == 0) { u_printf("%d/%d sentences rebuilt...\n", i, tfst->N); } load_sentence(tfst,i); char grfname[FILENAME_MAX]; sprintf(grfname, "%ssentence%d.grf", basedir, i); unichar** tags=NULL; int n_tags=-1; if (fexists(grfname)) { /* If there is a .grf for the current sentence, then we must * take it into account */ if 
(0==pseudo_main_Grf2Fst2(&vec,grfname,0,NULL,1,1,NULL,NULL,0)) { /* We proceed only if the graph compilation was a success */ char fst2name[FILENAME_MAX]; sprintf(fst2name, "%ssentence%d.fst2", basedir, i); struct FST2_free_info fst2_free; Fst2* fst2=load_abstract_fst2(&vec,fst2name,0,&fst2_free); af_remove(fst2name); free_SingleGraph(tfst->automaton,NULL); tfst->automaton=create_copy_of_fst2_subgraph(fst2,1); tags=create_tfst_tags(fst2,&n_tags); free_abstract_Fst2(fst2,&fst2_free); } else { error("Error: %s is not a valid sentence automaton\n",grfname); } } save_current_sentence(tfst,f_tfst,f_tind,tags,n_tags,form_frequencies); if (tags!=NULL) { /* If necessary, we free the tags we created */ for (int count_tags=0;count_tags<n_tags;count_tags++) { free(tags[count_tags]); } free(tags); } } u_printf("Text automaton rebuilt.\n"); u_fclose(f_tind); u_fclose(f_tfst); close_text_automaton(tfst); /* Finally, we save statistics */ if (save_statistics) { char tfst_tags_by_freq[FILENAME_MAX]; char tfst_tags_by_alph[FILENAME_MAX]; strcpy(tfst_tags_by_freq, basedir); strcat(tfst_tags_by_freq, "tfst_tags_by_freq.txt"); strcpy(tfst_tags_by_alph, basedir); strcat(tfst_tags_by_alph, "tfst_tags_by_alph.txt"); U_FILE* f_tfst_tags_by_freq = u_fopen(&vec, tfst_tags_by_freq, U_WRITE); if (f_tfst_tags_by_freq == NULL) { error("Cannot open %s\n", tfst_tags_by_freq); } U_FILE* f_tfst_tags_by_alph = u_fopen(&vec, tfst_tags_by_alph, U_WRITE); if (f_tfst_tags_by_alph == NULL) { error("Cannot open %s\n", tfst_tags_by_alph); } sort_and_save_tfst_stats(form_frequencies, f_tfst_tags_by_freq, f_tfst_tags_by_alph); u_fclose(f_tfst_tags_by_freq); u_fclose(f_tfst_tags_by_alph); } free_hash_table(form_frequencies); /* make a backup and replace old automaton with new */ char backup_tfst[FILENAME_MAX]; char backup_tind[FILENAME_MAX]; sprintf(backup_tfst,"%s.bck",input_tfst); sprintf(backup_tind,"%s.bck",input_tind); /* We remove the existing backup files, if any */ af_remove(backup_tfst); 
af_remove(backup_tind); af_rename(input_tfst,backup_tfst); af_rename(input_tind,backup_tind); af_rename(output_tfst,input_tfst); af_rename(output_tind,input_tind); u_printf("\nYou can find a backup of the original files in:\n %s\nand %s\n", backup_tfst,backup_tind); return SUCCESS_RETURN_CODE; }
/* * This function behaves in the same way that a main one, except that it does * not invoke the setBufferMode function. */ int main_LocateTfst(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; char text[FILENAME_MAX]=""; char alphabet[FILENAME_MAX]=""; int is_korean=0; int tilde_negation_operator=1; int selected_negation_operator=0; int tagging=0; int single_tags_only=0; int match_word_boundaries=1; MatchPolicy match_policy=LONGEST_MATCHES; OutputPolicy output_policy=IGNORE_OUTPUTS; AmbiguousOutputPolicy ambiguous_output_policy=ALLOW_AMBIGUOUS_OUTPUTS; VariableErrorPolicy variable_error_policy=IGNORE_VARIABLE_ERRORS; int search_limit=NO_MATCH_LIMIT; char foo; vector_ptr* injected=new_vector_ptr(); bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_LocateTfst,lopts_LocateTfst,&index))) { switch(val) { case 't': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty .tfst name\n"); free_vector_ptr(injected); return USAGE_ERROR_CODE; } strcpy(text,options.vars()->optarg); break; case 'a': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty alphabet name\n"); free_vector_ptr(injected); return USAGE_ERROR_CODE; } strcpy(alphabet,options.vars()->optarg); break; case 'K': is_korean=1; match_word_boundaries=0; break; case 'l': search_limit=NO_MATCH_LIMIT; break; case 'g': if (options.vars()->optarg[0]=='\0') { error("You must specify an argument for negation operator\n"); free_vector_ptr(injected); return USAGE_ERROR_CODE; } selected_negation_operator=1; if ((strcmp(options.vars()->optarg,"minus")==0) || (strcmp(options.vars()->optarg,"-")==0)) { tilde_negation_operator=0; } else if ((strcmp(options.vars()->optarg,"tilde")!=0) && (strcmp(options.vars()->optarg,"~")!=0)) { error("You must specify a valid argument for negation operator\n"); free_vector_ptr(injected); return 
USAGE_ERROR_CODE; } break; case 'n': if (1!=sscanf(options.vars()->optarg,"%d%c",&search_limit,&foo) || search_limit<=0) { /* foo is used to check that the search limit is not like "45gjh" */ error("Invalid search limit argument: %s\n",options.vars()->optarg); free_vector_ptr(injected); return USAGE_ERROR_CODE; } break; case 'S': match_policy=SHORTEST_MATCHES; break; case 'L': match_policy=LONGEST_MATCHES; break; case 'A': match_policy=ALL_MATCHES; break; case 'I': output_policy=IGNORE_OUTPUTS; break; case 'M': output_policy=MERGE_OUTPUTS; break; case 'R': output_policy=REPLACE_OUTPUTS; break; case 'X': variable_error_policy=EXIT_ON_VARIABLE_ERRORS; break; case 'Y': variable_error_policy=IGNORE_VARIABLE_ERRORS; break; case 'Z': variable_error_policy=BACKTRACK_ON_VARIABLE_ERRORS; break; case 'b': ambiguous_output_policy=ALLOW_AMBIGUOUS_OUTPUTS; break; case 'z': ambiguous_output_policy=IGNORE_AMBIGUOUS_OUTPUTS; break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case 1: tagging=1; break; case 2: single_tags_only=1; break; case 3: match_word_boundaries=0; break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); free_vector_ptr(injected); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); free_vector_ptr(injected); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'v': { unichar* key=u_strdup(options.vars()->optarg); unichar* value=u_strchr(key,'='); if (value==NULL) { error("Invalid variable injection: %s\n",options.vars()->optarg); free_vector_ptr(injected); return USAGE_ERROR_CODE; } (*value)='\0'; value++; value=u_strdup(value); vector_ptr_add(injected,key); vector_ptr_add(injected,value); break; } case ':': index==-1 
? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_LocateTfst[index].name); free_vector_ptr(injected); return USAGE_ERROR_CODE; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); free_vector_ptr(injected); return USAGE_ERROR_CODE; break; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); free_vector_ptr(injected); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory free_vector_ptr(injected); return SUCCESS_RETURN_CODE; } if (selected_negation_operator==0) { get_graph_compatibility_mode_by_file(&vec,&tilde_negation_operator); } char grammar[FILENAME_MAX]; char output[FILENAME_MAX]; strcpy(grammar,argv[options.vars()->optind]); get_path(text,output); strcat(output,"concord.ind"); int OK=locate_tfst(text, grammar, alphabet, output, &vec, match_policy, output_policy, ambiguous_output_policy, variable_error_policy, search_limit, is_korean, tilde_negation_operator, injected, tagging, single_tags_only, match_word_boundaries); free_vector_ptr(injected); return (!OK); }
int main_CheckDic(int argc,char* const argv[]) { if (argc==1) { usage(); return 0; } int is_a_DELAF=-1; int strict_unprotected=0; int skip_path=0; char alph[FILENAME_MAX]=""; Encoding encoding_output = DEFAULT_ENCODING_OUTPUT; int bom_output = DEFAULT_BOM_OUTPUT; int mask_encoding_compatibility_input = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT; int val,index=-1; int space_warnings=1; struct OptVars* vars=new_OptVars(); while (EOF!=(val=getopt_long_TS(argc,argv,optstring_CheckDic,lopts_CheckDic,&index,vars))) { switch(val) { case 'f': is_a_DELAF=1; break; case 's': is_a_DELAF=0; break; case 'h': usage(); return 0; case 'r': strict_unprotected=1; break; case 't': strict_unprotected=0; break; case 'n': space_warnings=0; break; case 'p': skip_path=1; break; case 'a': if (vars->optarg[0]=='\0') { fatal_error("Empty alphabet argument\n"); } strcpy(alph,vars->optarg); break; case 'k': if (vars->optarg[0]=='\0') { fatal_error("Empty input_encoding argument\n"); } decode_reading_encoding_parameter(&mask_encoding_compatibility_input,vars->optarg); break; case 'q': if (vars->optarg[0]=='\0') { fatal_error("Empty output_encoding argument\n"); } decode_writing_encoding_parameter(&encoding_output,&bom_output,vars->optarg); break; case ':': if (index==-1) fatal_error("Missing argument for option -%c\n",vars->optopt); else fatal_error("Missing argument for option --%s\n",lopts_CheckDic[index].name); case '?': if (index==-1) fatal_error("Invalid option -%c\n",vars->optopt); else fatal_error("Invalid option --%s\n",vars->optarg); break; } index=-1; } if (is_a_DELAF==-1 || vars->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return 1; } U_FILE* dic=u_fopen_existing_versatile_encoding(mask_encoding_compatibility_input,argv[vars->optind],U_READ); if (dic==NULL) { fatal_error("Cannot open dictionary %s\n",argv[vars->optind]); } Alphabet* alphabet0=NULL; if (alph[0]!='\0') { alphabet0=load_alphabet(alph,1); } char output_filename[FILENAME_MAX]; 
get_path(argv[vars->optind],output_filename); strcat(output_filename,"CHECK_DIC.TXT"); U_FILE* out=u_fopen_versatile_encoding(encoding_output,bom_output,mask_encoding_compatibility_input,output_filename,U_WRITE); if (out==NULL) { u_fclose(dic); fatal_error("Cannot create %s\n",output_filename); } u_printf("Checking %s...\n",argv[vars->optind]); unichar line[CHECKDIC_LINE_SIZE]; int line_number=1; /* * We declare and initialize an array in order to know which * letters are used in the dictionary. */ int i; char* alphabet=(char*)malloc(sizeof(char)*MAX_NUMBER_OF_UNICODE_CHARS); if (alphabet==NULL) { fatal_alloc_error("CheckDic's main"); } memset(alphabet,0,sizeof(char)*MAX_NUMBER_OF_UNICODE_CHARS); /* * We use two structures for the storage of the codes found in the * dictionary. Note that 'semantic_codes' is used to store both grammatical and * semantic codes. */ struct string_hash* semantic_codes=new_string_hash(); struct string_hash* inflectional_codes=new_string_hash(); struct string_hash* simple_lemmas=new_string_hash(DONT_USE_VALUES); struct string_hash* compound_lemmas=new_string_hash(DONT_USE_VALUES); int n_simple_entries=0; int n_compound_entries=0; /* * We read all the lines and check them. 
*/ while (EOF!=u_fgets_limit2(line,DIC_LINE_SIZE,dic)) { if (line[0]=='\0') { /* If we have an empty line, we print a unicode error message * into the output file */ u_fprintf(out,"Line %d: empty line\n",line_number); } else if (line[0]=='/') { /* If a line starts with '/', it is a commment line, so * we ignore it */ } else { /* If we have a line to check, we check it according to the * dictionary type */ check_DELA_line(line,out,is_a_DELAF,line_number,alphabet,semantic_codes, inflectional_codes,simple_lemmas,compound_lemmas, &n_simple_entries,&n_compound_entries,alphabet0,strict_unprotected); } /* At regular intervals, we display a message on the standard * output to show that the program is working */ if (line_number%10000==0) { u_printf("%d lines read...\r",line_number); } line_number++; } u_printf("%d lines read\n",line_number-1); u_fclose(dic); /* * Once we have checked all the lines, we print some informations * in the output file. */ u_fprintf(out,"-----------------------------------\n"); u_fprintf(out,"------------- Stats -------------\n"); u_fprintf(out,"-----------------------------------\n"); if (skip_path != 0) { char filename_without_path[FILENAME_MAX]; remove_path(argv[vars->optind],filename_without_path); u_fprintf(out,"File: %s\n",filename_without_path); } else { u_fprintf(out,"File: %s\n",argv[vars->optind]); } u_fprintf(out,"Type: %s\n",is_a_DELAF?"DELAF":"DELAS"); u_fprintf(out,"%d line%s read\n",line_number-1,(line_number-1>1)?"s":""); u_fprintf(out,"%d simple entr%s ",n_simple_entries,(n_simple_entries>1)?"ies":"y"); u_fprintf(out,"for %d distinct lemma%s\n",simple_lemmas->size,(simple_lemmas->size>1)?"s":""); u_fprintf(out,"%d compound entr%s ",n_compound_entries,(n_compound_entries>1)?"ies":"y"); u_fprintf(out,"for %d distinct lemma%s\n",compound_lemmas->size,(compound_lemmas->size>1)?"s":""); /** * We print the list of the characters that are used, with * their unicode numbers shown in hexadecimal. 
This can be useful * to detect different characters that are graphically identical * like 'A' (upper of latin 'a' or upper of greek alpha ?). */ u_fprintf(out,"-----------------------------------\n"); u_fprintf(out,"---- All chars used in forms ----\n"); u_fprintf(out,"-----------------------------------\n"); unichar r[4]; unichar r2[7]; r[1]=' '; r[2]='('; r[3]='\0'; r2[5]='\n'; r2[6]='\0'; for (i=0;i<MAX_NUMBER_OF_UNICODE_CHARS;i++) { if (alphabet[i]) { u_fprintf(out,"%C (%04X)\n",i,i); } } /* * Then we print the list of all grammatical and semantic codes used in the * dictionary. If a code contains a non ASCII character, a space or a tabulation, * we print a warning. */ u_fprintf(out,"-------------------------------------------------------------\n"); u_fprintf(out,"---- %3d grammatical/semantic code%s",semantic_codes->size,(semantic_codes->size>1)?"s used in dictionary ----\n":" used in dictionary -----\n"); u_fprintf(out,"-------------------------------------------------------------\n"); unichar comment[2000]; for (i=0;i<semantic_codes->size;i++) { /* We print the code, followed if necessary by a warning */ u_fprintf(out,"%S",semantic_codes->value[i]); if (warning_on_code(semantic_codes->value[i],comment,space_warnings)) { u_fprintf(out," %S",comment); } u_fprintf(out,"\n"); } /* * Finally, we print the list of inflectional codes, * with warnings in the case of non ASCII letters, spaces * or tabulations. 
*/ u_fprintf(out,"-----------------------------------------------------\n"); u_fprintf(out,"---- %3d inflectional code%s",inflectional_codes->size,(inflectional_codes->size>1)?"s used in dictionary ----\n":" used in dictionary -----\n"); u_fprintf(out,"-----------------------------------------------------\n"); for (i=0;i<inflectional_codes->size;i++) { u_fprintf(out,"%S",inflectional_codes->value[i]); if (warning_on_code(inflectional_codes->value[i],comment,space_warnings)) { u_fprintf(out," %S",comment); } u_fprintf(out,"\n"); } u_fclose(out); free_OptVars(vars); u_printf("Done.\n"); /* Note that we don't free anything since it would only waste time */ free(alphabet); if (alphabet0!=NULL) { free_alphabet(alphabet0); } #if (defined(UNITEX_LIBRARY) || defined(UNITEX_RELEASE_MEMORY_AT_EXIT)) /* cleanup for no leak on library */ free_string_hash(semantic_codes); free_string_hash(inflectional_codes); free_string_hash(simple_lemmas); free_string_hash(compound_lemmas); #endif return 0; }