int main_SpellCheck(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; char mode=0; char snt[FILENAME_MAX]=""; char txt[FILENAME_MAX]=""; char output[FILENAME_MAX]=""; char output_set=0; char output_op='A'; SpellCheckConfig config; config.max_errors=1; config.max_SP_INSERT=1; config.max_SP_SUPPR=1; config.max_SP_SWAP=1; config.max_SP_CHANGE=1; for (int i=0;i<N_SPSubOp;i++) { config.score[i]=default_scores[i]; } config.min_length1=4; config.min_length2=6; config.min_length3=12; config.input_op='D'; config.keyboard=NULL; config.allow_uppercase_initial=0; char foo; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_SpellCheck,lopts_SpellCheck,&index))) { switch(val) { case 's': { strcpy(snt,options.vars()->optarg); mode='s'; break; } case 'f': { strcpy(txt,options.vars()->optarg); mode='f'; break; } case 'o': { if (options.vars()->optarg!=NULL) { strcpy(output,options.vars()->optarg); } output_set=1; break; } case 'I': { if (!strcmp(options.vars()->optarg,"D") || !strcmp(options.vars()->optarg,"M") || !strcmp(options.vars()->optarg,"U")) { config.input_op=options.vars()->optarg[0]; } else { error("Invalid argument %s for option --input-op: should in [DMU]\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 'O': { if (!strcmp(options.vars()->optarg,"O") || !strcmp(options.vars()->optarg,"A")) { output_op=options.vars()->optarg[0]; } else { error("Invalid argument %s for option --output-op: should in [OA]\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 1: { config.keyboard=get_Keyboard(options.vars()->optarg); if (config.keyboard==NULL) { error("Invalid argument %s for option --keyboard:\nUse --show-keyboards to see possible values\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 2: { print_available_keyboards(U_STDOUT); return SUCCESS_RETURN_CODE; } case 10: 
{ if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_errors,&foo)) { error("Invalid argument %s for --max-errors: should be an integer >=0\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 11: { if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_INSERT,&foo)) { error("Invalid argument %s for --max-insert: should be an integer >=0\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 12: { if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_SUPPR,&foo)) { error("Invalid argument %s for --max-suppr: should be an integer >=0\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 13: { if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_CHANGE,&foo)) { error("Invalid argument %s for --max-change: should be an integer >=0\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 14: { if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_SWAP,&foo)) { error("Invalid argument %s for --max-swap: should be an integer >=0\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 20: { int* scores=config.score; if (N_SPSubOp!=sscanf(options.vars()->optarg,"%d,%d,%d,%d,%d,%d,%d,%d,%d%c", scores,scores+1,scores+2,scores+3,scores+4,scores+5, scores+6,scores+7,scores+8,&foo)) { error("Invalid argument %s for option --scores. 
See --help-scores\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 21: { usage_scores(); return SUCCESS_RETURN_CODE; } case 22: { if (3!=sscanf(options.vars()->optarg,"%u,%u,%u%c", &config.min_length1,&config.min_length2,&config.min_length3,&foo)) { error("Invalid argument %s for option --min-lengths\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 23: { if (!strcmp(options.vars()->optarg,"yes")) { config.allow_uppercase_initial=1; } else if (!strcmp(options.vars()->optarg,"no")) { config.allow_uppercase_initial=0; } else { error("Invalid argument %s for option --upper-initial\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_SpellCheck[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? 
error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind==argc) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (mode==0) { error("You must use either --snt or --file\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } config.n_dics=argc-options.vars()->optind; config.dics=(Dictionary**)malloc(config.n_dics*sizeof(Dictionary*)); if (config.dics==NULL) { alloc_error("main_SpellCheck"); return ALLOC_ERROR_CODE; } for (int i=0;i<config.n_dics;i++) { config.dics[i]=new_Dictionary(&vec,argv[i+options.vars()->optind]); if (config.dics[i]==NULL) { error("Cannot load dictionary %s\n",argv[i+options.vars()->optind]); } } config.out=U_STDOUT; config.n_input_lines=0; config.n_output_lines=0; if (mode=='s') { /* When working with a .snt, we actually want to work on its err file */ get_snt_path(snt,txt); strcat(txt,"err"); /* the output must be dlf, and we note the number of lines in the existing * dlf file, if any */ get_snt_path(snt,output); strcat(output,"dlf.n"); U_FILE* f=u_fopen(&vec,output,U_READ); if (f!=NULL) { u_fscanf(f,"%d",&(config.n_output_lines)); u_fclose(f); } get_snt_path(snt,output); strcat(output,"dlf"); output_set=1; /* and we force the values for -I and -O */ config.input_op='U'; output_op='A'; } else { /* If mode=='f', we don't have anything to do since we already * defined the default output to stdout */ } if (output_set) { if (output_op=='O') { config.out=u_fopen(&vec,output,U_WRITE); } else { config.out=u_fopen(&vec,output,U_APPEND); } if (config.out==NULL) { error("Cannot open output file %s\n",output); for (int i=0;i<config.n_dics;i++) { free_Dictionary(config.dics[i]); } free(config.dics); return DEFAULT_ERROR_CODE; } } config.modified_input=NULL; char modified_input[FILENAME_MAX]=""; if (config.input_op!='D') { 
strcpy(modified_input,txt); strcat(modified_input,".tmp"); config.modified_input=u_fopen(&vec,modified_input,U_WRITE); if (config.modified_input==NULL) { error("Cannot open tmp file %s\n",modified_input); if (config.out!=U_STDOUT) { u_fclose(config.out); } for (int i=0;i<config.n_dics;i++) { free_Dictionary(config.dics[i]); } free(config.dics); return DEFAULT_ERROR_CODE; } } config.in=u_fopen(&vec,txt,U_READ); if (config.in==NULL) { error("Cannot open file %s\n",txt); u_fclose(config.modified_input); if (config.out!=U_STDOUT) { u_fclose(config.out); } for (int i=0;i<config.n_dics;i++) { free_Dictionary(config.dics[i]); } free(config.dics); return DEFAULT_ERROR_CODE; } /* We perform spellchecking */ spellcheck(&config); /* And we clean */ u_fclose(config.in); if (config.modified_input!=NULL) { /* If we used a tmp file because the input file has to be modified, * it's now time to actually modify it */ u_fclose(config.modified_input); af_remove(txt); af_rename(modified_input,txt); } if (config.out!=U_STDOUT) { u_fclose(config.out); } for (int i=0;i<config.n_dics;i++) { free_Dictionary(config.dics[i]); } free(config.dics); /* Finally, we update the dlf.n and err.n files if mode=='s' */ if (mode=='s') { get_snt_path(snt,output); strcat(output,"err.n"); U_FILE* f=u_fopen(&vec,output,U_WRITE); if (f!=NULL) { u_fprintf(f,"%d",config.n_input_lines); u_fclose(f); } if (config.input_op!='D') { get_snt_path(snt,output); strcat(output,"dlf.n"); U_FILE* fw=u_fopen(&vec,output,U_WRITE); if (fw!=NULL) { u_fprintf(fw,"%d",config.n_output_lines); u_fclose(fw); } } } return SUCCESS_RETURN_CODE; }
int main_Evamb(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } int val,index=-1; int sentence_number=-1; const char* outfilename=NULL; char output_name_buffer[FILENAME_MAX]=""; VersatileEncodingConfig vec=VEC_DEFAULT; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_Evamb,lopts_Evamb,&index))) { switch(val) { case 's': { char c_foo; if (1!=sscanf(options.vars()->optarg,"%d%c",&sentence_number,&c_foo) || sentence_number<=0) { /* foo is used to check that the sentence number is not like "45gjh" */ error("Invalid sentence number: %s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } } break; case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output file name\n"); return USAGE_ERROR_CODE; } strcpy(output_name_buffer,options.vars()->optarg); outfilename=output_name_buffer; break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_Evamb[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? 
error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } u_printf("Loading '%s'...\n",argv[options.vars()->optind]); Tfst* tfst=open_text_automaton(&vec,argv[options.vars()->optind]); if (tfst==NULL) { error("Unable to load '%s'\n",argv[options.vars()->optind]); return DEFAULT_ERROR_CODE; } if (sentence_number>tfst->N) { error("Invalid sentence number %d: should be in [1;%d]\n",sentence_number,tfst->N); close_text_automaton(tfst); return DEFAULT_ERROR_CODE; } U_FILE* outfile = (outfilename == NULL) ? U_STDOUT : u_fopen(&vec,outfilename,U_WRITE); if (outfile==NULL) { error("Cannot create file %s\n",outfilename); close_text_automaton(tfst); return DEFAULT_ERROR_CODE; } if (sentence_number==-1) { /* If we have to evaluate the ambiguity rate of the whole automaton */ double lognp_total=0.0; double lmoy_total=0.0; double maxlogamb=0.0; double minlogamb=(double)INT_MAX; /* This is the number of bad automata in the text .fst2 */ int n_bad_automata=0; int maxambno=-1; int minambno=-1; for (sentence_number=1;sentence_number<=tfst->N;sentence_number++) { load_sentence(tfst,sentence_number); SingleGraph graph=tfst->automaton; if (graph->number_of_states==0 || graph->states[0]->outgoing_transitions==NULL) { n_bad_automata++; error("Sentence %d: empty automaton\n",sentence_number); } else { /* log(number of paths) */ double lognp; /* minimum/maximum path length */ int lmin,lmax; /* Approximation of the sentence length */ double lmoy; /* log(ambiguity rate) */ double logamb; lognp=evaluate_ambiguity(graph,&lmin,&lmax); lmoy=(double)(lmin+lmax)/2.0; logamb=lognp/lmoy; if (maxlogamb<logamb) { maxlogamb=logamb; maxambno=sentence_number; } if (minlogamb>logamb) { minlogamb=logamb; 
minambno=sentence_number; } u_printf("Sentence %d \r",sentence_number); lognp_total=lognp_total+lognp; lmoy_total=lmoy_total+lmoy; } } if (n_bad_automata>=tfst->N) { error("No stats to print because no non-empty sentence automata were found.\n"); } else { u_fprintf(outfile,"%d/%d sentence%s taken into account\n",tfst->N-n_bad_automata,tfst->N,(tfst->N>1)?"s":""); u_fprintf(outfile,"Average ambiguity rate=%.3f\n",exp(lognp_total/lmoy_total)); u_fprintf(outfile,"Minimum ambiguity rate=%.3f (sentence %d)\n",exp(minlogamb),minambno); u_fprintf(outfile,"Maximum ambiguity rate=%.3f (sentence %d)\n",exp(maxlogamb),maxambno); } } else { /* If we have to evaluate the ambiguity rate of a single sentence automaton */ load_sentence(tfst,sentence_number); SingleGraph graph=tfst->automaton; if (graph->number_of_states==0) { error("Sentence %d: empty automaton\n",sentence_number); } else { int min; int max; double lognp=evaluate_ambiguity(graph,&min,&max); double lmoy=(double)(min+max)/2.0; u_fprintf(outfile,"Sentence %d: ambiguity rate=%.3f\n",sentence_number,exp(lognp/lmoy)); } } if (outfile!=U_STDOUT) { u_fclose(outfile); } close_text_automaton(tfst); return SUCCESS_RETURN_CODE; }
int main_Fst2Txt(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } struct fst2txt_parameters* p=new_fst2txt_parameters(); char in_offsets[FILENAME_MAX]=""; char out_offsets[FILENAME_MAX]=""; int val,index=-1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_Fst2Txt,lopts_Fst2Txt,&index))) { switch(val) { case 't': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty text file name\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } p->input_text_file=strdup(options.vars()->optarg); if (p->input_text_file==NULL) { alloc_error("main_Fst2Txt"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } break; case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty text output file name\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } p->output_text_file=strdup(options.vars()->optarg); p->output_text_file_is_temp=0; if (p->output_text_file==NULL) { alloc_error("main_Fst2Txt"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } break; case 'a': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty alphabet file name\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } p->alphabet_file=strdup(options.vars()->optarg); if (p->alphabet_file==NULL) { alloc_error("main_Fst2Txt"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } break; case 'M': p->output_policy=MERGE_OUTPUTS; break; case 'R': p->output_policy=REPLACE_OUTPUTS; break; case 'c': p->tokenization_policy=CHAR_BY_CHAR_TOKENIZATION; break; case 'w': p->tokenization_policy=WORD_BY_WORD_TOKENIZATION; break; case 's': p->space_policy=START_WITH_SPACE; break; case 'x': p->space_policy=DONT_START_WITH_SPACE; break; case 'V': only_verify_arguments = true; break; case 'h': usage(); free_fst2txt_parameters(p); return SUCCESS_RETURN_CODE; case ':': index==-1 ? 
error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_Fst2Txt[index].name); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(p->vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(p->vec.encoding_output),&(p->vec.bom_output),options.vars()->optarg); break; case '$': if (options.vars()->optarg[0]=='\0') { error("Empty input_offsets argument\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } strcpy(in_offsets,options.vars()->optarg); break; case '@': if (options.vars()->optarg[0]=='\0') { error("Empty output_offsets argument\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } strcpy(out_offsets,options.vars()->optarg); break; case 'l': p->convLFtoCRLF=0; break; case 'r': p->keepCR = 1; break; case '?': index==-1 ? 
error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } if (p->input_text_file==NULL) { error("You must specify the text file\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory free_fst2txt_parameters(p); return SUCCESS_RETURN_CODE; } if (out_offsets[0]!='\0') { /* We deal with offsets only if the program is expected to produce some */ if (in_offsets[0]!='\0') { p->v_in_offsets=load_offsets(&(p->vec),in_offsets); if (p->v_in_offsets==NULL) { error("Cannot load offset file %s\n",in_offsets); free_fst2txt_parameters(p); return DEFAULT_ERROR_CODE; } } else { /* If there is no input offset file, we create an empty offset vector * in order to avoid testing whether the vector is NULL or not */ p->v_in_offsets=new_vector_offset(1); } p->f_out_offsets=u_fopen(&(p->vec),out_offsets,U_WRITE); if (p->f_out_offsets==NULL) { error("Cannot create file %s\n",out_offsets); free_fst2txt_parameters(p); return DEFAULT_ERROR_CODE; } } if (p->output_text_file == NULL) { char tmp[FILENAME_MAX]; remove_extension(p->input_text_file, tmp); strcat(tmp, ".tmp"); p->output_text_file_is_temp=1; p->output_text_file = strdup(tmp); if (p->output_text_file == NULL) { alloc_error("main_Fst2Txt"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } } p->fst_file=strdup(argv[options.vars()->optind]); if (p->fst_file==NULL) { alloc_error("main_Fst2Txt"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } int result=main_fst2txt(p); free_fst2txt_parameters(p); return result; }
/**
 * Builds a logger from a CreateLog-style command line.
 *
 * The constructor starts from ule_default_init, applies the options found
 * in argv to 'ule', and finally calls AddActivityLogger(). init_done is set
 * to 1 only when that call returns a non-zero value; in every other case
 * (help, invalid option, verify-only run, -g, -p, failed registration) it
 * stays at 0.
 *
 * @param argc number of entries in argv
 * @param argv program arguments
 */
InstallLogger::InstallLogger(int argc,char* const argv[]) :
    ule(ule_default_init), init_done(0) {
    ClearUniLoggerSpaceStruct(0);
    if (argc==1) {
        usage();
        return;
    }

    /* Only written by the -k/-q decoders; not read afterwards in this
     * constructor */
    Encoding output_encoding = DEFAULT_ENCODING_OUTPUT;
    int output_bom = DEFAULT_BOM_OUTPUT;
    int input_encoding_mask = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT;

    bool verify_only = false;
    int long_index=-1;
    int opt;
    UnitexGetOpt options;
    while ((opt=options.parse_long(argc,argv,optstring_CreateLog,lopts_CreateLog,&long_index))!=EOF) {
        switch(opt) {
        /* Flags selecting what gets stored in the log */
        case 'n':
            ule.store_file_in_content = 0;
            break;
        case 'i':
            ule.store_file_in_content = 1;
            break;
        case 'u':
            ule.store_file_out_content = 0;
            break;
        case 'o':
            ule.store_file_out_content = 1;
            break;
        case 't':
            ule.store_list_file_in_content = 0;
            break;
        case 's':
            ule.store_list_file_in_content = 1;
            break;
        case 'f':
            ule.store_list_file_out_content = 0;
            break;
        case 'r':
            ule.store_list_file_out_content = 1;
            break;

        /* Log location */
        case 'l':
            if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty log filename\n");
                return;
            }
            /* Replace any name set by a previous occurrence of the option */
            free((void*)ule.szNameLog);
            ule.szNameLog = strdup(options.vars()->optarg);
            break;
        case 'd':
            if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty directory\n");
                return;
            }
            free((void*)ule.szPathLog);
            ule.szPathLog = strdup(options.vars()->optarg);
            break;

        /* Options that end the construction immediately */
        case 'g':
            ClearUniLoggerSpaceStruct(1);
            return;
        case 'p':
            if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty param file\n");
                return;
            }
            ClearUniLoggerSpaceStruct(1);
            LoadParamFile(options.vars()->optarg);
            return;
        case 'h':
            usage();
            return;

        /* Encodings */
        case 'k':
            if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return;
            }
            decode_reading_encoding_parameter(&input_encoding_mask,options.vars()->optarg);
            break;
        case 'q':
            if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return;
            }
            decode_writing_encoding_parameter(&output_encoding,&output_bom,options.vars()->optarg);
            break;

        case 'V':
            verify_only = true;
            break;

        /* Parsing errors */
        case ':':
            if (long_index==-1) {
                error("Missing argument for option -%c\n",options.vars()->optopt);
            } else {
                error("Missing argument for option --%s\n",lopts_CreateLog[long_index].name);
            }
            return;
        case '?':
            if (long_index==-1) {
                error("Invalid option -%c\n",options.vars()->optopt);
            } else {
                error("Invalid option --%s\n",options.vars()->optarg);
            }
            return;
        }
        long_index=-1;
    }

    if (options.vars()->optind!=argc-1) {
        /* No action is taken when the number of positional arguments
         * differs from one */
    }

    if (verify_only) {
        return;
    }

    if (AddActivityLogger(&ule) == 0) {
        ClearUniLoggerSpaceStruct(1);
    } else {
        init_done = 1;
    }
}
int main_Normalize(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } int mode=KEEP_CARRIAGE_RETURN; int separator_normalization=1; char rules[FILENAME_MAX]=""; char input_offsets[FILENAME_MAX]=""; char output_offsets[FILENAME_MAX]=""; VersatileEncodingConfig vec=VEC_DEFAULT; int convLFtoCRLF=1; int val,index=-1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_Normalize,lopts_Normalize,&index))) { switch(val) { case 'l': convLFtoCRLF=0; break; case 'n': mode=REMOVE_CARRIAGE_RETURN; break; case 'r': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty replacement rule file name\n"); return USAGE_ERROR_CODE; } strcpy(rules,options.vars()->optarg); break; case 1: separator_normalization=0; break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case '$': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty input offset file name\n"); return USAGE_ERROR_CODE; } strcpy(input_offsets,options.vars()->optarg); break; case '@': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output offset file name\n"); return USAGE_ERROR_CODE; } strcpy(output_offsets,options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_Normalize[index].name); return USAGE_ERROR_CODE; break; case '?': index==-1 ? 
error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; break; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } vector_offset* v_input_offsets=NULL; vector_offset* v_output_offsets=NULL; U_FILE* f_output_offsets=NULL; if (output_offsets[0]!='\0') { /* We deal with offsets only if we have to produce output offsets */ if (input_offsets[0]!='\0') { v_input_offsets=load_offsets(&vec,input_offsets); } f_output_offsets=u_fopen(&vec, output_offsets, U_WRITE); if (f_output_offsets==NULL) { error("Cannot create offset file %s\n",output_offsets); return DEFAULT_ERROR_CODE; } v_output_offsets=new_vector_offset(); } char tmp_file[FILENAME_MAX]; get_extension(argv[options.vars()->optind],tmp_file); if (!strcmp(tmp_file, ".snt")) { /* If the file to process has already the .snt extension, we temporary rename it to * .snt.normalizing */ strcpy(tmp_file,argv[options.vars()->optind]); strcat(tmp_file,".normalizing"); af_rename(argv[options.vars()->optind],tmp_file); } else { strcpy(tmp_file,argv[options.vars()->optind]); } /* We set the destination file */ char dest_file[FILENAME_MAX]; remove_extension(argv[options.vars()->optind],dest_file); strcat(dest_file,".snt"); u_printf("Normalizing %s...\n",argv[options.vars()->optind]); int return_value = normalize(tmp_file, dest_file, &vec, mode, convLFtoCRLF, rules, v_output_offsets, separator_normalization); u_printf("\n"); /* If we have used a temporary file, we delete it */ if (strcmp(tmp_file,argv[options.vars()->optind])) { af_remove(tmp_file); } process_offsets(v_input_offsets,v_output_offsets,f_output_offsets); u_fclose(f_output_offsets); free_vector_offset(v_input_offsets); free_vector_offset(v_output_offsets); u_printf((return_value==SUCCESS_RETURN_CODE) ? 
"Done.\n" : "Unsuccessfull.\n"); return return_value; }
int main_MultiFlex(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } char output[FILENAME_MAX]=""; char config_dir[FILENAME_MAX]=""; char alphabet[FILENAME_MAX]=""; char pkgdir[FILENAME_MAX]=""; char* named=NULL; int is_korean=0; // default policy is to compile only out of date graphs GraphRecompilationPolicy graph_recompilation_policy = ONLY_OUT_OF_DATE; //Current language's alphabet int error_check_status=SIMPLE_AND_COMPOUND_WORDS; VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_MultiFlex,lopts_MultiFlex,&index))) { switch(val) { case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty DELAF file name\n"); free(named); return USAGE_ERROR_CODE; } strcpy(output,options.vars()->optarg); break; case 'a': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty alphabet file name\n"); free(named); return USAGE_ERROR_CODE; } strcpy(alphabet,options.vars()->optarg); break; case 'd': strcpy(config_dir,options.vars()->optarg); break; case 'K': is_korean=1; break; case 's': error_check_status=ONLY_SIMPLE_WORDS; break; case 'c': error_check_status=ONLY_COMPOUND_WORDS; break; case 'f': graph_recompilation_policy = ALWAYS_RECOMPILE; break; case 'n': graph_recompilation_policy = NEVER_RECOMPILE; break; case 't': graph_recompilation_policy = ONLY_OUT_OF_DATE; break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); free(named); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); free(named); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'p': if 
(options.vars()->optarg[0]=='\0') { error("You must specify a non empty package directory name\n"); free(named); return USAGE_ERROR_CODE; } strcpy(pkgdir,options.vars()->optarg); break; case 'r': if (named==NULL) { named=strdup(options.vars()->optarg); if (named==NULL) { alloc_error("main_Grf2Fst2"); return ALLOC_ERROR_CODE; } } else { char* more_names = (char*)realloc((void*)named,strlen(named)+strlen(options.vars()->optarg)+2); if (more_names) { named = more_names; } else { alloc_error("main_MultiFlex"); free(named); return ALLOC_ERROR_CODE; } strcat(named,";"); strcat(named,options.vars()->optarg); } break; case 'V': only_verify_arguments = true; break; case 'h': usage(); free(named); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_MultiFlex[index].name); free(named); return USAGE_ERROR_CODE; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); free(named); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); free(named); return USAGE_ERROR_CODE; } if (output[0]=='\0') { error("You must specify the output DELAF name\n"); free(named); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory free(named); return SUCCESS_RETURN_CODE; } //Load morphology description char morphology[FILENAME_MAX]; new_file(config_dir,"Morphology.txt",morphology); //int config_files_status=CONFIG_FILES_OK; Alphabet* alph=NULL; if (alphabet[0]!='\0') { //Load alphabet alph=load_alphabet(&vec,alphabet,1); //To be done once at the beginning of the inflection if (alph==NULL) { error("Cannot open alphabet file %s\n",alphabet); free(named); return DEFAULT_ERROR_CODE; } } //Init equivalence files char equivalences[FILENAME_MAX]; new_file(config_dir,"Equivalences.txt",equivalences); /* Korean */ 
Korean* korean=NULL; if (is_korean) { if (alph==NULL) { error("Cannot initialize Korean data with a NULL alphabet\n"); free(named); return DEFAULT_ERROR_CODE; } korean=new Korean(alph); } MultiFlex_ctx* p_multiFlex_ctx=new_MultiFlex_ctx(config_dir, morphology, equivalences, &vec, korean, pkgdir, named, graph_recompilation_policy); //DELAC inflection int return_value = inflect(argv[options.vars()->optind],output,p_multiFlex_ctx,alph,error_check_status); free(named); for (int count_free_fst2=0;count_free_fst2<p_multiFlex_ctx->n_fst2;count_free_fst2++) { free_abstract_Fst2(p_multiFlex_ctx->fst2[count_free_fst2],&(p_multiFlex_ctx->fst2_free[count_free_fst2])); p_multiFlex_ctx->fst2[count_free_fst2] = NULL; } free_alphabet(alph); free_MultiFlex_ctx(p_multiFlex_ctx); if (korean!=NULL) { delete korean; } u_printf("Done.\n"); return return_value; }
int main_PolyLex(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } int language=-1; char alphabet[FILENAME_MAX]=""; char name_bin[FILENAME_MAX]=""; char output[FILENAME_MAX]=""; char info[FILENAME_MAX]=""; VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_PolyLex,lopts_PolyLex,&index))) { switch(val) { case 'D': language=DUTCH; break; case 'G': language=GERMAN; break; case 'N': language=NORWEGIAN; break; case 'R': language=RUSSIAN; break; case 'a': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty alphabet file name\n"); return USAGE_ERROR_CODE; } strcpy(alphabet,options.vars()->optarg); break; case 'd': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty dictionary file name\n"); return USAGE_ERROR_CODE; } strcpy(name_bin,options.vars()->optarg); break; case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output file name\n"); return USAGE_ERROR_CODE; } strcpy(output,options.vars()->optarg); break; case 'i': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty information file name\n"); return USAGE_ERROR_CODE; } strcpy(info,options.vars()->optarg); break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? 
error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_PolyLex[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (name_bin[0]=='\0') { error("You must specify the .bin dictionary to use\n"); return USAGE_ERROR_CODE; } if (output[0]=='\0') { error("You must specify the output dictionary file name\n"); return USAGE_ERROR_CODE; } if (language==-1) { error("You must specify the language\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } Alphabet* alph=NULL; if (alphabet[0]!='\0') { u_printf("Loading alphabet...\n"); alph=load_alphabet(&vec,alphabet); if (alph==NULL) { error("Cannot load alphabet file %s\n",alphabet); return USAGE_ERROR_CODE; } } char name_inf[FILENAME_MAX]; struct string_hash* forbiddenWords=NULL; if (language==DUTCH || language==NORWEGIAN) { get_path(name_bin,name_inf); strcat(name_inf,"ForbiddenWords.txt"); forbiddenWords=load_key_list(&vec,name_inf); if (forbiddenWords==NULL) { /* If there was no file, we don't want to block the process */ forbiddenWords=new_string_hash(DONT_USE_VALUES); } } strcpy(name_inf,name_bin); name_inf[strlen(name_bin)-3]='\0'; strcat(name_inf,"inf"); Dictionary* d=new_Dictionary(&vec,name_bin,name_inf); if (d==NULL) { error("Cannot load dictionary %s\n",name_bin); free_string_hash(forbiddenWords); free_alphabet(alph); return DEFAULT_ERROR_CODE; } char tmp[FILENAME_MAX]; strcpy(tmp,argv[options.vars()->optind]); strcat(tmp,".tmp"); U_FILE* words=u_fopen(&vec,argv[options.vars()->optind],U_READ); if (words==NULL) { error("Cannot open word list file %s\n",argv[options.vars()->optind]); free_Dictionary(d); 
free_string_hash(forbiddenWords); free_alphabet(alph); // here we return 0 in order to do not block the preprocessing // in the Unitex/GramLab IDE interface, if no dictionary was applied // so that there is no "err" file return SUCCESS_RETURN_CODE; } U_FILE* new_unknown_words=u_fopen(&vec,tmp,U_WRITE); if (new_unknown_words==NULL) { error("Cannot open temporary word list file %s\n",tmp); u_fclose(words); free_Dictionary(d); free_string_hash(forbiddenWords); free_alphabet(alph); return DEFAULT_ERROR_CODE; } U_FILE* res=u_fopen(&vec,output,U_APPEND); if (res==NULL) { error("Cannot open result file %s\n",output); u_fclose(new_unknown_words); u_fclose(words); free_Dictionary(d); free_string_hash(forbiddenWords); free_alphabet(alph); u_fclose(words); return DEFAULT_ERROR_CODE; } U_FILE* debug=NULL; if ((*info)!='\0') { debug=u_fopen(&vec,info,U_WRITE); if (debug==NULL) { error("Cannot open debug file %s\n",info); } } struct utags UTAG; switch(language) { case DUTCH: analyse_dutch_unknown_words(alph, d, words, res, debug, new_unknown_words, forbiddenWords); break; case GERMAN: analyse_german_compounds(alph, d, words, res, debug, new_unknown_words); break; case NORWEGIAN: analyse_norwegian_unknown_words(alph, d, words, res, debug, new_unknown_words, forbiddenWords); break; case RUSSIAN: init_russian(&UTAG); analyse_compounds(alph, d, words, res, debug, new_unknown_words, UTAG); break; } free_alphabet(alph); free_Dictionary(d); u_fclose(words); u_fclose(new_unknown_words); free_string_hash(forbiddenWords); af_remove(argv[options.vars()->optind]); af_rename(tmp,argv[options.vars()->optind]); u_fclose(res); if (debug!=NULL) { u_fclose(debug); } return SUCCESS_RETURN_CODE; }
int main_PackFile(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } char junk_prefix[FILENAME_MAX+0x20]=""; char include_filename[FILENAME_MAX+0x20]=""; char global_comment[FILENAME_MAX+0x20]=""; Encoding encoding_output = DEFAULT_ENCODING_OUTPUT; int bom_output = DEFAULT_BOM_OUTPUT; int mask_encoding_compatibility_input = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT; int val,index=-1; int quiet=0; int add_one_file_only=1; int append=0; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_PackFile,lopts_PackFile,&index))) { switch(val) { case 'm': quiet=1; break; case 'p': add_one_file_only=0; break; case 'a': append=1; break; case 'i': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty include file name\n"); return USAGE_ERROR_CODE; } strcpy(include_filename,options.vars()->optarg); break; case 'j': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty junk prefix file name\n"); return USAGE_ERROR_CODE; } strcpy(junk_prefix,options.vars()->optarg); break; case 'g': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty global comment\n"); return USAGE_ERROR_CODE; } strcpy(global_comment,options.vars()->optarg); break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&mask_encoding_compatibility_input,options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&encoding_output,&bom_output,options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? 
error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_PackFile[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } const char* ulpFile=argv[options.vars()->optind]; if (ulpFile == NULL) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if ((*ulpFile)=='\0') { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } int retValue = buildPackFile(ulpFile,append, global_comment, include_filename, add_one_file_only, junk_prefix, quiet); if (retValue == 0) { error("Error creating %s\n", ulpFile); return DEFAULT_ERROR_CODE; } else { return SUCCESS_RETURN_CODE; } }
/** * The same than main, but no call to setBufferMode. */ int main_KeyWords(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } VersatileEncodingConfig vec=VEC_DEFAULT; char tokens[FILENAME_MAX]; char output[FILENAME_MAX]=""; char alph[FILENAME_MAX]=""; char cdic[FILENAME_MAX]=""; unichar* code=u_strdup("XXX"); int val,index=-1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_KeyWords,lopts_KeyWords,&index))) { switch(val) { case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output\n"); free(code); return USAGE_ERROR_CODE; } strcpy(output,options.vars()->optarg); break; case 'a': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty alphabet file name\n"); free(code); return USAGE_ERROR_CODE; } strcpy(alph,options.vars()->optarg); break; case 'f': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty forbidden code\n"); free(code); return USAGE_ERROR_CODE; } free(code); code=u_strdup(options.vars()->optarg); break; case 'c': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty file name\n"); free(code); return USAGE_ERROR_CODE; } strcpy(cdic,options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); free(code); return SUCCESS_RETURN_CODE; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); free(code); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); free(code); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case ':': index==-1 ? 
error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_KeyWords[index].name); free(code); return USAGE_ERROR_CODE; break; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); free(code); return USAGE_ERROR_CODE; break; } index=-1; } if (options.vars()->optind==argc || options.vars()->optind==argc-1) { error("Invalid arguments: rerun with --help\n"); free(code); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory free(code); return SUCCESS_RETURN_CODE; } Alphabet* alphabet=NULL; if (alph[0]!='\0') { alphabet=load_alphabet(&vec,alph); if (alphabet==NULL) { error("Cannot load alphabet file %s\n",alph); free(code); return DEFAULT_ERROR_CODE; } } strcpy(tokens,argv[(options.vars()->optind++)]); if (output[0]=='\0') { get_path(tokens,output); strcat(output,"keywords.txt"); } struct string_hash_ptr* keywords=load_tokens_by_freq(tokens,&vec); filter_non_letter_keywords(keywords,alphabet); if (cdic[0]!='\0') { load_compound_words(cdic,&vec,keywords); } for (;options.vars()->optind!=argc;(options.vars()->optind)++) { filter_keywords_with_dic(keywords,argv[options.vars()->optind],&vec,alphabet); } merge_case_equivalent_unknown_words(keywords,alphabet); struct string_hash* forbidden_lemmas=compute_forbidden_lemmas(keywords,code); remove_keywords_with_forbidden_lemma(keywords,forbidden_lemmas); free_string_hash(forbidden_lemmas); vector_ptr* sorted=sort_keywords(keywords); U_FILE* f_output=u_fopen(&vec,output,U_WRITE); if (f_output==NULL) { error("Cannot write in file %s\n",output); free_vector_ptr(sorted,(void(*)(void*))free_KeyWord_list); free_string_hash_ptr(keywords,(void(*)(void*))free_KeyWord_list); free_alphabet(alphabet); free(code); return DEFAULT_ERROR_CODE; } dump_keywords(sorted,f_output); u_fclose(f_output); free_vector_ptr(sorted,(void(*)(void*))free_KeyWord_list); 
free_string_hash_ptr(keywords,(void(*)(void*))free_KeyWord_list); free_alphabet(alphabet); free(code); return SUCCESS_RETURN_CODE; }
int main_PersistResource(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } const char *resource_file = NULL; int res_graph=0; int res_alphabet=0; int res_dico=0; int unpersist=0; int verbose=0; VersatileEncodingConfig vec = VEC_DEFAULT; int val,index=-1; bool only_verify_arguments = false; const char*output_file = NULL; const char*resource_type = NULL; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_PersistResource,lopts_PersistResource,&index))) { switch(val) { case 'a': res_alphabet = 1; resource_type = "alphabet"; break; case 'g': res_graph = 1; resource_type = "graph"; break; case 'd': res_dico = 1; resource_type = "dictionary"; break; case 'u': unpersist = 1; break; case 'v': verbose = 1; break; case 'o': if (options.vars()->optarg[0]=='\0') { error("Empty output argument\n"); return USAGE_ERROR_CODE; } output_file = options.vars()->optarg; // FIXME(gvollant) break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt): error("Missing argument for option --%s\n",lopts_PersistResource[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? 
error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; case 'k': case 'q': /* ignore -k and -q parameter instead to raise an error */ break; } index=-1; } if ((res_graph+res_alphabet+res_dico) != 1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if ((output_file!=NULL) && (unpersist!=0)) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } resource_file = argv[options.vars()->optind]; size_t size_buf_persisted_filename = strlen(resource_file) + 0x200; char* buf_persisted_filename = (char*)malloc(size_buf_persisted_filename +1); if (buf_persisted_filename == NULL) { alloc_error("PersistResource's main"); return ALLOC_ERROR_CODE; } *buf_persisted_filename='\0'; if (unpersist == 0) { int result = 0; if (res_alphabet) result = standard_load_persistence_alphabet(resource_file, buf_persisted_filename, size_buf_persisted_filename); if (res_graph) result = standard_load_persistence_fst2(resource_file, buf_persisted_filename, size_buf_persisted_filename); if (res_dico) result = standard_load_persistence_dictionary(resource_file, buf_persisted_filename, size_buf_persisted_filename); if (result && verbose) u_printf("Success on persist %s resource %s to persisted name %s\n", resource_type, resource_file, buf_persisted_filename); if (!result) error("The %s resource %s cannnot be persisted\n", resource_type, resource_file); if (result && (output_file != NULL)) { U_FILE* text = u_fopen(&vec, output_file, U_WRITE); if (text == NULL) { error("Cannot create text file %s\n", output_file); free(buf_persisted_filename); return DEFAULT_ERROR_CODE; } u_fprintf(text, "%s", buf_persisted_filename); u_fclose(text); } free(buf_persisted_filename); return 
result ? SUCCESS_RETURN_CODE : DEFAULT_ERROR_CODE; } else { if (res_alphabet) standard_unload_persistence_alphabet(resource_file); if (res_graph) standard_unload_persistence_fst2(resource_file); if (res_dico) standard_unload_persistence_dictionary(resource_file); u_printf("The %s resource %s unpersisted\n", resource_type, resource_file); return SUCCESS_RETURN_CODE; } }
int main_XMLizer(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } int output_style=TEI; char output[FILENAME_MAX]=""; char alphabet[FILENAME_MAX]=""; char normalization[FILENAME_MAX]=""; char segmentation[FILENAME_MAX]=""; VersatileEncodingConfig vec=VEC_DEFAULT; int convLFtoCRLF=1; int val,index=-1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_XMLizer,lopts_XMLizer,&index))) { switch(val) { case 'x': output_style=XML; break; case 't': output_style=TEI; break; case 'n': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty normalization grammar name\n"); return USAGE_ERROR_CODE; } strcpy(normalization,options.vars()->optarg); break; case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output file name\n"); return USAGE_ERROR_CODE; } strcpy(output,options.vars()->optarg); break; case 'a': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty alphabet file name\n"); return USAGE_ERROR_CODE; } strcpy(alphabet,options.vars()->optarg); break; case 's': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty segmentation grammar name\n"); return USAGE_ERROR_CODE; } strcpy(segmentation,options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? 
error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_XMLizer[index].name); return USAGE_ERROR_CODE; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (segmentation[0]=='\0') { error("You must specify the segmentation grammar to use\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } char input[FILENAME_MAX]; strcpy(input,argv[options.vars()->optind]); char snt[FILENAME_MAX]; remove_extension(input,snt); strcat(snt,"_tmp.snt"); char tmp[FILENAME_MAX]; remove_extension(input,tmp); strcat(tmp,".tmp"); normalize(input,snt,&vec,KEEP_CARRIAGE_RETURN,convLFtoCRLF,normalization,NULL,1); struct fst2txt_parameters* p=new_fst2txt_parameters(); p->vec=vec; p->input_text_file=strdup(snt); if (p->input_text_file ==NULL) { alloc_error("main_XMLizer"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } p->output_text_file_is_temp=1; p->output_text_file=strdup(tmp); if (p->output_text_file==NULL) { alloc_error("main_XMLizer"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } p->fst_file=strdup(segmentation); if (p->fst_file==NULL) { alloc_error("main_XMLizer"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } p->alphabet_file=strdup(alphabet); if 
(p->alphabet_file==NULL) { alloc_error("main_XMLizer"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } p->output_policy=MERGE_OUTPUTS; p->tokenization_policy=WORD_BY_WORD_TOKENIZATION; p->space_policy=DONT_START_WITH_SPACE; main_fst2txt(p); free_fst2txt_parameters(p); if (output[0]=='\0') { remove_extension(input,output); strcat(output,".xml"); } int return_value = xmlize(&vec,snt,output,output_style); af_remove(snt); af_remove(tmp); return return_value; }
int main_TEI2Txt(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } char output[FILENAME_MAX]=""; VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_TEI2Txt,lopts_TEI2Txt,&index))) { switch(val) { case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output file name\n"); return USAGE_ERROR_CODE; } strcpy(output,options.vars()->optarg); break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_TEI2Txt[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } if(output[0]=='\0') { remove_extension(argv[options.vars()->optind],output); strcat(output,".txt"); } int return_value = tei2txt(argv[options.vars()->optind],output,&vec); return return_value; }
/** * The same than main, but no call to setBufferMode. */ int main_BuildKrMwuDic(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } int val,index=-1; char output[FILENAME_MAX]=""; char inflection_dir[FILENAME_MAX]=""; char alphabet[FILENAME_MAX]=""; char dic_bin[FILENAME_MAX]=""; char dic_inf[FILENAME_MAX]=""; // default policy is to compile only out of date graphs GraphRecompilationPolicy graph_recompilation_policy = ONLY_OUT_OF_DATE; VersatileEncodingConfig vec=VEC_DEFAULT; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_BuildKrMwuDic,lopts_BuildKrMwuDic,&index))) { switch(val) { case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output file name\n"); return USAGE_ERROR_CODE; } strcpy(output,options.vars()->optarg); break; case 'd': if (options.vars()->optarg[0]=='\0') { error("Empty inflection directory\n"); return USAGE_ERROR_CODE; } strcpy(inflection_dir,options.vars()->optarg); break; case 'a': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty alphabet file name\n"); return USAGE_ERROR_CODE; } strcpy(alphabet,options.vars()->optarg); break; case 'b': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty binary dictionary name\n"); return USAGE_ERROR_CODE; } strcpy(dic_bin,options.vars()->optarg); remove_extension(dic_bin,dic_inf); strcat(dic_inf,".inf"); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case 'f': graph_recompilation_policy = ALWAYS_RECOMPILE; break; case 'n': graph_recompilation_policy = NEVER_RECOMPILE; break; case 't': graph_recompilation_policy = ONLY_OUT_OF_DATE; break; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_BuildKrMwuDic[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? 
error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (output[0]=='\0') { error("Output file must be specified\n"); return USAGE_ERROR_CODE; } if (inflection_dir[0]=='\0') { error("Inflection directory must be specified\n"); return USAGE_ERROR_CODE; } if (alphabet[0]=='\0') { error("Alphabet file must be specified\n"); return USAGE_ERROR_CODE; } if (dic_bin[0]=='\0') { error("Binary dictionary must be specified\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } U_FILE* delas=u_fopen(&vec,argv[options.vars()->optind],U_READ); if (delas==NULL) { error("Cannot open %s\n",argv[options.vars()->optind]); return DEFAULT_ERROR_CODE; } U_FILE* grf=u_fopen(&vec,output,U_WRITE); if (grf==NULL) { error("Cannot open %s\n",output); u_fclose(delas); return DEFAULT_ERROR_CODE; } Alphabet* alph=load_alphabet(&vec,alphabet,1); if (alph==NULL) { u_fclose(grf); u_fclose(delas); error("Cannot open alphabet file %s\n",alphabet); return DEFAULT_ERROR_CODE; } Korean* korean=new Korean(alph); MultiFlex_ctx* multiFlex_ctx=new_MultiFlex_ctx(inflection_dir, NULL, NULL, &vec, korean, NULL, NULL, graph_recompilation_policy); Dictionary* d=new_Dictionary(&vec,dic_bin,dic_inf); create_mwu_dictionary(delas,grf,multiFlex_ctx,d); free_Dictionary(d); u_fclose(delas); 
u_fclose(grf); free_alphabet(alph); delete korean; for (int count_free_fst2=0;count_free_fst2<multiFlex_ctx->n_fst2;count_free_fst2++) { free_abstract_Fst2(multiFlex_ctx->fst2[count_free_fst2],&(multiFlex_ctx->fst2_free[count_free_fst2])); multiFlex_ctx->fst2[count_free_fst2]=NULL; } free_MultiFlex_ctx(multiFlex_ctx); u_printf("Done.\n"); return SUCCESS_RETURN_CODE; }
int main_ElagComp(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; char compilename[FILENAME_MAX]=""; char directory[FILENAME_MAX]=""; char grammar[FILENAME_MAX]=""; char rule_file[FILENAME_MAX]=""; char lang[FILENAME_MAX]=""; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_ElagComp,lopts_ElagComp,&index))) { switch(val) { case 'l': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty language definition file\n"); return USAGE_ERROR_CODE; } strcpy(lang,options.vars()->optarg); break; case 'r': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty rule file\n"); return USAGE_ERROR_CODE; } strcpy(rule_file,options.vars()->optarg); get_path(rule_file,directory); break; case 'g': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty grammar file name\n"); return USAGE_ERROR_CODE; } strcpy(grammar,options.vars()->optarg); get_path(grammar,directory); break; case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output file\n"); return USAGE_ERROR_CODE; } strcpy(compilename,options.vars()->optarg); break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? 
error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_ElagComp[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (lang[0]=='\0') { error("You must define the language definition file\n"); return USAGE_ERROR_CODE; } if ((rule_file[0]=='\0' && grammar[0]=='\0') || (rule_file[0]!='\0' && grammar[0]!='\0')) { error("You must define a rule list OR a grammar\n"); return USAGE_ERROR_CODE; } if (options.vars()->optind!=argc) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (rule_file[0]=='\0' && grammar[0]=='\0') { error("You must specified a grammar or a rule file name\n"); return USAGE_ERROR_CODE; } if (rule_file[0]!='\0' && grammar[0]!='\0') { error("Cannot handle both a rule file and a grammar\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } language_t* language = load_language_definition(&vec,lang); if (rule_file[0]!='\0') { /* If we work with a rule list */ if (compilename[0]=='\0') { int l=(int)strlen(rule_file); if (strcmp(rule_file+l-4,".lst")==0) { strcpy(compilename,rule_file); strcpy(compilename+l-4,".rul"); } else { sprintf(compilename,"%s.rul",rule_file); } } if (compile_elag_rules(rule_file,compilename,&vec,language)==-1) { error("An error occurred while compiling %s\n",compilename); free_language_t(language); return DEFAULT_ERROR_CODE; } u_printf("\nElag grammars are compiled in %s.\n",compilename); } else { /* If we must compile a single grammar */ char elg_file[FILENAME_MAX]; get_extension(grammar,elg_file); if (strcmp(elg_file,".fst2")) { error("Grammar '%s' should be a .fst2 file\n"); free_language_t(language); return DEFAULT_ERROR_CODE; } remove_extension(grammar,elg_file); strcat(elg_file,".elg"); if 
(compile_elag_grammar(grammar,elg_file,&vec,language)==-1) { error("An error occured while compiling %s\n",grammar); free_language_t(language); return DEFAULT_ERROR_CODE; } u_printf("Elag grammar is compiled into %s.\n",elg_file); } free_language_t(language); return SUCCESS_RETURN_CODE; }
int main_Untokenize(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } char alphabet[FILENAME_MAX]=""; char token_file[FILENAME_MAX]=""; char dynamicSntDir[FILENAME_MAX]=""; VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; int range_start,range_stop,use_range; int token_step_number=0; range_start=range_stop=use_range=0; char foo=0; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_Untokenize,lopts_Untokenize,&index))) { switch(val) { case 'a': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty alphabet file name\n"); return USAGE_ERROR_CODE; } strcpy(alphabet,options.vars()->optarg); break; case 'd': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty snt dir name\n"); return USAGE_ERROR_CODE; } strcpy(dynamicSntDir,options.vars()->optarg); break; case 't': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty token file name\n"); return USAGE_ERROR_CODE; } strcpy(token_file,options.vars()->optarg); break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'n': if (1!=sscanf(options.vars()->optarg,"%d%c",&token_step_number,&foo) || token_step_number<=0) { /* foo is used to check that the search limit is not like "45gjh" */ error("Invalid token numbering argument: %s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; case 'r': { int param1 = 0; int param2 = 0; int ret_scan = sscanf(options.vars()->optarg,"%d,%d%c",¶m1,¶m2,&foo); if (ret_scan == 2) { range_start = param1; range_stop = 
param2; use_range=1; if (((range_start < -1)) || (range_stop < -1)) { /* foo is used to check that the search limit is not like "45gjh" */ error("Invalid stop count argument: %s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } } else if (1!=sscanf(options.vars()->optarg,"%d%c",&range_start,&foo) || (range_start < -1)) { /* foo is used to check that the search limit is not like "45gjh" */ error("Invalid stop count argument: %s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } use_range=1; } break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_Untokenize[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } char tokens_txt[FILENAME_MAX]; char text_cod[FILENAME_MAX]; char enter_pos[FILENAME_MAX]; if (dynamicSntDir[0]=='\0') { get_snt_path(argv[options.vars()->optind],dynamicSntDir); } strcpy(text_cod,dynamicSntDir); strcat(text_cod,"text.cod"); strcpy(enter_pos,dynamicSntDir); strcat(enter_pos,"enter.pos"); strcpy(tokens_txt,dynamicSntDir); strcat(tokens_txt,"tokens.txt"); Alphabet* alph=NULL; if (alphabet[0]!='\0') { alph=load_alphabet(&vec,alphabet); if (alph==NULL) { error("Cannot load alphabet file %s\n",alphabet); return DEFAULT_ERROR_CODE; } } ABSTRACTMAPFILE* af_text_cod=af_open_mapfile(text_cod,MAPFILE_OPTION_READ,0); if (af_text_cod==NULL) { error("Cannot open file %s\n",text_cod); free_alphabet(alph); return DEFAULT_ERROR_CODE; } ABSTRACTMAPFILE* af_enter_pos=af_open_mapfile(enter_pos,MAPFILE_OPTION_READ,0); 
if (af_enter_pos==NULL) { error("Cannot open file %s\n",enter_pos); af_close_mapfile(af_text_cod); free_alphabet(alph); return DEFAULT_ERROR_CODE; } U_FILE* text = u_fopen(&vec,argv[options.vars()->optind],U_WRITE); if (text==NULL) { error("Cannot create text file %s\n",argv[options.vars()->optind]); af_close_mapfile(af_enter_pos); af_close_mapfile(af_text_cod); free_alphabet(alph); return DEFAULT_ERROR_CODE; } struct text_tokens* tok=load_text_tokens(&vec,tokens_txt); u_printf("Untokenizing text...\n"); size_t nb_item = af_get_mapfile_size(af_text_cod)/sizeof(int); const int* buf=(const int*)af_get_mapfile_pointer(af_text_cod); size_t nb_item_enter_pos=0; const int* buf_enter=NULL; if (af_enter_pos!=NULL) { buf_enter=(const int*)af_get_mapfile_pointer(af_enter_pos); if (buf_enter!=NULL) { nb_item_enter_pos=af_get_mapfile_size(af_enter_pos)/sizeof(int); } } size_t count_pos=0; for (size_t i=0;i<nb_item;i++) { int is_in_range=1; if ((use_range!=0) && (i<(size_t)range_start)) { is_in_range=0; } if ((use_range!=0) && (range_stop!=0) && (i>(size_t)range_stop)) { is_in_range=0; } int is_newline=0; if (count_pos<nb_item_enter_pos) { if (i==(size_t)(*(buf_enter+count_pos))) { is_newline = 1; count_pos++; } } if (is_in_range!=0) { if (token_step_number != 0) if ((i%token_step_number)==0) u_fprintf(text,"\n\nToken %d : ", (int)i); if (is_newline!=0) { u_fprintf(text,"\n", tok->token[*(buf+i)]); } else { u_fputs(tok->token[*(buf+i)], text); } } } af_release_mapfile_pointer(af_text_cod,buf); af_release_mapfile_pointer(af_enter_pos,buf_enter); af_close_mapfile(af_enter_pos); af_close_mapfile(af_text_cod); free_text_tokens(tok); u_fclose(text); free_alphabet(alph); u_printf("\nDone.\n"); return SUCCESS_RETURN_CODE; }
/** * This is the customized diff program designed to compare grf files. */ int main_SelectOutput(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_SelectOutput,lopts_SelectOutput,&index))) { switch(val) { case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case 'e': case 'o': { enum stdwrite_kind swk = (val == 'o') ? stdwrite_kind_out : stdwrite_kind_err; if (strcmp(options.vars()->optarg,"on") == 0) { SetStdWriteCB(swk,0,NULL,NULL); } else if (strcmp(options.vars()->optarg,"off") == 0) { SetStdWriteCB(swk,1,NULL,NULL); } else { error("Invalid option --%s, must be 'on' or 'off'\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_SelectOutput[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } // keep this for further modifications if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } return SUCCESS_RETURN_CODE; }
int main_SortTxt(int argc, char* const argv[]) { if (argc == 1) { usage(); return SUCCESS_RETURN_CODE; } struct sort_infos* inf = new_sort_infos(); if(!inf) { return ALLOC_ERROR_CODE; } int mode = DEFAULT; char line_info[FILENAME_MAX] = ""; char sort_order[FILENAME_MAX] = ""; VersatileEncodingConfig vec = { DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT, DEFAULT_ENCODING_OUTPUT, DEFAULT_BOM_OUTPUT }; int val, index = -1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF != (val = options.parse_long(argc, argv, optstring_SortTxt, lopts_SortTxt, &index))) { switch (val) { case 'n': inf->REMOVE_DUPLICATES = 1; break; case 'd': inf->REMOVE_DUPLICATES = 0; break; case 'r': inf->REVERSE = -1; break; case 'o': if (options.vars()->optarg[0] == '\0') { error("You must specify a non empty sort order file name\n"); free_sort_infos(inf); return USAGE_ERROR_CODE; } strcpy(sort_order, options.vars()->optarg); break; case 'l': if (options.vars()->optarg[0] == '\0') { error("You must specify a non empty information file name\n"); free_sort_infos(inf); return USAGE_ERROR_CODE; } strcpy(line_info, options.vars()->optarg); break; case 't': mode = THAI; break; case 'f': inf->factorize_inflectional_codes = 1; break; case 'V': only_verify_arguments = true; break; case 'h': usage(); free_sort_infos(inf); return SUCCESS_RETURN_CODE; case 'k': if (options.vars()->optarg[0] == '\0') { error("Empty input_encoding argument\n"); free_sort_infos(inf); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter( &(vec.mask_encoding_compatibility_input), options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0] == '\0') { error("Empty output_encoding argument\n"); free_sort_infos(inf); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output), &(vec.bom_output), options.vars()->optarg); break; case ':': index == -1 ? 
error("Missing argument for option -%c\n", options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_SortTxt[index].name); free_sort_infos(inf); return USAGE_ERROR_CODE; case '?': index == -1 ? error("Invalid option -%c\n", options.vars()->optopt) : error("Invalid option --%s\n", options.vars()->optarg); free_sort_infos(inf); return USAGE_ERROR_CODE; } index = -1; } if (options.vars()->optind != argc - 1) { error("Invalid arguments: rerun with --help\n"); free_sort_infos(inf); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory free_sort_infos(inf); return SUCCESS_RETURN_CODE; } if (sort_order[0] != '\0') { read_char_order(&vec, sort_order, inf); } char new_name[FILENAME_MAX]; strcpy(new_name, argv[options.vars()->optind]); strcat(new_name, ".new"); inf->f = u_fopen(&vec, argv[options.vars()->optind], U_READ); if (inf->f == NULL) { error("Cannot open file %s\n", argv[options.vars()->optind]); free_sort_infos(inf); return DEFAULT_ERROR_CODE; } inf->f_out = u_fopen(&vec, new_name, U_WRITE); if (inf->f_out == NULL) { error("Cannot open temporary file %s\n", new_name); u_fclose(inf->f); free_sort_infos(inf); return DEFAULT_ERROR_CODE; } switch (mode) { case DEFAULT: sort(inf); break; case THAI: sort_thai(inf); break; } if (line_info[0] != '\0') { U_FILE* F = u_fopen(&vec, line_info, U_WRITE); if (F == NULL) { error("Cannot write %s\n", line_info); } else { u_fprintf(F, "%d\n", inf->resulting_line_number); u_fclose(F); } } u_fclose(inf->f_out); u_fclose(inf->f); af_remove(argv[options.vars()->optind]); af_rename(new_name, argv[options.vars()->optind]); free_sort_infos(inf); u_printf("Done.\n"); return SUCCESS_RETURN_CODE; }
/** * This is the customized diff program designed to compare grf files. */ int main_GrfDiff(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; char output[FILENAME_MAX]=""; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_GrfDiff,lopts_GrfDiff,&index))) { switch(val) { case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case 1: { strcpy(output,options.vars()->optarg); break; } case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_GrfDiff[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? 
error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-2) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } U_FILE* f=U_STDOUT; if (output[0]!='\0') { /* Since the output is supposed to be a diff-like one, there is no point * in outputing in a variable encoding, so we force UTF8 */ f=u_fopen(UTF8,output,U_WRITE); if (f==NULL) { error("Cannot create file %s\n",output); return DEFAULT_ERROR_CODE; } } Grf* a=load_Grf(&vec,argv[options.vars()->optind]); if (a==NULL) { if (f!=U_STDOUT) { u_fclose(f); } return DEFAULT_ERROR_CODE; } Grf* b=load_Grf(&vec,argv[options.vars()->optind+1]); if (b==NULL) { free_Grf(a); if (f!=U_STDOUT) { u_fclose(f); } return DEFAULT_ERROR_CODE; } GrfDiff* diff=grf_diff(a,b); free_Grf(a); free_Grf(b); print_diff(f,diff); if (f!=U_STDOUT) { u_fclose(f); } int different=diff->diff_ops->nbelems!=0; free_GrfDiff(diff); return different; }
int main_Flatten(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } int RTN=1; int depth=10; VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; char foo; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_Flatten,lopts_Flatten,&index))) { switch(val) { case 'f': RTN=0; break; case 'r': RTN=1; break; case 'd': if (1!=sscanf(options.vars()->optarg,"%d%c",&depth,&foo) || depth<=0) { /* foo is used to check that the depth is not like "45gjh" */ error("Invalid depth argument: %s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_Flatten[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? 
error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; break; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } u_printf("Loading %s...\n",argv[options.vars()->optind]); struct FST2_free_info fst2_free; Fst2* origin=load_abstract_fst2(&vec,argv[options.vars()->optind],1,&fst2_free); if (origin==NULL) { error("Cannot load %s\n",argv[options.vars()->optind]); return DEFAULT_ERROR_CODE; } char temp[FILENAME_MAX]; strcpy(temp,argv[options.vars()->optind]); strcat(temp,".tmp.fst2"); switch (flatten_fst2(origin,depth,temp,&vec,RTN)) { case EQUIVALENT_FST: u_printf("The resulting grammar is an equivalent finite-state transducer.\n"); break; case APPROXIMATIVE_FST: u_printf("The resulting grammar is a finite-state approximation.\n"); break; case EQUIVALENT_RTN: u_printf("The resulting grammar is an equivalent FST2 (RTN).\n"); break; default: error("Internal state error in Flatten's main\n"); free_abstract_Fst2(origin,&fst2_free); return DEFAULT_ERROR_CODE; } free_abstract_Fst2(origin,&fst2_free); af_remove(argv[options.vars()->optind]); af_rename(temp,argv[options.vars()->optind]); return SUCCESS_RETURN_CODE; }
/** * The same than main, but no call to setBufferMode. */ int main_Concord(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } int val,index=-1; struct conc_opt* concord_options = new_conc_opt(); char foo; VersatileEncodingConfig vec=VEC_DEFAULT; int ret; char offset_file[FILENAME_MAX]=""; char PRLG[FILENAME_MAX]=""; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_Concord,lopts_Concord,&index))) { switch(val) { case 'f': if (options.vars()->optarg[0]=='\0') { error("Empty font name argument\n"); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } concord_options->fontname=strdup(options.vars()->optarg); if (concord_options->fontname==NULL) { alloc_error("main_Concord"); free_conc_opt(concord_options); return ALLOC_ERROR_CODE; } break; case 's': if (1!=sscanf(options.vars()->optarg,"%d%c",&(concord_options->fontsize),&foo)) { /* foo is used to check that the font size is not like "45gjh" */ error("Invalid font size argument: %s\n",options.vars()->optarg); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } break; case 'l': ret=sscanf(options.vars()->optarg,"%d%c%c",&(concord_options->left_context),&foo,&foo); if (ret==0 || ret==3 || (ret==2 && foo!='s') || concord_options->left_context<0) { error("Invalid left context argument: %s\n",options.vars()->optarg); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } if (ret==2) { concord_options->left_context_until_eos=1; } break; case 'r': ret=sscanf(options.vars()->optarg,"%d%c%c",&(concord_options->right_context),&foo,&foo); if (ret==0 || ret==3 || (ret==2 && foo!='s') || concord_options->right_context<0) { error("Invalid right context argument: %s\n",options.vars()->optarg); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } if (ret==2) { concord_options->right_context_until_eos=1; } break; case 'L': concord_options->convLFtoCRLF=0; break; case 0: concord_options->sort_mode=TEXT_ORDER; break; 
case 1: concord_options->sort_mode=LEFT_CENTER; break; case 2: concord_options->sort_mode=LEFT_RIGHT; break; case 3: concord_options->sort_mode=CENTER_LEFT; break; case 4: concord_options->sort_mode=CENTER_RIGHT; break; case 5: concord_options->sort_mode=RIGHT_LEFT; break; case 6: concord_options->sort_mode=RIGHT_CENTER; break; case 7: concord_options->result_mode=DIFF_; break; case 8: concord_options->only_ambiguous=1; break; case 9: { strcpy(PRLG,options.vars()->optarg); char* pos=strchr(PRLG,','); if (pos==NULL || pos==PRLG || *(pos+1)=='\0') { error("Invalid argument for option --PRLG: %s\n",options.vars()->optarg); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } *pos='\0'; strcpy(offset_file,pos+1); break; } case 10: concord_options->only_matches=1; break; case 11: concord_options->result_mode=LEMMATIZE_; break; case 12: concord_options->result_mode=CSV_; break; case 'H': concord_options->result_mode=HTML_; break; case 't': { concord_options->result_mode=TEXT_; if (options.vars()->optarg!=NULL) { strcpy(concord_options->output,options.vars()->optarg); } break; } case 'g': concord_options->result_mode=GLOSSANET_; if (options.vars()->optarg[0]=='\0') { error("Empty glossanet script argument\n"); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } concord_options->script=strdup(options.vars()->optarg); if (concord_options->script==NULL) { alloc_error("main_Concord"); free_conc_opt(concord_options); return ALLOC_ERROR_CODE; } break; case 'p': concord_options->result_mode=SCRIPT_; if (options.vars()->optarg[0]=='\0') { error("Empty script argument\n"); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } concord_options->script=strdup(options.vars()->optarg); if (concord_options->script==NULL) { alloc_error("main_Concord"); free_conc_opt(concord_options); return ALLOC_ERROR_CODE; } break; case 'i': concord_options->result_mode=INDEX_; break; case 'u': concord_options->result_mode=UIMA_; if (options.vars()->optarg!=NULL) { 
strcpy(offset_file,options.vars()->optarg); } concord_options->original_file_offsets=1; break; case 'e': concord_options->result_mode=XML_; if (options.vars()->optarg!=NULL) { strcpy(offset_file, options.vars()->optarg); concord_options->original_file_offsets=1; } break; case 'w': concord_options->result_mode=XML_WITH_HEADER_; if (options.vars()->optarg!=NULL) { strcpy(offset_file, options.vars()->optarg); concord_options->original_file_offsets = 1; } break; case '$': if (options.vars()->optarg[0]=='\0') { error("Empty input_offsets argument\n"); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } strcpy(concord_options->input_offsets,options.vars()->optarg); break; case '@': if (options.vars()->optarg[0]=='\0') { error("Empty output_offsets argument\n"); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } strcpy(concord_options->output_offsets,options.vars()->optarg); break; case 'A': concord_options->result_mode=AXIS_; break; case 'x': concord_options->result_mode=XALIGN_; break; case 'm': concord_options->result_mode=MERGE_; if (options.vars()->optarg[0]=='\0') { error("Empty output file name argument\n"); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } strcpy(concord_options->output,options.vars()->optarg); break; case 'a': if (options.vars()->optarg[0]=='\0') { error("Empty alphabet argument\n"); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } concord_options->sort_alphabet=strdup(options.vars()->optarg); if (concord_options->sort_alphabet==NULL) { alloc_error("main_Concord"); free_conc_opt(concord_options); return ALLOC_ERROR_CODE; } break; case 'T': concord_options->thai_mode=1; break; case 'd': if (options.vars()->optarg[0]=='\0') { error("Empty snt directory argument\n"); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } strcpy(concord_options->working_directory,options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); free_conc_opt(concord_options); return 
SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_Concord[index].name); free_conc_opt(concord_options); return USAGE_ERROR_CODE; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); free_conc_opt(concord_options); return USAGE_ERROR_CODE; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } if (concord_options->fontname==NULL || concord_options->fontsize<=0) { if (concord_options->result_mode==HTML_ || concord_options->result_mode==GLOSSANET_) { error("The specified output mode is an HTML file: you must specify font parameters\n"); free_conc_opt(concord_options); return USAGE_ERROR_CODE; } } if (only_verify_arguments) { // freeing all allocated memory free_conc_opt(concord_options); return SUCCESS_RETURN_CODE; } U_FILE* concor=u_fopen(&vec,argv[options.vars()->optind],U_READ); if (concor==NULL) { error("Cannot open concordance index file %s\n",argv[options.vars()->optind]); free_conc_opt(concord_options); return DEFAULT_ERROR_CODE; } if (concord_options->working_directory[0]=='\0') { get_path(argv[options.vars()->optind],concord_options->working_directory); } if (concord_options->only_matches) { concord_options->left_context=0; concord_options->right_context=0; } /* We 
compute the name of the files associated to the text */ struct snt_files* snt_files=new_snt_files_from_path(concord_options->working_directory); ABSTRACTMAPFILE* text=af_open_mapfile(snt_files->text_cod,MAPFILE_OPTION_READ,0); if (text==NULL) { error("Cannot open file %s\n",snt_files->text_cod); free_snt_files(snt_files); u_fclose(concor); free_conc_opt(concord_options); return DEFAULT_ERROR_CODE; } struct text_tokens* tok=load_text_tokens(&vec,snt_files->tokens_txt); if (tok==NULL) { error("Cannot load text token file %s\n",snt_files->tokens_txt); af_close_mapfile(text); free_snt_files(snt_files); u_fclose(concor); free_conc_opt(concord_options); return DEFAULT_ERROR_CODE; } U_FILE* f_enter=u_fopen(BINARY,snt_files->enter_pos,U_READ); int n_enter_char=0; int* enter_pos=NULL; /* New lines are encoded in 'enter.pos' files. Those files will disappear in the future */ if (f_enter==NULL) { error("Cannot open file %s\n",snt_files->enter_pos); } else { long size=get_file_size(f_enter); enter_pos=(int*)malloc(size); if (enter_pos==NULL) { alloc_error("main_Concord"); u_fclose(f_enter); free_text_tokens(tok); af_close_mapfile(text); free_snt_files(snt_files); u_fclose(concor); free_conc_opt(concord_options); return ALLOC_ERROR_CODE; } n_enter_char=(int)fread(enter_pos,sizeof(int),size/sizeof(int),f_enter); if (n_enter_char!=(int)(size/sizeof(int))) { error("Read error on enter.pos file in main_Concord\n"); u_fclose(f_enter); free(enter_pos); free_text_tokens(tok); af_close_mapfile(text); free_snt_files(snt_files); u_fclose(concor); free_conc_opt(concord_options); return DEFAULT_ERROR_CODE; } u_fclose(f_enter); } if (concord_options->result_mode==INDEX_ || concord_options->result_mode==UIMA_ || concord_options->result_mode==XML_ || concord_options->result_mode==XML_WITH_HEADER_ || concord_options->result_mode==AXIS_) { /* We force some options for index, uima and axis files */ concord_options->left_context=0; concord_options->right_context=0; 
concord_options->sort_mode=TEXT_ORDER; } if (concord_options->only_ambiguous && concord_options->result_mode!=LEMMATIZE_) { /* We force text order when displaying only ambiguous outputs */ concord_options->sort_mode=TEXT_ORDER; } if (concord_options->result_mode==HTML_ || concord_options->result_mode==DIFF_ || concord_options->result_mode==LEMMATIZE_) { /* We need the offset file if and only if we have to produce * an html concordance with positions in .snt file */ concord_options->snt_offsets=load_snt_offsets(snt_files->snt_offsets_pos); if (concord_options->snt_offsets==NULL) { error("Cannot read snt offset file %s\n",snt_files->snt_offsets_pos); free(enter_pos); free_text_tokens(tok); af_close_mapfile(text); free_snt_files(snt_files); u_fclose(concor); free_conc_opt(concord_options); return DEFAULT_ERROR_CODE; } } if (offset_file[0]!='\0') { concord_options->uima_offsets=load_uima_offsets(&vec,offset_file); if (concord_options->uima_offsets==NULL) { error("Cannot read offset file %s\n",offset_file); free(enter_pos); free_text_tokens(tok); af_close_mapfile(text); free_snt_files(snt_files); u_fclose(concor); free_conc_opt(concord_options); return DEFAULT_ERROR_CODE; } } if (PRLG[0]!='\0') { concord_options->PRLG_data=load_PRLG_data(&vec,PRLG); if (concord_options->PRLG_data==NULL) { error("Cannot read PRLG file %s\n",PRLG); free(enter_pos); free_text_tokens(tok); af_close_mapfile(text); free_snt_files(snt_files); u_fclose(concor); free_conc_opt(concord_options); return DEFAULT_ERROR_CODE; } } if (concord_options->result_mode==CSV_) { concord_options->sort_mode=TEXT_ORDER; concord_options->only_matches=1; } /* Once we have set all parameters, we call the function that * will actually create the concordance. 
*/ create_concordance(&vec,concor,text,tok,n_enter_char,enter_pos,concord_options); free(enter_pos); free_text_tokens(tok); af_close_mapfile(text); free_snt_files(snt_files); u_fclose(concor); free_conc_opt(concord_options); u_printf("Done.\n"); return SUCCESS_RETURN_CODE; }
int main_Uncompress(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; char output[FILENAME_MAX]=""; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_Uncompress,lopts_Uncompress,&index))) { switch(val) { case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output file name\n"); return USAGE_ERROR_CODE; } strcpy(output,options.vars()->optarg); break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_Uncompress[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? 
error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } if (output[0]=='\0') { remove_extension(argv[options.vars()->optind],output); strcat(output,".dic"); } U_FILE* f=u_fopen(&vec,output,U_WRITE); if (f==NULL) { error("Cannot open file %s\n",output); return DEFAULT_ERROR_CODE; } char inf_file[FILENAME_MAX]; remove_extension(argv[options.vars()->optind],inf_file); strcat(inf_file,".inf"); u_printf("Uncompressing %s...\n",argv[options.vars()->optind]); Dictionary* d=new_Dictionary(&vec,argv[options.vars()->optind],inf_file); if (d!=NULL) { rebuild_dictionary(d,f); } u_fclose(f); free_Dictionary(d); u_printf("Done.\n"); return SUCCESS_RETURN_CODE; }
int main_RebuildTfst(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } VersatileEncodingConfig vec=VEC_DEFAULT; int val, index=-1; bool only_verify_arguments = false; UnitexGetOpt options; int save_statistics=1; while (EOF!=(val=options.parse_long(argc,argv,optstring_RebuildTfst,lopts_RebuildTfst,&index))) { switch (val) { case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'S': save_statistics = 0; break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n", options.vars()->optopt) : error("Missing argument for option --%s\n", lopts_RebuildTfst[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? 
error("Invalid option -%c\n", options.vars()->optopt) : error("Invalid option --%s\n", options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } char input_tfst[FILENAME_MAX]; char input_tind[FILENAME_MAX]; strcpy(input_tfst,argv[options.vars()->optind]); remove_extension(input_tfst,input_tind); strcat(input_tind,".tind"); u_printf("Loading %s...\n",input_tfst); Tfst* tfst = open_text_automaton(&vec,input_tfst); if (tfst==NULL) { error("Unable to load %s automaton\n",input_tfst); return DEFAULT_ERROR_CODE; } char basedir[FILENAME_MAX]; get_path(input_tfst,basedir); char output_tfst[FILENAME_MAX]; sprintf(output_tfst, "%s.new.tfst",input_tfst); char output_tind[FILENAME_MAX]; sprintf(output_tind, "%s.new.tind",input_tfst); U_FILE* f_tfst; if ((f_tfst = u_fopen(&vec,output_tfst,U_WRITE)) == NULL) { error("Unable to open %s for writing\n", output_tfst); close_text_automaton(tfst); return DEFAULT_ERROR_CODE; } U_FILE* f_tind; if ((f_tind = u_fopen(BINARY,output_tind,U_WRITE)) == NULL) { u_fclose(f_tfst); close_text_automaton(tfst); error("Unable to open %s for writing\n", output_tind); return DEFAULT_ERROR_CODE; } /* We use this hash table to rebuild files tfst_tags_by_freq/alph.txt */ struct hash_table* form_frequencies=new_hash_table((HASH_FUNCTION)hash_unichar,(EQUAL_FUNCTION)u_equal, (FREE_FUNCTION)free,NULL,(KEYCOPY_FUNCTION)keycopy); u_fprintf(f_tfst,"%010d\n",tfst->N); for (int i = 1; i <= tfst->N; i++) { if ((i % 100) == 0) { u_printf("%d/%d sentences rebuilt...\n", i, tfst->N); } load_sentence(tfst,i); char grfname[FILENAME_MAX]; sprintf(grfname, "%ssentence%d.grf", basedir, i); unichar** tags=NULL; int n_tags=-1; if (fexists(grfname)) { /* If there is a .grf for the current sentence, then we must * take it into account */ if 
(0==pseudo_main_Grf2Fst2(&vec,grfname,0,NULL,1,1,NULL,NULL,0)) { /* We proceed only if the graph compilation was a success */ char fst2name[FILENAME_MAX]; sprintf(fst2name, "%ssentence%d.fst2", basedir, i); struct FST2_free_info fst2_free; Fst2* fst2=load_abstract_fst2(&vec,fst2name,0,&fst2_free); af_remove(fst2name); free_SingleGraph(tfst->automaton,NULL); tfst->automaton=create_copy_of_fst2_subgraph(fst2,1); tags=create_tfst_tags(fst2,&n_tags); free_abstract_Fst2(fst2,&fst2_free); } else { error("Error: %s is not a valid sentence automaton\n",grfname); } } save_current_sentence(tfst,f_tfst,f_tind,tags,n_tags,form_frequencies); if (tags!=NULL) { /* If necessary, we free the tags we created */ for (int count_tags=0;count_tags<n_tags;count_tags++) { free(tags[count_tags]); } free(tags); } } u_printf("Text automaton rebuilt.\n"); u_fclose(f_tind); u_fclose(f_tfst); close_text_automaton(tfst); /* Finally, we save statistics */ if (save_statistics) { char tfst_tags_by_freq[FILENAME_MAX]; char tfst_tags_by_alph[FILENAME_MAX]; strcpy(tfst_tags_by_freq, basedir); strcat(tfst_tags_by_freq, "tfst_tags_by_freq.txt"); strcpy(tfst_tags_by_alph, basedir); strcat(tfst_tags_by_alph, "tfst_tags_by_alph.txt"); U_FILE* f_tfst_tags_by_freq = u_fopen(&vec, tfst_tags_by_freq, U_WRITE); if (f_tfst_tags_by_freq == NULL) { error("Cannot open %s\n", tfst_tags_by_freq); } U_FILE* f_tfst_tags_by_alph = u_fopen(&vec, tfst_tags_by_alph, U_WRITE); if (f_tfst_tags_by_alph == NULL) { error("Cannot open %s\n", tfst_tags_by_alph); } sort_and_save_tfst_stats(form_frequencies, f_tfst_tags_by_freq, f_tfst_tags_by_alph); u_fclose(f_tfst_tags_by_freq); u_fclose(f_tfst_tags_by_alph); } free_hash_table(form_frequencies); /* make a backup and replace old automaton with new */ char backup_tfst[FILENAME_MAX]; char backup_tind[FILENAME_MAX]; sprintf(backup_tfst,"%s.bck",input_tfst); sprintf(backup_tind,"%s.bck",input_tind); /* We remove the existing backup files, if any */ af_remove(backup_tfst); 
af_remove(backup_tind); af_rename(input_tfst,backup_tfst); af_rename(input_tind,backup_tind); af_rename(output_tfst,input_tfst); af_rename(output_tind,input_tind); u_printf("\nYou can find a backup of the original files in:\n %s\nand %s\n", backup_tfst,backup_tind); return SUCCESS_RETURN_CODE; }
/* * This function behaves in the same way that a main one, except that it does * not invoke the setBufferMode function. */ int main_LocateTfst(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; char text[FILENAME_MAX]=""; char alphabet[FILENAME_MAX]=""; int is_korean=0; int tilde_negation_operator=1; int selected_negation_operator=0; int tagging=0; int single_tags_only=0; int match_word_boundaries=1; MatchPolicy match_policy=LONGEST_MATCHES; OutputPolicy output_policy=IGNORE_OUTPUTS; AmbiguousOutputPolicy ambiguous_output_policy=ALLOW_AMBIGUOUS_OUTPUTS; VariableErrorPolicy variable_error_policy=IGNORE_VARIABLE_ERRORS; int search_limit=NO_MATCH_LIMIT; char foo; vector_ptr* injected=new_vector_ptr(); bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_LocateTfst,lopts_LocateTfst,&index))) { switch(val) { case 't': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty .tfst name\n"); free_vector_ptr(injected); return USAGE_ERROR_CODE; } strcpy(text,options.vars()->optarg); break; case 'a': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty alphabet name\n"); free_vector_ptr(injected); return USAGE_ERROR_CODE; } strcpy(alphabet,options.vars()->optarg); break; case 'K': is_korean=1; match_word_boundaries=0; break; case 'l': search_limit=NO_MATCH_LIMIT; break; case 'g': if (options.vars()->optarg[0]=='\0') { error("You must specify an argument for negation operator\n"); free_vector_ptr(injected); return USAGE_ERROR_CODE; } selected_negation_operator=1; if ((strcmp(options.vars()->optarg,"minus")==0) || (strcmp(options.vars()->optarg,"-")==0)) { tilde_negation_operator=0; } else if ((strcmp(options.vars()->optarg,"tilde")!=0) && (strcmp(options.vars()->optarg,"~")!=0)) { error("You must specify a valid argument for negation operator\n"); free_vector_ptr(injected); return 
USAGE_ERROR_CODE; } break; case 'n': if (1!=sscanf(options.vars()->optarg,"%d%c",&search_limit,&foo) || search_limit<=0) { /* foo is used to check that the search limit is not like "45gjh" */ error("Invalid search limit argument: %s\n",options.vars()->optarg); free_vector_ptr(injected); return USAGE_ERROR_CODE; } break; case 'S': match_policy=SHORTEST_MATCHES; break; case 'L': match_policy=LONGEST_MATCHES; break; case 'A': match_policy=ALL_MATCHES; break; case 'I': output_policy=IGNORE_OUTPUTS; break; case 'M': output_policy=MERGE_OUTPUTS; break; case 'R': output_policy=REPLACE_OUTPUTS; break; case 'X': variable_error_policy=EXIT_ON_VARIABLE_ERRORS; break; case 'Y': variable_error_policy=IGNORE_VARIABLE_ERRORS; break; case 'Z': variable_error_policy=BACKTRACK_ON_VARIABLE_ERRORS; break; case 'b': ambiguous_output_policy=ALLOW_AMBIGUOUS_OUTPUTS; break; case 'z': ambiguous_output_policy=IGNORE_AMBIGUOUS_OUTPUTS; break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case 1: tagging=1; break; case 2: single_tags_only=1; break; case 3: match_word_boundaries=0; break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); free_vector_ptr(injected); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); free_vector_ptr(injected); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'v': { unichar* key=u_strdup(options.vars()->optarg); unichar* value=u_strchr(key,'='); if (value==NULL) { error("Invalid variable injection: %s\n",options.vars()->optarg); free_vector_ptr(injected); return USAGE_ERROR_CODE; } (*value)='\0'; value++; value=u_strdup(value); vector_ptr_add(injected,key); vector_ptr_add(injected,value); break; } case ':': index==-1 
? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_LocateTfst[index].name); free_vector_ptr(injected); return USAGE_ERROR_CODE; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); free_vector_ptr(injected); return USAGE_ERROR_CODE; break; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); free_vector_ptr(injected); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory free_vector_ptr(injected); return SUCCESS_RETURN_CODE; } if (selected_negation_operator==0) { get_graph_compatibility_mode_by_file(&vec,&tilde_negation_operator); } char grammar[FILENAME_MAX]; char output[FILENAME_MAX]; strcpy(grammar,argv[options.vars()->optind]); get_path(text,output); strcat(output,"concord.ind"); int OK=locate_tfst(text, grammar, alphabet, output, &vec, match_policy, output_policy, ambiguous_output_policy, variable_error_policy, search_limit, is_korean, tilde_negation_operator, injected, tagging, single_tags_only, match_word_boundaries); free_vector_ptr(injected); return (!OK); }
int main_DuplicateFile(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } const char *input_file = NULL; const char *output_file = NULL; int do_delete=0; int do_recursive_delete=0; int do_move=0; int do_make_dir=0; int do_make_dir_parent=0; int val,index=-1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_DuplicateFile,lopts_DuplicateFile,&index))) { switch(val) { case 'a': do_make_dir = 1; break; case 'p': do_make_dir_parent = 1; break; case 'd': do_delete = 1; break; case 'r': do_delete = do_recursive_delete = 1; break; case 'i': if (options.vars()->optarg[0]=='\0') { error("Empty input argument\n"); return USAGE_ERROR_CODE; } input_file = options.vars()->optarg; break; case 'm': if (options.vars()->optarg[0]=='\0') { error("Empty move argument\n"); return USAGE_ERROR_CODE; } input_file = options.vars()->optarg; do_move=1; break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt): error("Missing argument for option --%s\n",lopts_DuplicateFile[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? 
error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; case 'k': case 'q': /* ignore -k and -q parameter instead to raise an error */ break; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } output_file = argv[options.vars()->optind]; if ((input_file==NULL) && (do_delete==0) && (do_make_dir==0) && (do_make_dir_parent ==0)) { error("You must specify the input_file file\n"); return USAGE_ERROR_CODE; } if ((input_file!=NULL) && (do_delete==1)) { error("You cannot specify input_file when delete\n"); return USAGE_ERROR_CODE; } if (output_file==NULL) { error("You must specify the output_file file\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } int result = 0; if (input_file != NULL) { if (do_move == 0) { u_printf("copy file %s to %s\n",input_file,output_file); /* af_copy return 0 if success, -1 with reading problem, 1 writing problem */ result=af_copy(input_file,output_file); } else { u_printf("move file %s to %s\n",input_file,output_file); result=af_rename(input_file,output_file); } } else if (do_make_dir != 0) { u_printf("make dir %s\n", output_file); result = mkDirPortable(output_file); } else if (do_make_dir_parent != 0) { u_printf("make dir %s with parent\n", output_file); result = mkDirRecursiveIfNeeded(output_file); } else { if (do_recursive_delete == 0) { u_printf("remove file %s\n",output_file); result=af_remove(output_file); } else { u_printf("remove folder %s\n", output_file); af_remove_folder(output_file); result=0; } } u_printf((result==0) ? "Done.\n" : "Unsucessfull.\n"); return result; }