/**
 * Builds the two text concordances 'out1' and 'out2' from the two
 * concordance index files 'in1' and 'in2'.
 *
 * For each index, Concord is run in "--diff" mode, then the file named
 * "concord.txt" located in the directory of that index is moved to the
 * requested output name. Any previous file with the output name is removed
 * first.
 *
 * The path of "concord.txt" is computed separately for each index, so that
 * the function also works when 'in1' and 'in2' are in different directories.
 *
 * NOTE(review): this relies on Concord writing "concord.txt" next to the
 * index file it was given - confirm against pseudo_main_Concord.
 * The results of af_remove/af_rename are not checked here.
 */
void create_text_concordances(const VersatileEncodingConfig* vec,const char* in1,const char* in2,const char* out1,const char* out2) {
    char f[FILENAME_MAX];

    /* First concordance */
    pseudo_main_Concord(vec,in1,NULL,0,20,80,NULL,"--diff",NULL,NULL,0,0,0);
    get_path(in1,f);
    strcat(f,"concord.txt");
    af_remove(out1);
    af_rename(f,out1);

    /* Second concordance: the generated file lives next to 'in2', which is
     * not necessarily the directory of 'in1' */
    pseudo_main_Concord(vec,in2,NULL,0,20,80,NULL,"--diff",NULL,NULL,0,0,0);
    get_path(in2,f);
    strcat(f,"concord.txt");
    af_remove(out2);
    af_rename(f,out2);
}
/** * This function takes a unicode string representing a regular expression and * compiles it into a .grf file. It returns 1 in case of success; 0 otherwise. */ int reg2grf(const unichar* regexp,const char* name_grf, const VersatileEncodingConfig* vec) { if (regexp[0]=='\0') { error("You must specify a non empty regular expression\n"); return 0; } U_FILE* out=u_fopen(vec,name_grf,U_WRITE); if (out==NULL) { error("Cannot open the output file for the regular expression\n"); return 0; } struct reg2grf_info* INFO=new_reg2grf_info(); /* We create the initial and final states that must have numbers 0 and 1 */ add_state(INFO,u_strdup("<E>")); add_state(INFO,u_strdup("")); /* We print the grf header */ u_fprintf(out,"#Unigraph\n"); u_fprintf(out,"SIZE 1313 950\n"); u_fprintf(out,"FONT Times New Roman: 12\n"); u_fprintf(out,"OFONT Times New Roman:B 12\n"); u_fprintf(out,"BCOLOR 16777215\n"); u_fprintf(out,"FCOLOR 0\n"); u_fprintf(out,"ACOLOR 12632256\n"); u_fprintf(out,"SCOLOR 16711680\n"); u_fprintf(out,"CCOLOR 255\n"); u_fprintf(out,"DBOXES y\n"); u_fprintf(out,"DFRAME y\n"); u_fprintf(out,"DDATE y\n"); u_fprintf(out,"DFILE y\n"); u_fprintf(out,"DDIR y\n"); u_fprintf(out,"DRIG n\n"); u_fprintf(out,"DRST n\n"); u_fprintf(out,"FITS 100\n"); u_fprintf(out,"PORIENT L\n"); u_fprintf(out,"#\n"); int input_state; int output_state; int result=reg_2_grf(regexp,&input_state,&output_state,INFO); if (result!=1) { u_fclose(out); af_remove(name_grf); free_reg2grf_info(INFO); if (result==0) { error("Syntax error in regular expression\n"); } return 0; } /* If the compilation has successed, we must link the resulting automaton piece * to the grf's initial and final states */ add_transition(0,input_state,INFO); add_transition(output_state,1,INFO); save_states(out,INFO); free_reg2grf_info(INFO); u_fclose(out); return 1; }
static void remove_file_in_path(char* path, const char* filename, int mandatory) { if (!path) return; char * end_path = path + strlen(path); strcpy(end_path, filename); if (mandatory || fexists(path)) af_remove(path); *end_path = '\0'; }
int main_fst2txt(struct fst2txt_parameters* p) { p->f_input=u_fopen_existing_versatile_encoding(p->mask_encoding_compatibility_input,p->text_file,U_READ); if (p->f_input==NULL) { error("Cannot open file %s\n",p->text_file); return 1; } p->text_buffer=new_buffer_for_file(UNICHAR_BUFFER,p->f_input,CAPACITY_LIMIT); p->buffer=p->text_buffer->unichar_buffer; p->f_output=u_fopen_creating_versatile_encoding(p->encoding_output,p->bom_output,p->temp_file,U_WRITE); if (p->f_output==NULL) { error("Cannot open temporary file %s\n",p->temp_file); u_fclose(p->f_input); return 1; } p->fst2=load_abstract_fst2(p->fst_file,1,NULL); if (p->fst2==NULL) { error("Cannot load grammar %s\n",p->fst_file); u_fclose(p->f_input); u_fclose(p->f_output); return 1; } if (p->alphabet_file!=NULL && p->alphabet_file[0]!='\0') { p->alphabet=load_alphabet(p->alphabet_file); if (p->alphabet==NULL) { error("Cannot load alphabet file %s\n",p->alphabet_file); u_fclose(p->f_input); u_fclose(p->f_output); free_abstract_Fst2(p->fst2,NULL); return 1; } } u_printf("Applying %s in %s mode...\n",p->fst_file,(p->output_policy==MERGE_OUTPUTS)?"merge":"replace"); build_state_token_trees(p); parse_text(p); u_fclose(p->f_input); u_fclose(p->f_output); af_remove(p->text_file); af_rename(p->temp_file,p->text_file); u_printf("Done.\n"); return 0; }
int main_PolyLex(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } int language=-1; char alphabet[FILENAME_MAX]=""; char name_bin[FILENAME_MAX]=""; char output[FILENAME_MAX]=""; char info[FILENAME_MAX]=""; VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_PolyLex,lopts_PolyLex,&index))) { switch(val) { case 'D': language=DUTCH; break; case 'G': language=GERMAN; break; case 'N': language=NORWEGIAN; break; case 'R': language=RUSSIAN; break; case 'a': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty alphabet file name\n"); return USAGE_ERROR_CODE; } strcpy(alphabet,options.vars()->optarg); break; case 'd': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty dictionary file name\n"); return USAGE_ERROR_CODE; } strcpy(name_bin,options.vars()->optarg); break; case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output file name\n"); return USAGE_ERROR_CODE; } strcpy(output,options.vars()->optarg); break; case 'i': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty information file name\n"); return USAGE_ERROR_CODE; } strcpy(info,options.vars()->optarg); break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? 
error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_PolyLex[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (name_bin[0]=='\0') { error("You must specify the .bin dictionary to use\n"); return USAGE_ERROR_CODE; } if (output[0]=='\0') { error("You must specify the output dictionary file name\n"); return USAGE_ERROR_CODE; } if (language==-1) { error("You must specify the language\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } Alphabet* alph=NULL; if (alphabet[0]!='\0') { u_printf("Loading alphabet...\n"); alph=load_alphabet(&vec,alphabet); if (alph==NULL) { error("Cannot load alphabet file %s\n",alphabet); return USAGE_ERROR_CODE; } } char name_inf[FILENAME_MAX]; struct string_hash* forbiddenWords=NULL; if (language==DUTCH || language==NORWEGIAN) { get_path(name_bin,name_inf); strcat(name_inf,"ForbiddenWords.txt"); forbiddenWords=load_key_list(&vec,name_inf); if (forbiddenWords==NULL) { /* If there was no file, we don't want to block the process */ forbiddenWords=new_string_hash(DONT_USE_VALUES); } } strcpy(name_inf,name_bin); name_inf[strlen(name_bin)-3]='\0'; strcat(name_inf,"inf"); Dictionary* d=new_Dictionary(&vec,name_bin,name_inf); if (d==NULL) { error("Cannot load dictionary %s\n",name_bin); free_string_hash(forbiddenWords); free_alphabet(alph); return DEFAULT_ERROR_CODE; } char tmp[FILENAME_MAX]; strcpy(tmp,argv[options.vars()->optind]); strcat(tmp,".tmp"); U_FILE* words=u_fopen(&vec,argv[options.vars()->optind],U_READ); if (words==NULL) { error("Cannot open word list file %s\n",argv[options.vars()->optind]); free_Dictionary(d); 
free_string_hash(forbiddenWords); free_alphabet(alph); // here we return 0 in order to do not block the preprocessing // in the Unitex/GramLab IDE interface, if no dictionary was applied // so that there is no "err" file return SUCCESS_RETURN_CODE; } U_FILE* new_unknown_words=u_fopen(&vec,tmp,U_WRITE); if (new_unknown_words==NULL) { error("Cannot open temporary word list file %s\n",tmp); u_fclose(words); free_Dictionary(d); free_string_hash(forbiddenWords); free_alphabet(alph); return DEFAULT_ERROR_CODE; } U_FILE* res=u_fopen(&vec,output,U_APPEND); if (res==NULL) { error("Cannot open result file %s\n",output); u_fclose(new_unknown_words); u_fclose(words); free_Dictionary(d); free_string_hash(forbiddenWords); free_alphabet(alph); u_fclose(words); return DEFAULT_ERROR_CODE; } U_FILE* debug=NULL; if ((*info)!='\0') { debug=u_fopen(&vec,info,U_WRITE); if (debug==NULL) { error("Cannot open debug file %s\n",info); } } struct utags UTAG; switch(language) { case DUTCH: analyse_dutch_unknown_words(alph, d, words, res, debug, new_unknown_words, forbiddenWords); break; case GERMAN: analyse_german_compounds(alph, d, words, res, debug, new_unknown_words); break; case NORWEGIAN: analyse_norwegian_unknown_words(alph, d, words, res, debug, new_unknown_words, forbiddenWords); break; case RUSSIAN: init_russian(&UTAG); analyse_compounds(alph, d, words, res, debug, new_unknown_words, UTAG); break; } free_alphabet(alph); free_Dictionary(d); u_fclose(words); u_fclose(new_unknown_words); free_string_hash(forbiddenWords); af_remove(argv[options.vars()->optind]); af_rename(tmp,argv[options.vars()->optind]); u_fclose(res); if (debug!=NULL) { u_fclose(debug); } return SUCCESS_RETURN_CODE; }
int main_Flatten(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } int RTN=1; int depth=10; VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; char foo; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_Flatten,lopts_Flatten,&index))) { switch(val) { case 'f': RTN=0; break; case 'r': RTN=1; break; case 'd': if (1!=sscanf(options.vars()->optarg,"%d%c",&depth,&foo) || depth<=0) { /* foo is used to check that the depth is not like "45gjh" */ error("Invalid depth argument: %s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_Flatten[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? 
error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; break; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } u_printf("Loading %s...\n",argv[options.vars()->optind]); struct FST2_free_info fst2_free; Fst2* origin=load_abstract_fst2(&vec,argv[options.vars()->optind],1,&fst2_free); if (origin==NULL) { error("Cannot load %s\n",argv[options.vars()->optind]); return DEFAULT_ERROR_CODE; } char temp[FILENAME_MAX]; strcpy(temp,argv[options.vars()->optind]); strcat(temp,".tmp.fst2"); switch (flatten_fst2(origin,depth,temp,&vec,RTN)) { case EQUIVALENT_FST: u_printf("The resulting grammar is an equivalent finite-state transducer.\n"); break; case APPROXIMATIVE_FST: u_printf("The resulting grammar is a finite-state approximation.\n"); break; case EQUIVALENT_RTN: u_printf("The resulting grammar is an equivalent FST2 (RTN).\n"); break; default: error("Internal state error in Flatten's main\n"); free_abstract_Fst2(origin,&fst2_free); return DEFAULT_ERROR_CODE; } free_abstract_Fst2(origin,&fst2_free); af_remove(argv[options.vars()->optind]); af_rename(temp,argv[options.vars()->optind]); return SUCCESS_RETURN_CODE; }
int main_SortTxt(int argc, char* const argv[]) { if (argc == 1) { usage(); return SUCCESS_RETURN_CODE; } struct sort_infos* inf = new_sort_infos(); if(!inf) { return ALLOC_ERROR_CODE; } int mode = DEFAULT; char line_info[FILENAME_MAX] = ""; char sort_order[FILENAME_MAX] = ""; VersatileEncodingConfig vec = { DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT, DEFAULT_ENCODING_OUTPUT, DEFAULT_BOM_OUTPUT }; int val, index = -1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF != (val = options.parse_long(argc, argv, optstring_SortTxt, lopts_SortTxt, &index))) { switch (val) { case 'n': inf->REMOVE_DUPLICATES = 1; break; case 'd': inf->REMOVE_DUPLICATES = 0; break; case 'r': inf->REVERSE = -1; break; case 'o': if (options.vars()->optarg[0] == '\0') { error("You must specify a non empty sort order file name\n"); free_sort_infos(inf); return USAGE_ERROR_CODE; } strcpy(sort_order, options.vars()->optarg); break; case 'l': if (options.vars()->optarg[0] == '\0') { error("You must specify a non empty information file name\n"); free_sort_infos(inf); return USAGE_ERROR_CODE; } strcpy(line_info, options.vars()->optarg); break; case 't': mode = THAI; break; case 'f': inf->factorize_inflectional_codes = 1; break; case 'V': only_verify_arguments = true; break; case 'h': usage(); free_sort_infos(inf); return SUCCESS_RETURN_CODE; case 'k': if (options.vars()->optarg[0] == '\0') { error("Empty input_encoding argument\n"); free_sort_infos(inf); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter( &(vec.mask_encoding_compatibility_input), options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0] == '\0') { error("Empty output_encoding argument\n"); free_sort_infos(inf); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output), &(vec.bom_output), options.vars()->optarg); break; case ':': index == -1 ? 
error("Missing argument for option -%c\n", options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_SortTxt[index].name); free_sort_infos(inf); return USAGE_ERROR_CODE; case '?': index == -1 ? error("Invalid option -%c\n", options.vars()->optopt) : error("Invalid option --%s\n", options.vars()->optarg); free_sort_infos(inf); return USAGE_ERROR_CODE; } index = -1; } if (options.vars()->optind != argc - 1) { error("Invalid arguments: rerun with --help\n"); free_sort_infos(inf); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory free_sort_infos(inf); return SUCCESS_RETURN_CODE; } if (sort_order[0] != '\0') { read_char_order(&vec, sort_order, inf); } char new_name[FILENAME_MAX]; strcpy(new_name, argv[options.vars()->optind]); strcat(new_name, ".new"); inf->f = u_fopen(&vec, argv[options.vars()->optind], U_READ); if (inf->f == NULL) { error("Cannot open file %s\n", argv[options.vars()->optind]); free_sort_infos(inf); return DEFAULT_ERROR_CODE; } inf->f_out = u_fopen(&vec, new_name, U_WRITE); if (inf->f_out == NULL) { error("Cannot open temporary file %s\n", new_name); u_fclose(inf->f); free_sort_infos(inf); return DEFAULT_ERROR_CODE; } switch (mode) { case DEFAULT: sort(inf); break; case THAI: sort_thai(inf); break; } if (line_info[0] != '\0') { U_FILE* F = u_fopen(&vec, line_info, U_WRITE); if (F == NULL) { error("Cannot write %s\n", line_info); } else { u_fprintf(F, "%d\n", inf->resulting_line_number); u_fclose(F); } } u_fclose(inf->f_out); u_fclose(inf->f); af_remove(argv[options.vars()->optind]); af_rename(new_name, argv[options.vars()->optind]); free_sort_infos(inf); u_printf("Done.\n"); return SUCCESS_RETURN_CODE; }
int main_SpellCheck(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; char mode=0; char snt[FILENAME_MAX]=""; char txt[FILENAME_MAX]=""; char output[FILENAME_MAX]=""; char output_set=0; char output_op='A'; SpellCheckConfig config; config.max_errors=1; config.max_SP_INSERT=1; config.max_SP_SUPPR=1; config.max_SP_SWAP=1; config.max_SP_CHANGE=1; for (int i=0;i<N_SPSubOp;i++) { config.score[i]=default_scores[i]; } config.min_length1=4; config.min_length2=6; config.min_length3=12; config.input_op='D'; config.keyboard=NULL; config.allow_uppercase_initial=0; char foo; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_SpellCheck,lopts_SpellCheck,&index))) { switch(val) { case 's': { strcpy(snt,options.vars()->optarg); mode='s'; break; } case 'f': { strcpy(txt,options.vars()->optarg); mode='f'; break; } case 'o': { if (options.vars()->optarg!=NULL) { strcpy(output,options.vars()->optarg); } output_set=1; break; } case 'I': { if (!strcmp(options.vars()->optarg,"D") || !strcmp(options.vars()->optarg,"M") || !strcmp(options.vars()->optarg,"U")) { config.input_op=options.vars()->optarg[0]; } else { error("Invalid argument %s for option --input-op: should in [DMU]\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 'O': { if (!strcmp(options.vars()->optarg,"O") || !strcmp(options.vars()->optarg,"A")) { output_op=options.vars()->optarg[0]; } else { error("Invalid argument %s for option --output-op: should in [OA]\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 1: { config.keyboard=get_Keyboard(options.vars()->optarg); if (config.keyboard==NULL) { error("Invalid argument %s for option --keyboard:\nUse --show-keyboards to see possible values\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 2: { print_available_keyboards(U_STDOUT); return SUCCESS_RETURN_CODE; } case 10: 
{ if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_errors,&foo)) { error("Invalid argument %s for --max-errors: should be an integer >=0\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 11: { if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_INSERT,&foo)) { error("Invalid argument %s for --max-insert: should be an integer >=0\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 12: { if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_SUPPR,&foo)) { error("Invalid argument %s for --max-suppr: should be an integer >=0\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 13: { if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_CHANGE,&foo)) { error("Invalid argument %s for --max-change: should be an integer >=0\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 14: { if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_SWAP,&foo)) { error("Invalid argument %s for --max-swap: should be an integer >=0\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 20: { int* scores=config.score; if (N_SPSubOp!=sscanf(options.vars()->optarg,"%d,%d,%d,%d,%d,%d,%d,%d,%d%c", scores,scores+1,scores+2,scores+3,scores+4,scores+5, scores+6,scores+7,scores+8,&foo)) { error("Invalid argument %s for option --scores. 
See --help-scores\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 21: { usage_scores(); return SUCCESS_RETURN_CODE; } case 22: { if (3!=sscanf(options.vars()->optarg,"%u,%u,%u%c", &config.min_length1,&config.min_length2,&config.min_length3,&foo)) { error("Invalid argument %s for option --min-lengths\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 23: { if (!strcmp(options.vars()->optarg,"yes")) { config.allow_uppercase_initial=1; } else if (!strcmp(options.vars()->optarg,"no")) { config.allow_uppercase_initial=0; } else { error("Invalid argument %s for option --upper-initial\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_SpellCheck[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? 
error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind==argc) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (mode==0) { error("You must use either --snt or --file\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } config.n_dics=argc-options.vars()->optind; config.dics=(Dictionary**)malloc(config.n_dics*sizeof(Dictionary*)); if (config.dics==NULL) { alloc_error("main_SpellCheck"); return ALLOC_ERROR_CODE; } for (int i=0;i<config.n_dics;i++) { config.dics[i]=new_Dictionary(&vec,argv[i+options.vars()->optind]); if (config.dics[i]==NULL) { error("Cannot load dictionary %s\n",argv[i+options.vars()->optind]); } } config.out=U_STDOUT; config.n_input_lines=0; config.n_output_lines=0; if (mode=='s') { /* When working with a .snt, we actually want to work on its err file */ get_snt_path(snt,txt); strcat(txt,"err"); /* the output must be dlf, and we note the number of lines in the existing * dlf file, if any */ get_snt_path(snt,output); strcat(output,"dlf.n"); U_FILE* f=u_fopen(&vec,output,U_READ); if (f!=NULL) { u_fscanf(f,"%d",&(config.n_output_lines)); u_fclose(f); } get_snt_path(snt,output); strcat(output,"dlf"); output_set=1; /* and we force the values for -I and -O */ config.input_op='U'; output_op='A'; } else { /* If mode=='f', we don't have anything to do since we already * defined the default output to stdout */ } if (output_set) { if (output_op=='O') { config.out=u_fopen(&vec,output,U_WRITE); } else { config.out=u_fopen(&vec,output,U_APPEND); } if (config.out==NULL) { error("Cannot open output file %s\n",output); for (int i=0;i<config.n_dics;i++) { free_Dictionary(config.dics[i]); } free(config.dics); return DEFAULT_ERROR_CODE; } } config.modified_input=NULL; char modified_input[FILENAME_MAX]=""; if (config.input_op!='D') { 
strcpy(modified_input,txt); strcat(modified_input,".tmp"); config.modified_input=u_fopen(&vec,modified_input,U_WRITE); if (config.modified_input==NULL) { error("Cannot open tmp file %s\n",modified_input); if (config.out!=U_STDOUT) { u_fclose(config.out); } for (int i=0;i<config.n_dics;i++) { free_Dictionary(config.dics[i]); } free(config.dics); return DEFAULT_ERROR_CODE; } } config.in=u_fopen(&vec,txt,U_READ); if (config.in==NULL) { error("Cannot open file %s\n",txt); u_fclose(config.modified_input); if (config.out!=U_STDOUT) { u_fclose(config.out); } for (int i=0;i<config.n_dics;i++) { free_Dictionary(config.dics[i]); } free(config.dics); return DEFAULT_ERROR_CODE; } /* We perform spellchecking */ spellcheck(&config); /* And we clean */ u_fclose(config.in); if (config.modified_input!=NULL) { /* If we used a tmp file because the input file has to be modified, * it's now time to actually modify it */ u_fclose(config.modified_input); af_remove(txt); af_rename(modified_input,txt); } if (config.out!=U_STDOUT) { u_fclose(config.out); } for (int i=0;i<config.n_dics;i++) { free_Dictionary(config.dics[i]); } free(config.dics); /* Finally, we update the dlf.n and err.n files if mode=='s' */ if (mode=='s') { get_snt_path(snt,output); strcat(output,"err.n"); U_FILE* f=u_fopen(&vec,output,U_WRITE); if (f!=NULL) { u_fprintf(f,"%d",config.n_input_lines); u_fclose(f); } if (config.input_op!='D') { get_snt_path(snt,output); strcat(output,"dlf.n"); U_FILE* fw=u_fopen(&vec,output,U_WRITE); if (fw!=NULL) { u_fprintf(fw,"%d",config.n_output_lines); u_fclose(fw); } } } return SUCCESS_RETURN_CODE; }
int main_RebuildTfst(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } VersatileEncodingConfig vec=VEC_DEFAULT; int val, index=-1; bool only_verify_arguments = false; UnitexGetOpt options; int save_statistics=1; while (EOF!=(val=options.parse_long(argc,argv,optstring_RebuildTfst,lopts_RebuildTfst,&index))) { switch (val) { case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'S': save_statistics = 0; break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n", options.vars()->optopt) : error("Missing argument for option --%s\n", lopts_RebuildTfst[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? 
error("Invalid option -%c\n", options.vars()->optopt) : error("Invalid option --%s\n", options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } char input_tfst[FILENAME_MAX]; char input_tind[FILENAME_MAX]; strcpy(input_tfst,argv[options.vars()->optind]); remove_extension(input_tfst,input_tind); strcat(input_tind,".tind"); u_printf("Loading %s...\n",input_tfst); Tfst* tfst = open_text_automaton(&vec,input_tfst); if (tfst==NULL) { error("Unable to load %s automaton\n",input_tfst); return DEFAULT_ERROR_CODE; } char basedir[FILENAME_MAX]; get_path(input_tfst,basedir); char output_tfst[FILENAME_MAX]; sprintf(output_tfst, "%s.new.tfst",input_tfst); char output_tind[FILENAME_MAX]; sprintf(output_tind, "%s.new.tind",input_tfst); U_FILE* f_tfst; if ((f_tfst = u_fopen(&vec,output_tfst,U_WRITE)) == NULL) { error("Unable to open %s for writing\n", output_tfst); close_text_automaton(tfst); return DEFAULT_ERROR_CODE; } U_FILE* f_tind; if ((f_tind = u_fopen(BINARY,output_tind,U_WRITE)) == NULL) { u_fclose(f_tfst); close_text_automaton(tfst); error("Unable to open %s for writing\n", output_tind); return DEFAULT_ERROR_CODE; } /* We use this hash table to rebuild files tfst_tags_by_freq/alph.txt */ struct hash_table* form_frequencies=new_hash_table((HASH_FUNCTION)hash_unichar,(EQUAL_FUNCTION)u_equal, (FREE_FUNCTION)free,NULL,(KEYCOPY_FUNCTION)keycopy); u_fprintf(f_tfst,"%010d\n",tfst->N); for (int i = 1; i <= tfst->N; i++) { if ((i % 100) == 0) { u_printf("%d/%d sentences rebuilt...\n", i, tfst->N); } load_sentence(tfst,i); char grfname[FILENAME_MAX]; sprintf(grfname, "%ssentence%d.grf", basedir, i); unichar** tags=NULL; int n_tags=-1; if (fexists(grfname)) { /* If there is a .grf for the current sentence, then we must * take it into account */ if 
(0==pseudo_main_Grf2Fst2(&vec,grfname,0,NULL,1,1,NULL,NULL,0)) { /* We proceed only if the graph compilation was a success */ char fst2name[FILENAME_MAX]; sprintf(fst2name, "%ssentence%d.fst2", basedir, i); struct FST2_free_info fst2_free; Fst2* fst2=load_abstract_fst2(&vec,fst2name,0,&fst2_free); af_remove(fst2name); free_SingleGraph(tfst->automaton,NULL); tfst->automaton=create_copy_of_fst2_subgraph(fst2,1); tags=create_tfst_tags(fst2,&n_tags); free_abstract_Fst2(fst2,&fst2_free); } else { error("Error: %s is not a valid sentence automaton\n",grfname); } } save_current_sentence(tfst,f_tfst,f_tind,tags,n_tags,form_frequencies); if (tags!=NULL) { /* If necessary, we free the tags we created */ for (int count_tags=0;count_tags<n_tags;count_tags++) { free(tags[count_tags]); } free(tags); } } u_printf("Text automaton rebuilt.\n"); u_fclose(f_tind); u_fclose(f_tfst); close_text_automaton(tfst); /* Finally, we save statistics */ if (save_statistics) { char tfst_tags_by_freq[FILENAME_MAX]; char tfst_tags_by_alph[FILENAME_MAX]; strcpy(tfst_tags_by_freq, basedir); strcat(tfst_tags_by_freq, "tfst_tags_by_freq.txt"); strcpy(tfst_tags_by_alph, basedir); strcat(tfst_tags_by_alph, "tfst_tags_by_alph.txt"); U_FILE* f_tfst_tags_by_freq = u_fopen(&vec, tfst_tags_by_freq, U_WRITE); if (f_tfst_tags_by_freq == NULL) { error("Cannot open %s\n", tfst_tags_by_freq); } U_FILE* f_tfst_tags_by_alph = u_fopen(&vec, tfst_tags_by_alph, U_WRITE); if (f_tfst_tags_by_alph == NULL) { error("Cannot open %s\n", tfst_tags_by_alph); } sort_and_save_tfst_stats(form_frequencies, f_tfst_tags_by_freq, f_tfst_tags_by_alph); u_fclose(f_tfst_tags_by_freq); u_fclose(f_tfst_tags_by_alph); } free_hash_table(form_frequencies); /* make a backup and replace old automaton with new */ char backup_tfst[FILENAME_MAX]; char backup_tind[FILENAME_MAX]; sprintf(backup_tfst,"%s.bck",input_tfst); sprintf(backup_tind,"%s.bck",input_tind); /* We remove the existing backup files, if any */ af_remove(backup_tfst); 
af_remove(backup_tind); af_rename(input_tfst,backup_tfst); af_rename(input_tind,backup_tind); af_rename(output_tfst,input_tfst); af_rename(output_tind,input_tind); u_printf("\nYou can find a backup of the original files in:\n %s\nand %s\n", backup_tfst,backup_tind); return SUCCESS_RETURN_CODE; }
int main_PolyLex(int argc,char* const argv[]) { if (argc==1) { usage(); return 0; } int language=-1; char alphabet[FILENAME_MAX]=""; char dictionary[FILENAME_MAX]=""; char output[FILENAME_MAX]=""; char info[FILENAME_MAX]=""; Encoding encoding_output = DEFAULT_ENCODING_OUTPUT; int bom_output = DEFAULT_BOM_OUTPUT; int mask_encoding_compatibility_input = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT; int val,index=-1; struct OptVars* vars=new_OptVars(); while (EOF!=(val=getopt_long_TS(argc,argv,optstring_PolyLex,lopts_PolyLex,&index,vars))) { switch(val) { case 'D': language=DUTCH; break; case 'G': language=GERMAN; break; case 'N': language=NORWEGIAN; break; case 'R': language=RUSSIAN; break; case 'a': if (vars->optarg[0]=='\0') { fatal_error("You must specify a non empty alphabet file name\n"); } strcpy(alphabet,vars->optarg); break; case 'd': if (vars->optarg[0]=='\0') { fatal_error("You must specify a non empty dictionary file name\n"); } strcpy(dictionary,vars->optarg); break; case 'o': if (vars->optarg[0]=='\0') { fatal_error("You must specify a non empty output file name\n"); } strcpy(output,vars->optarg); break; case 'i': if (vars->optarg[0]=='\0') { fatal_error("You must specify a non empty information file name\n"); } strcpy(info,vars->optarg); break; case 'k': if (vars->optarg[0]=='\0') { fatal_error("Empty input_encoding argument\n"); } decode_reading_encoding_parameter(&mask_encoding_compatibility_input,vars->optarg); break; case 'q': if (vars->optarg[0]=='\0') { fatal_error("Empty output_encoding argument\n"); } decode_writing_encoding_parameter(&encoding_output,&bom_output,vars->optarg); break; case 'h': usage(); return 0; case ':': if (index==-1) fatal_error("Missing argument for option -%c\n",vars->optopt); else fatal_error("Missing argument for option --%s\n",lopts_PolyLex[index].name); case '?': if (index==-1) fatal_error("Invalid option -%c\n",vars->optopt); else fatal_error("Invalid option --%s\n",vars->optarg); break; } index=-1; } if 
(vars->optind!=argc-1) { fatal_error("Invalid arguments: rerun with --help\n"); } if (dictionary[0]=='\0') { fatal_error("You must specify the .bin dictionary to use\n"); } if (output[0]=='\0') { fatal_error("You must specify the output dictionary file name\n"); } if (language==-1) { fatal_error("You must specify the language\n"); } Alphabet* alph=NULL; if (alphabet[0]!='\0') { u_printf("Loading alphabet...\n"); alph=load_alphabet(alphabet); if (alph==NULL) { fatal_error("Cannot load alphabet file %s\n",alphabet); } } char temp[FILENAME_MAX]; struct string_hash* forbiddenWords=NULL; if (language==DUTCH || language==NORWEGIAN) { get_path(dictionary,temp); strcat(temp,"ForbiddenWords.txt"); forbiddenWords=load_key_list(temp,mask_encoding_compatibility_input); } u_printf("Loading BIN file...\n"); struct BIN_free_info bin_free; const unsigned char* bin=load_abstract_BIN_file(dictionary,&bin_free); if (bin==NULL) { error("Cannot load bin file %s\n",dictionary); free_alphabet(alph); free_string_hash(forbiddenWords); return 1; } strcpy(temp,dictionary); temp[strlen(dictionary)-3]='\0'; strcat(temp,"inf"); u_printf("Loading INF file...\n"); struct INF_free_info inf_free; const struct INF_codes* inf=load_abstract_INF_file(temp,&inf_free); if (inf==NULL) { error("Cannot load inf file %s\n",temp); free_alphabet(alph); free_abstract_BIN(bin,&bin_free); free_string_hash(forbiddenWords); return 1; } char tmp[FILENAME_MAX]; strcpy(tmp,argv[vars->optind]); strcat(tmp,".tmp"); U_FILE* words=u_fopen_existing_versatile_encoding(mask_encoding_compatibility_input,argv[vars->optind],U_READ); if (words==NULL) { error("Cannot open word list file %s\n",argv[vars->optind]); free_alphabet(alph); free_abstract_BIN(bin,&bin_free); free_abstract_INF(inf,&inf_free); free_string_hash(forbiddenWords); // here we return 0 in order to do not block the preprocessing // in the Unitex Java interface, if no dictionary was applied // so that there is no "err" file return 0; } U_FILE* 
new_unknown_words=u_fopen_existing_versatile_encoding(mask_encoding_compatibility_input,tmp,U_WRITE); if (new_unknown_words==NULL) { error("Cannot open temporary word list file %s\n",tmp); free_alphabet(alph); free_abstract_BIN(bin,&bin_free); free_abstract_INF(inf,&inf_free); u_fclose(words); free_string_hash(forbiddenWords); return 1; } U_FILE* res=u_fopen_versatile_encoding(encoding_output,bom_output,mask_encoding_compatibility_input,output,U_APPEND); if (res==NULL) { error("Cannot open result file %s\n",output); free_alphabet(alph); free_abstract_BIN(bin,&bin_free); free_abstract_INF(inf,&inf_free); u_fclose(words); u_fclose(new_unknown_words); free_string_hash(forbiddenWords); return 1; } U_FILE* debug=NULL; if (info!=NULL) { debug=u_fopen_versatile_encoding(encoding_output,bom_output,mask_encoding_compatibility_input,info,U_WRITE); if (debug==NULL) { error("Cannot open debug file %s\n",info); } } struct utags UTAG; switch(language) { case DUTCH: analyse_dutch_unknown_words(alph,bin,inf,words,res,debug,new_unknown_words,forbiddenWords); break; case GERMAN: analyse_german_compounds(alph,bin,inf,words,res,debug,new_unknown_words); break; case NORWEGIAN: analyse_norwegian_unknown_words(alph,bin,inf,words,res,debug,new_unknown_words,forbiddenWords); break; case RUSSIAN: init_russian(&UTAG); analyse_compounds(alph,bin,inf,words,res,debug,new_unknown_words,UTAG); break; } free_alphabet(alph); free_abstract_BIN(bin,&bin_free); free_abstract_INF(inf,&inf_free); u_fclose(words); u_fclose(new_unknown_words); free_string_hash(forbiddenWords); af_remove(argv[vars->optind]); af_rename(tmp,argv[vars->optind]); u_fclose(res); if (debug!=NULL) { u_fclose(debug); } free_OptVars(vars); return 0; }
/**
 * Removes a file.
 *
 * The given name is forwarded to af_remove() and the value returned by that
 * call is handed back to the caller unchanged.
 */
UNITEX_FUNC int UNITEX_CALL RemoveUnitexFile(const char*name) {
    const int status=af_remove(name);
    return status;
}
int main_XMLizer(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } int output_style=TEI; char output[FILENAME_MAX]=""; char alphabet[FILENAME_MAX]=""; char normalization[FILENAME_MAX]=""; char segmentation[FILENAME_MAX]=""; VersatileEncodingConfig vec=VEC_DEFAULT; int convLFtoCRLF=1; int val,index=-1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_XMLizer,lopts_XMLizer,&index))) { switch(val) { case 'x': output_style=XML; break; case 't': output_style=TEI; break; case 'n': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty normalization grammar name\n"); return USAGE_ERROR_CODE; } strcpy(normalization,options.vars()->optarg); break; case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output file name\n"); return USAGE_ERROR_CODE; } strcpy(output,options.vars()->optarg); break; case 'a': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty alphabet file name\n"); return USAGE_ERROR_CODE; } strcpy(alphabet,options.vars()->optarg); break; case 's': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty segmentation grammar name\n"); return USAGE_ERROR_CODE; } strcpy(segmentation,options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? 
error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_XMLizer[index].name); return USAGE_ERROR_CODE; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (segmentation[0]=='\0') { error("You must specify the segmentation grammar to use\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } char input[FILENAME_MAX]; strcpy(input,argv[options.vars()->optind]); char snt[FILENAME_MAX]; remove_extension(input,snt); strcat(snt,"_tmp.snt"); char tmp[FILENAME_MAX]; remove_extension(input,tmp); strcat(tmp,".tmp"); normalize(input,snt,&vec,KEEP_CARRIAGE_RETURN,convLFtoCRLF,normalization,NULL,1); struct fst2txt_parameters* p=new_fst2txt_parameters(); p->vec=vec; p->input_text_file=strdup(snt); if (p->input_text_file ==NULL) { alloc_error("main_XMLizer"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } p->output_text_file_is_temp=1; p->output_text_file=strdup(tmp); if (p->output_text_file==NULL) { alloc_error("main_XMLizer"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } p->fst_file=strdup(segmentation); if (p->fst_file==NULL) { alloc_error("main_XMLizer"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } p->alphabet_file=strdup(alphabet); if 
(p->alphabet_file==NULL) { alloc_error("main_XMLizer"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } p->output_policy=MERGE_OUTPUTS; p->tokenization_policy=WORD_BY_WORD_TOKENIZATION; p->space_policy=DONT_START_WITH_SPACE; main_fst2txt(p); free_fst2txt_parameters(p); if (output[0]=='\0') { remove_extension(input,output); strcat(output,".xml"); } int return_value = xmlize(&vec,snt,output,output_style); af_remove(snt); af_remove(tmp); return return_value; }
int main_DuplicateFile(int argc,char* const argv[]) { if (argc==1) { usage(); return 0; } const char *input_file = NULL; const char *output_file = NULL; int do_delete=0; int do_move=0; int val,index=-1; struct OptVars* vars=new_OptVars(); while (EOF!=(val=getopt_long_TS(argc,argv,optstring_DuplicateFile,lopts_DuplicateFile,&index,vars))) { switch(val) { case 'd': do_delete=1; break; case 'i': if (vars->optarg[0]=='\0') { fatal_error("Empty input argument\n"); } input_file = vars->optarg; break; case 'm': if (vars->optarg[0]=='\0') { fatal_error("Empty move argument\n"); } input_file = vars->optarg; do_move=1; break; case 'h': usage(); return 0; case ':': if (index==-1) fatal_error("Missing argument for option -%c\n",vars->optopt); else fatal_error("Missing argument for option --%s\n",lopts_DuplicateFile[index].name); case '?': if (index==-1) fatal_error("Invalid option -%c\n",vars->optopt); else fatal_error("Invalid option --%s\n",vars->optarg); break; case 'k': case 'q': /* ignore -k and -q parameter instead make error */ break; } index=-1; } if (vars->optind!=argc-1) { fatal_error("Invalid arguments: rerun with --help\n"); } output_file = argv[vars->optind]; if ((input_file==NULL) && (do_delete==0)) { fatal_error("You must specify the input_file file\n"); } if ((input_file!=NULL) && (do_delete==1)) { fatal_error("You cannot specify input_file when delete\n"); } if (output_file==NULL) { fatal_error("You must specify the output_file file\n"); } int result; if (input_file != NULL) { if (do_move == 0) { u_printf("copy file %s to %s\n",input_file,output_file); result=af_copy(input_file,output_file); } else { u_printf("move file %s to %s\n",input_file,output_file); result=af_rename(input_file,output_file); } } else { u_printf("remove file %s\n",output_file); result=af_remove(output_file); } u_printf((result==0) ? "Done.\n" : "Unsucessfull.\n"); return result; }
int main_Normalize(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } int mode=KEEP_CARRIAGE_RETURN; int separator_normalization=1; char rules[FILENAME_MAX]=""; char input_offsets[FILENAME_MAX]=""; char output_offsets[FILENAME_MAX]=""; VersatileEncodingConfig vec=VEC_DEFAULT; int convLFtoCRLF=1; int val,index=-1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_Normalize,lopts_Normalize,&index))) { switch(val) { case 'l': convLFtoCRLF=0; break; case 'n': mode=REMOVE_CARRIAGE_RETURN; break; case 'r': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty replacement rule file name\n"); return USAGE_ERROR_CODE; } strcpy(rules,options.vars()->optarg); break; case 1: separator_normalization=0; break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case '$': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty input offset file name\n"); return USAGE_ERROR_CODE; } strcpy(input_offsets,options.vars()->optarg); break; case '@': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output offset file name\n"); return USAGE_ERROR_CODE; } strcpy(output_offsets,options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_Normalize[index].name); return USAGE_ERROR_CODE; break; case '?': index==-1 ? 
error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; break; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } vector_offset* v_input_offsets=NULL; vector_offset* v_output_offsets=NULL; U_FILE* f_output_offsets=NULL; if (output_offsets[0]!='\0') { /* We deal with offsets only if we have to produce output offsets */ if (input_offsets[0]!='\0') { v_input_offsets=load_offsets(&vec,input_offsets); } f_output_offsets=u_fopen(&vec, output_offsets, U_WRITE); if (f_output_offsets==NULL) { error("Cannot create offset file %s\n",output_offsets); return DEFAULT_ERROR_CODE; } v_output_offsets=new_vector_offset(); } char tmp_file[FILENAME_MAX]; get_extension(argv[options.vars()->optind],tmp_file); if (!strcmp(tmp_file, ".snt")) { /* If the file to process has already the .snt extension, we temporary rename it to * .snt.normalizing */ strcpy(tmp_file,argv[options.vars()->optind]); strcat(tmp_file,".normalizing"); af_rename(argv[options.vars()->optind],tmp_file); } else { strcpy(tmp_file,argv[options.vars()->optind]); } /* We set the destination file */ char dest_file[FILENAME_MAX]; remove_extension(argv[options.vars()->optind],dest_file); strcat(dest_file,".snt"); u_printf("Normalizing %s...\n",argv[options.vars()->optind]); int return_value = normalize(tmp_file, dest_file, &vec, mode, convLFtoCRLF, rules, v_output_offsets, separator_normalization); u_printf("\n"); /* If we have used a temporary file, we delete it */ if (strcmp(tmp_file,argv[options.vars()->optind])) { af_remove(tmp_file); } process_offsets(v_input_offsets,v_output_offsets,f_output_offsets); u_fclose(f_output_offsets); free_vector_offset(v_input_offsets); free_vector_offset(v_output_offsets); u_printf((return_value==SUCCESS_RETURN_CODE) ? 
"Done.\n" : "Unsuccessfull.\n"); return return_value; }
int main_DuplicateFile(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } const char *input_file = NULL; const char *output_file = NULL; int do_delete=0; int do_recursive_delete=0; int do_move=0; int do_make_dir=0; int do_make_dir_parent=0; int val,index=-1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_DuplicateFile,lopts_DuplicateFile,&index))) { switch(val) { case 'a': do_make_dir = 1; break; case 'p': do_make_dir_parent = 1; break; case 'd': do_delete = 1; break; case 'r': do_delete = do_recursive_delete = 1; break; case 'i': if (options.vars()->optarg[0]=='\0') { error("Empty input argument\n"); return USAGE_ERROR_CODE; } input_file = options.vars()->optarg; break; case 'm': if (options.vars()->optarg[0]=='\0') { error("Empty move argument\n"); return USAGE_ERROR_CODE; } input_file = options.vars()->optarg; do_move=1; break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt): error("Missing argument for option --%s\n",lopts_DuplicateFile[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? 
error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; case 'k': case 'q': /* ignore -k and -q parameter instead to raise an error */ break; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } output_file = argv[options.vars()->optind]; if ((input_file==NULL) && (do_delete==0) && (do_make_dir==0) && (do_make_dir_parent ==0)) { error("You must specify the input_file file\n"); return USAGE_ERROR_CODE; } if ((input_file!=NULL) && (do_delete==1)) { error("You cannot specify input_file when delete\n"); return USAGE_ERROR_CODE; } if (output_file==NULL) { error("You must specify the output_file file\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } int result = 0; if (input_file != NULL) { if (do_move == 0) { u_printf("copy file %s to %s\n",input_file,output_file); /* af_copy return 0 if success, -1 with reading problem, 1 writing problem */ result=af_copy(input_file,output_file); } else { u_printf("move file %s to %s\n",input_file,output_file); result=af_rename(input_file,output_file); } } else if (do_make_dir != 0) { u_printf("make dir %s\n", output_file); result = mkDirPortable(output_file); } else if (do_make_dir_parent != 0) { u_printf("make dir %s with parent\n", output_file); result = mkDirRecursiveIfNeeded(output_file); } else { if (do_recursive_delete == 0) { u_printf("remove file %s\n",output_file); result=af_remove(output_file); } else { u_printf("remove folder %s\n", output_file); af_remove_folder(output_file); result=0; } } u_printf((result==0) ? "Done.\n" : "Unsucessfull.\n"); return result; }