int main_Uncompress(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; char output[FILENAME_MAX]=""; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_Uncompress,lopts_Uncompress,&index))) { switch(val) { case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output file name\n"); return USAGE_ERROR_CODE; } strcpy(output,options.vars()->optarg); break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_Uncompress[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } if (output[0]=='\0') { remove_extension(argv[options.vars()->optind],output); strcat(output,".dic"); } U_FILE* f=u_fopen(&vec,output,U_WRITE); if (f==NULL) { error("Cannot open file %s\n",output); return DEFAULT_ERROR_CODE; } char inf_file[FILENAME_MAX]; remove_extension(argv[options.vars()->optind],inf_file); strcat(inf_file,".inf"); u_printf("Uncompressing %s...\n",argv[options.vars()->optind]); Dictionary* d=new_Dictionary(&vec,argv[options.vars()->optind],inf_file); if (d!=NULL) { rebuild_dictionary(d,f); } u_fclose(f); free_Dictionary(d); u_printf("Done.\n"); return SUCCESS_RETURN_CODE; }
int main_SpellCheck(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; char mode=0; char snt[FILENAME_MAX]=""; char txt[FILENAME_MAX]=""; char output[FILENAME_MAX]=""; char output_set=0; char output_op='A'; SpellCheckConfig config; config.max_errors=1; config.max_SP_INSERT=1; config.max_SP_SUPPR=1; config.max_SP_SWAP=1; config.max_SP_CHANGE=1; for (int i=0;i<N_SPSubOp;i++) { config.score[i]=default_scores[i]; } config.min_length1=4; config.min_length2=6; config.min_length3=12; config.input_op='D'; config.keyboard=NULL; config.allow_uppercase_initial=0; char foo; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_SpellCheck,lopts_SpellCheck,&index))) { switch(val) { case 's': { strcpy(snt,options.vars()->optarg); mode='s'; break; } case 'f': { strcpy(txt,options.vars()->optarg); mode='f'; break; } case 'o': { if (options.vars()->optarg!=NULL) { strcpy(output,options.vars()->optarg); } output_set=1; break; } case 'I': { if (!strcmp(options.vars()->optarg,"D") || !strcmp(options.vars()->optarg,"M") || !strcmp(options.vars()->optarg,"U")) { config.input_op=options.vars()->optarg[0]; } else { error("Invalid argument %s for option --input-op: should in [DMU]\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 'O': { if (!strcmp(options.vars()->optarg,"O") || !strcmp(options.vars()->optarg,"A")) { output_op=options.vars()->optarg[0]; } else { error("Invalid argument %s for option --output-op: should in [OA]\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 1: { config.keyboard=get_Keyboard(options.vars()->optarg); if (config.keyboard==NULL) { error("Invalid argument %s for option --keyboard:\nUse --show-keyboards to see possible values\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 2: { print_available_keyboards(U_STDOUT); return SUCCESS_RETURN_CODE; } case 10: { if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_errors,&foo)) { error("Invalid argument %s for --max-errors: should be an integer >=0\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 11: { if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_INSERT,&foo)) { error("Invalid argument %s for --max-insert: should be an integer >=0\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 12: { if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_SUPPR,&foo)) { error("Invalid argument %s for --max-suppr: should be an integer >=0\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 13: { if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_CHANGE,&foo)) { error("Invalid argument %s for --max-change: should be an integer >=0\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 14: { if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_SWAP,&foo)) { error("Invalid argument %s for --max-swap: should be an integer >=0\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 20: { int* scores=config.score; if (N_SPSubOp!=sscanf(options.vars()->optarg,"%d,%d,%d,%d,%d,%d,%d,%d,%d%c", scores,scores+1,scores+2,scores+3,scores+4,scores+5, scores+6,scores+7,scores+8,&foo)) { error("Invalid argument %s for option --scores. See --help-scores\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 21: { usage_scores(); return SUCCESS_RETURN_CODE; } case 22: { if (3!=sscanf(options.vars()->optarg,"%u,%u,%u%c", &config.min_length1,&config.min_length2,&config.min_length3,&foo)) { error("Invalid argument %s for option --min-lengths\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 23: { if (!strcmp(options.vars()->optarg,"yes")) { config.allow_uppercase_initial=1; } else if (!strcmp(options.vars()->optarg,"no")) { config.allow_uppercase_initial=0; } else { error("Invalid argument %s for option --upper-initial\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_SpellCheck[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind==argc) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (mode==0) { error("You must use either --snt or --file\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } config.n_dics=argc-options.vars()->optind; config.dics=(Dictionary**)malloc(config.n_dics*sizeof(Dictionary*)); if (config.dics==NULL) { alloc_error("main_SpellCheck"); return ALLOC_ERROR_CODE; } for (int i=0;i<config.n_dics;i++) { config.dics[i]=new_Dictionary(&vec,argv[i+options.vars()->optind]); if (config.dics[i]==NULL) { error("Cannot load dictionary %s\n",argv[i+options.vars()->optind]); } } config.out=U_STDOUT; config.n_input_lines=0; config.n_output_lines=0; if (mode=='s') { /* When working with a .snt, we actually want to work on its err file */ get_snt_path(snt,txt); strcat(txt,"err"); /* the output must be dlf, and we note the number of lines in the existing * dlf file, if any */ get_snt_path(snt,output); strcat(output,"dlf.n"); U_FILE* f=u_fopen(&vec,output,U_READ); if (f!=NULL) { u_fscanf(f,"%d",&(config.n_output_lines)); u_fclose(f); } get_snt_path(snt,output); strcat(output,"dlf"); output_set=1; /* and we force the values for -I and -O */ config.input_op='U'; output_op='A'; } else { /* If mode=='f', we don't have anything to do since we already * defined the default output to stdout */ } if (output_set) { if (output_op=='O') { config.out=u_fopen(&vec,output,U_WRITE); } else { config.out=u_fopen(&vec,output,U_APPEND); } if (config.out==NULL) { error("Cannot open output file %s\n",output); for (int i=0;i<config.n_dics;i++) { free_Dictionary(config.dics[i]); } free(config.dics); return DEFAULT_ERROR_CODE; } } config.modified_input=NULL; char modified_input[FILENAME_MAX]=""; if (config.input_op!='D') { strcpy(modified_input,txt); strcat(modified_input,".tmp"); config.modified_input=u_fopen(&vec,modified_input,U_WRITE); if (config.modified_input==NULL) { error("Cannot open tmp file %s\n",modified_input); if (config.out!=U_STDOUT) { u_fclose(config.out); } for (int i=0;i<config.n_dics;i++) { free_Dictionary(config.dics[i]); } free(config.dics); return DEFAULT_ERROR_CODE; } } config.in=u_fopen(&vec,txt,U_READ); if (config.in==NULL) { error("Cannot open file %s\n",txt); u_fclose(config.modified_input); if (config.out!=U_STDOUT) { u_fclose(config.out); } for (int i=0;i<config.n_dics;i++) { free_Dictionary(config.dics[i]); } free(config.dics); return DEFAULT_ERROR_CODE; } /* We perform spellchecking */ spellcheck(&config); /* And we clean */ u_fclose(config.in); if (config.modified_input!=NULL) { /* If we used a tmp file because the input file has to be modified, * it's now time to actually modify it */ u_fclose(config.modified_input); af_remove(txt); af_rename(modified_input,txt); } if (config.out!=U_STDOUT) { u_fclose(config.out); } for (int i=0;i<config.n_dics;i++) { free_Dictionary(config.dics[i]); } free(config.dics); /* Finally, we update the dlf.n and err.n files if mode=='s' */ if (mode=='s') { get_snt_path(snt,output); strcat(output,"err.n"); U_FILE* f=u_fopen(&vec,output,U_WRITE); if (f!=NULL) { u_fprintf(f,"%d",config.n_input_lines); u_fclose(f); } if (config.input_op!='D') { get_snt_path(snt,output); strcat(output,"dlf.n"); U_FILE* fw=u_fopen(&vec,output,U_WRITE); if (fw!=NULL) { u_fprintf(fw,"%d",config.n_output_lines); u_fclose(fw); } } } return SUCCESS_RETURN_CODE; }
int main_PolyLex(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } int language=-1; char alphabet[FILENAME_MAX]=""; char name_bin[FILENAME_MAX]=""; char output[FILENAME_MAX]=""; char info[FILENAME_MAX]=""; VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_PolyLex,lopts_PolyLex,&index))) { switch(val) { case 'D': language=DUTCH; break; case 'G': language=GERMAN; break; case 'N': language=NORWEGIAN; break; case 'R': language=RUSSIAN; break; case 'a': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty alphabet file name\n"); return USAGE_ERROR_CODE; } strcpy(alphabet,options.vars()->optarg); break; case 'd': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty dictionary file name\n"); return USAGE_ERROR_CODE; } strcpy(name_bin,options.vars()->optarg); break; case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output file name\n"); return USAGE_ERROR_CODE; } strcpy(output,options.vars()->optarg); break; case 'i': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty information file name\n"); return USAGE_ERROR_CODE; } strcpy(info,options.vars()->optarg); break; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_PolyLex[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (name_bin[0]=='\0') { error("You must specify the .bin dictionary to use\n"); return USAGE_ERROR_CODE; } if (output[0]=='\0') { error("You must specify the output dictionary file name\n"); return USAGE_ERROR_CODE; } if (language==-1) { error("You must specify the language\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } Alphabet* alph=NULL; if (alphabet[0]!='\0') { u_printf("Loading alphabet...\n"); alph=load_alphabet(&vec,alphabet); if (alph==NULL) { error("Cannot load alphabet file %s\n",alphabet); return USAGE_ERROR_CODE; } } char name_inf[FILENAME_MAX]; struct string_hash* forbiddenWords=NULL; if (language==DUTCH || language==NORWEGIAN) { get_path(name_bin,name_inf); strcat(name_inf,"ForbiddenWords.txt"); forbiddenWords=load_key_list(&vec,name_inf); if (forbiddenWords==NULL) { /* If there was no file, we don't want to block the process */ forbiddenWords=new_string_hash(DONT_USE_VALUES); } } strcpy(name_inf,name_bin); name_inf[strlen(name_bin)-3]='\0'; strcat(name_inf,"inf"); Dictionary* d=new_Dictionary(&vec,name_bin,name_inf); if (d==NULL) { error("Cannot load dictionary %s\n",name_bin); free_string_hash(forbiddenWords); free_alphabet(alph); return DEFAULT_ERROR_CODE; } char tmp[FILENAME_MAX]; strcpy(tmp,argv[options.vars()->optind]); strcat(tmp,".tmp"); U_FILE* words=u_fopen(&vec,argv[options.vars()->optind],U_READ); if (words==NULL) { error("Cannot open word list file %s\n",argv[options.vars()->optind]); free_Dictionary(d); free_string_hash(forbiddenWords); free_alphabet(alph); // here we return 0 in order to do not block the preprocessing // in the Unitex/GramLab IDE interface, if no dictionary was applied // so that there is no "err" file return SUCCESS_RETURN_CODE; } U_FILE* new_unknown_words=u_fopen(&vec,tmp,U_WRITE); if (new_unknown_words==NULL) { error("Cannot open temporary word list file %s\n",tmp); u_fclose(words); free_Dictionary(d); free_string_hash(forbiddenWords); free_alphabet(alph); return DEFAULT_ERROR_CODE; } U_FILE* res=u_fopen(&vec,output,U_APPEND); if (res==NULL) { error("Cannot open result file %s\n",output); u_fclose(new_unknown_words); u_fclose(words); free_Dictionary(d); free_string_hash(forbiddenWords); free_alphabet(alph); u_fclose(words); return DEFAULT_ERROR_CODE; } U_FILE* debug=NULL; if ((*info)!='\0') { debug=u_fopen(&vec,info,U_WRITE); if (debug==NULL) { error("Cannot open debug file %s\n",info); } } struct utags UTAG; switch(language) { case DUTCH: analyse_dutch_unknown_words(alph, d, words, res, debug, new_unknown_words, forbiddenWords); break; case GERMAN: analyse_german_compounds(alph, d, words, res, debug, new_unknown_words); break; case NORWEGIAN: analyse_norwegian_unknown_words(alph, d, words, res, debug, new_unknown_words, forbiddenWords); break; case RUSSIAN: init_russian(&UTAG); analyse_compounds(alph, d, words, res, debug, new_unknown_words, UTAG); break; } free_alphabet(alph); free_Dictionary(d); u_fclose(words); u_fclose(new_unknown_words); free_string_hash(forbiddenWords); af_remove(argv[options.vars()->optind]); af_rename(tmp,argv[options.vars()->optind]); u_fclose(res); if (debug!=NULL) { u_fclose(debug); } return SUCCESS_RETURN_CODE; }
/** * The same than main, but no call to setBufferMode. */ int main_BuildKrMwuDic(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } int val,index=-1; char output[FILENAME_MAX]=""; char inflection_dir[FILENAME_MAX]=""; char alphabet[FILENAME_MAX]=""; char dic_bin[FILENAME_MAX]=""; char dic_inf[FILENAME_MAX]=""; // default policy is to compile only out of date graphs GraphRecompilationPolicy graph_recompilation_policy = ONLY_OUT_OF_DATE; VersatileEncodingConfig vec=VEC_DEFAULT; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_BuildKrMwuDic,lopts_BuildKrMwuDic,&index))) { switch(val) { case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output file name\n"); return USAGE_ERROR_CODE; } strcpy(output,options.vars()->optarg); break; case 'd': if (options.vars()->optarg[0]=='\0') { error("Empty inflection directory\n"); return USAGE_ERROR_CODE; } strcpy(inflection_dir,options.vars()->optarg); break; case 'a': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty alphabet file name\n"); return USAGE_ERROR_CODE; } strcpy(alphabet,options.vars()->optarg); break; case 'b': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty binary dictionary name\n"); return USAGE_ERROR_CODE; } strcpy(dic_bin,options.vars()->optarg); remove_extension(dic_bin,dic_inf); strcat(dic_inf,".inf"); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case 'f': graph_recompilation_policy = ALWAYS_RECOMPILE; break; case 'n': graph_recompilation_policy = NEVER_RECOMPILE; break; case 't': graph_recompilation_policy = ONLY_OUT_OF_DATE; break; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_BuildKrMwuDic[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (output[0]=='\0') { error("Output file must be specified\n"); return USAGE_ERROR_CODE; } if (inflection_dir[0]=='\0') { error("Inflection directory must be specified\n"); return USAGE_ERROR_CODE; } if (alphabet[0]=='\0') { error("Alphabet file must be specified\n"); return USAGE_ERROR_CODE; } if (dic_bin[0]=='\0') { error("Binary dictionary must be specified\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } U_FILE* delas=u_fopen(&vec,argv[options.vars()->optind],U_READ); if (delas==NULL) { error("Cannot open %s\n",argv[options.vars()->optind]); return DEFAULT_ERROR_CODE; } U_FILE* grf=u_fopen(&vec,output,U_WRITE); if (grf==NULL) { error("Cannot open %s\n",output); u_fclose(delas); return DEFAULT_ERROR_CODE; } Alphabet* alph=load_alphabet(&vec,alphabet,1); if (alph==NULL) { u_fclose(grf); u_fclose(delas); error("Cannot open alphabet file %s\n",alphabet); return DEFAULT_ERROR_CODE; } Korean* korean=new Korean(alph); MultiFlex_ctx* multiFlex_ctx=new_MultiFlex_ctx(inflection_dir, NULL, NULL, &vec, korean, NULL, NULL, graph_recompilation_policy); Dictionary* d=new_Dictionary(&vec,dic_bin,dic_inf); create_mwu_dictionary(delas,grf,multiFlex_ctx,d); free_Dictionary(d); u_fclose(delas); u_fclose(grf); free_alphabet(alph); delete korean; for (int count_free_fst2=0;count_free_fst2<multiFlex_ctx->n_fst2;count_free_fst2++) { free_abstract_Fst2(multiFlex_ctx->fst2[count_free_fst2],&(multiFlex_ctx->fst2_free[count_free_fst2])); multiFlex_ctx->fst2[count_free_fst2]=NULL; } free_MultiFlex_ctx(multiFlex_ctx); u_printf("Done.\n"); return SUCCESS_RETURN_CODE; }