Пример #1
0
/**
 * The same as main, but without the call to setBufferMode.
 *
 * Parses the BuildKrMwuDic command line, opens the input file named by the
 * single remaining argument and the output file, loads the alphabet, the
 * inflection context and the binary dictionary, then delegates the actual
 * work to create_mwu_dictionary.
 *
 * Option codes handled: 'o' output file, 'd' inflection directory,
 * 'a' alphabet file, 'b' binary dictionary (the .inf name is derived from
 * it), 'k' input encoding, 'q' output encoding, 'h' help.
 *
 * Returns 0 after printing the usage text or after a complete run. Every
 * error is reported through fatal_error, which is assumed not to return
 * (the code following each check relies on that).
 */
int main_BuildKrMwuDic(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return 0;
}

int val,index=-1;
char output[FILENAME_MAX]="";
char inflection_dir[FILENAME_MAX]="";
char alphabet[FILENAME_MAX]="";
char dic_bin[FILENAME_MAX]="";
char dic_inf[FILENAME_MAX]="";
Encoding encoding_output = DEFAULT_ENCODING_OUTPUT;
int bom_output = DEFAULT_BOM_OUTPUT;
int mask_encoding_compatibility_input = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT;
struct OptVars* vars=new_OptVars();
while (EOF!=(val=getopt_long_TS(argc,argv,optstring_BuildKrMwuDic,lopts_BuildKrMwuDic,&index,vars))) {
   switch(val) {
   case 'o': if (vars->optarg[0]=='\0') {
                fatal_error("You must specify a non empty output file name\n");
             }
             /* The destination is a fixed FILENAME_MAX buffer: refuse
              * anything that would not fit, terminator included. */
             if (strlen(vars->optarg)>=(size_t)FILENAME_MAX) {
                fatal_error("Output file name is too long\n");
             }
             strcpy(output,vars->optarg);
             break;
   case 'd': if (vars->optarg[0]=='\0') {
                fatal_error("Empty inflection directory\n");
             }
             if (strlen(vars->optarg)>=(size_t)FILENAME_MAX) {
                fatal_error("Inflection directory name is too long\n");
             }
             strcpy(inflection_dir,vars->optarg);
             break;
   case 'a': if (vars->optarg[0]=='\0') {
                fatal_error("You must specify a non empty alphabet file name\n");
             }
             if (strlen(vars->optarg)>=(size_t)FILENAME_MAX) {
                fatal_error("Alphabet file name is too long\n");
             }
             strcpy(alphabet,vars->optarg);
             break;
   case 'b': if (vars->optarg[0]=='\0') {
                fatal_error("You must specify a non empty binary dictionary name\n");
             }
             if (strlen(vars->optarg)>=(size_t)FILENAME_MAX) {
                fatal_error("Binary dictionary name is too long\n");
             }
             strcpy(dic_bin,vars->optarg);
             /* Derive the .inf name from the .bin name, making sure the
              * appended suffix still fits in the buffer. */
             remove_extension(dic_bin,dic_inf);
             if (strlen(dic_inf)+strlen(".inf")>=(size_t)FILENAME_MAX) {
                fatal_error("Binary dictionary name is too long\n");
             }
             strcat(dic_inf,".inf");
             break;
   case 'h': usage();
             /* Release the option parser state before the early exit. */
             free_OptVars(vars);
             return 0;
   case ':': if (index==-1) fatal_error("Missing argument for option -%c\n",vars->optopt);
             else fatal_error("Missing argument for option --%s\n",lopts_BuildKrMwuDic[index].name);
             break;
   case '?': if (index==-1) fatal_error("Invalid option -%c\n",vars->optopt);
             else fatal_error("Invalid option --%s\n",vars->optarg);
             break;
   case 'k': if (vars->optarg[0]=='\0') {
                fatal_error("Empty input_encoding argument\n");
             }
             decode_reading_encoding_parameter(&mask_encoding_compatibility_input,vars->optarg);
             break;
   case 'q': if (vars->optarg[0]=='\0') {
                fatal_error("Empty output_encoding argument\n");
             }
             decode_writing_encoding_parameter(&encoding_output,&bom_output,vars->optarg);
             break;
   }
   /* Reset the long-option index so the next iteration can tell short
    * options (index==-1) from long ones. */
   index=-1;
}
/* Exactly one positional argument (the input file) must remain. */
if (vars->optind!=argc-1) {
   fatal_error("Invalid arguments: rerun with --help\n");
}
if (output[0]=='\0') {
   fatal_error("Output file must be specified\n");
}
if (inflection_dir[0]=='\0') {
   fatal_error("Inflection directory must be specified\n");
}
if (alphabet[0]=='\0') {
   fatal_error("Alphabet file must be specified\n");
}
if (dic_bin[0]=='\0') {
   fatal_error("Binary dictionary must be specified\n");
}

U_FILE* delas=u_fopen_existing_versatile_encoding(mask_encoding_compatibility_input,argv[vars->optind],U_READ);
if (delas==NULL) {
   fatal_error("Cannot open %s\n",argv[vars->optind]);
}
/* NOTE(review): the output file is opened in write mode with the same
 * "existing"/input-encoding helper as the input, while encoding_output and
 * bom_output are only forwarded to create_mwu_dictionary - confirm that
 * this is intended. */
U_FILE* grf=u_fopen_existing_versatile_encoding(mask_encoding_compatibility_input,output,U_WRITE);
if (grf==NULL) {
   fatal_error("Cannot open %s\n",output);
}
Alphabet* alph=load_alphabet(alphabet,1);
if (alph==NULL) {
   fatal_error("Cannot open alphabet file %s\n",alphabet);
}
Korean* korean=new Korean(alph);
MultiFlex_ctx* multiFlex_ctx = (MultiFlex_ctx*)malloc(sizeof(MultiFlex_ctx));
if (multiFlex_ctx==NULL) {
   fatal_alloc_error("main_BuildKrMwuDic");
}
strcpy(multiFlex_ctx->inflection_directory,inflection_dir);
if (init_transducer_tree(multiFlex_ctx)) {
   fatal_error("init_transducer_tree error\n");
}
struct l_morpho_t* pL_MORPHO=init_langage_morph();
if (pL_MORPHO == NULL) {
   fatal_error("init_langage_morph error\n");
}

/* Do not hand unloaded dictionary data to create_mwu_dictionary. */
unsigned char* bin=load_BIN_file(dic_bin);
if (bin==NULL) {
   fatal_error("Cannot load binary dictionary %s\n",dic_bin);
}
struct INF_codes* inf=load_INF_file(dic_inf);
if (inf==NULL) {
   fatal_error("Cannot load %s\n",dic_inf);
}

create_mwu_dictionary(delas,grf,multiFlex_ctx,korean,pL_MORPHO,encoding_output,
       bom_output,mask_encoding_compatibility_input,bin,inf);

free(bin);
free_INF_codes(inf);
u_fclose(delas);
u_fclose(grf);
/* The Korean object was built from the alphabet, so destroy it first. */
delete korean;
free_alphabet(alph);
free_transducer_tree(multiFlex_ctx);
/* NOTE(review): n_fst2 is never assigned in this function; presumably
 * init_transducer_tree or the inflection code maintains it - confirm. */
for (int count_free_fst2=0;count_free_fst2<multiFlex_ctx->n_fst2;count_free_fst2++) {
    free_abstract_Fst2(multiFlex_ctx->fst2[count_free_fst2],&(multiFlex_ctx->fst2_free[count_free_fst2]));
    multiFlex_ctx->fst2[count_free_fst2]=NULL;
}
free_language_morpho(pL_MORPHO);
free(multiFlex_ctx);
free_OptVars(vars);
u_printf("Done.\n");
return 0;
}
Пример #2
0
/**
 * The same than main, but no call to setBufferMode.
 */
int main_BuildKrMwuDic(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

int val,index=-1;
char output[FILENAME_MAX]="";
char inflection_dir[FILENAME_MAX]="";
char alphabet[FILENAME_MAX]="";
char dic_bin[FILENAME_MAX]="";
char dic_inf[FILENAME_MAX]="";

// default policy is to compile only out of date graphs
GraphRecompilationPolicy graph_recompilation_policy = ONLY_OUT_OF_DATE;

VersatileEncodingConfig vec=VEC_DEFAULT;

bool only_verify_arguments = false;

UnitexGetOpt options;

while (EOF!=(val=options.parse_long(argc,argv,optstring_BuildKrMwuDic,lopts_BuildKrMwuDic,&index))) {
   switch(val) {
   case 'o': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty output file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(output,options.vars()->optarg);
             break;
   case 'd': if (options.vars()->optarg[0]=='\0') {
                error("Empty inflection directory\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(inflection_dir,options.vars()->optarg);
             break;
   case 'a': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty alphabet file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(alphabet,options.vars()->optarg);
             break;
   case 'b': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty binary dictionary name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(dic_bin,options.vars()->optarg);
             remove_extension(dic_bin,dic_inf);
             strcat(dic_inf,".inf");
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage(); 
             return SUCCESS_RETURN_CODE;
   case 'f': graph_recompilation_policy = ALWAYS_RECOMPILE; break;
   case 'n': graph_recompilation_policy = NEVER_RECOMPILE;  break;
   case 't': graph_recompilation_policy = ONLY_OUT_OF_DATE; break;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_BuildKrMwuDic[index].name);
             return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   }
   index=-1;
}
if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}
if (output[0]=='\0') {
   error("Output file must be specified\n");
   return USAGE_ERROR_CODE;
}
if (inflection_dir[0]=='\0') {
   error("Inflection directory must be specified\n");
   return USAGE_ERROR_CODE;
}
if (alphabet[0]=='\0') {
   error("Alphabet file must be specified\n");
   return USAGE_ERROR_CODE;
}
if (dic_bin[0]=='\0') {
   error("Binary dictionary must be specified\n");
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory 
  return SUCCESS_RETURN_CODE;
}

U_FILE* delas=u_fopen(&vec,argv[options.vars()->optind],U_READ);
if (delas==NULL) {
   error("Cannot open %s\n",argv[options.vars()->optind]);
   return DEFAULT_ERROR_CODE;
}

U_FILE* grf=u_fopen(&vec,output,U_WRITE);
if (grf==NULL) {
   error("Cannot open %s\n",output);
   u_fclose(delas);  
   return DEFAULT_ERROR_CODE;
}

Alphabet* alph=load_alphabet(&vec,alphabet,1);
if (alph==NULL) {
   u_fclose(grf);
   u_fclose(delas);
   error("Cannot open alphabet file %s\n",alphabet);
   return DEFAULT_ERROR_CODE;
}
Korean* korean=new Korean(alph);

MultiFlex_ctx* multiFlex_ctx=new_MultiFlex_ctx(inflection_dir,
                                               NULL,
                                               NULL,
                                               &vec,
                                               korean,
                                               NULL,
                                               NULL,
                                               graph_recompilation_policy);

Dictionary* d=new_Dictionary(&vec,dic_bin,dic_inf);

create_mwu_dictionary(delas,grf,multiFlex_ctx,d);

free_Dictionary(d);
u_fclose(delas);
u_fclose(grf);
free_alphabet(alph);
delete korean;
for (int count_free_fst2=0;count_free_fst2<multiFlex_ctx->n_fst2;count_free_fst2++) {
    free_abstract_Fst2(multiFlex_ctx->fst2[count_free_fst2],&(multiFlex_ctx->fst2_free[count_free_fst2]));
    multiFlex_ctx->fst2[count_free_fst2]=NULL;
}
free_MultiFlex_ctx(multiFlex_ctx);
u_printf("Done.\n");
return SUCCESS_RETURN_CODE;
}