コード例 #1
0
int main_SpellCheck(int argc,char* const argv[]) {
if (argc==1) {
    usage();
    return SUCCESS_RETURN_CODE;
}

VersatileEncodingConfig vec=VEC_DEFAULT;
int val,index=-1;
char mode=0;
char snt[FILENAME_MAX]="";
char txt[FILENAME_MAX]="";
char output[FILENAME_MAX]="";
char output_set=0;
char output_op='A';
SpellCheckConfig config;
config.max_errors=1;
config.max_SP_INSERT=1;
config.max_SP_SUPPR=1;
config.max_SP_SWAP=1;
config.max_SP_CHANGE=1;
for (int i=0;i<N_SPSubOp;i++) {
    config.score[i]=default_scores[i];
}
config.min_length1=4;
config.min_length2=6;
config.min_length3=12;
config.input_op='D';
config.keyboard=NULL;
config.allow_uppercase_initial=0;
char foo;
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_SpellCheck,lopts_SpellCheck,&index))) {
   switch(val) {
   case 's': {
       strcpy(snt,options.vars()->optarg);
       mode='s';
       break;
   }
   case 'f': {
       strcpy(txt,options.vars()->optarg);
       mode='f';
       break;
   }
   case 'o': {
       if (options.vars()->optarg!=NULL) {
           strcpy(output,options.vars()->optarg);
       }
       output_set=1;
       break;
   }
   case 'I': {
       if (!strcmp(options.vars()->optarg,"D") || !strcmp(options.vars()->optarg,"M") || !strcmp(options.vars()->optarg,"U")) {
           config.input_op=options.vars()->optarg[0];
       } else {
       error("Invalid argument %s for option --input-op: should in [DMU]\n",options.vars()->optarg);
       return USAGE_ERROR_CODE;
       }
       break;
   }
   case 'O': {
       if (!strcmp(options.vars()->optarg,"O") || !strcmp(options.vars()->optarg,"A")) {
           output_op=options.vars()->optarg[0];
       } else {
           error("Invalid argument %s for option --output-op: should in [OA]\n",options.vars()->optarg);
       return USAGE_ERROR_CODE;
       }
       break;
   }
   case 1: {
       config.keyboard=get_Keyboard(options.vars()->optarg);
       if (config.keyboard==NULL) {
           error("Invalid argument %s for option --keyboard:\nUse --show-keyboards to see possible values\n",options.vars()->optarg);
       return USAGE_ERROR_CODE;
       }
       break;
   }
   case 2: {
       print_available_keyboards(U_STDOUT);
       return SUCCESS_RETURN_CODE;
   }
   case 10: {
       if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_errors,&foo)) {
           error("Invalid argument %s for --max-errors: should be an integer >=0\n",options.vars()->optarg);
       return USAGE_ERROR_CODE;
       }
       break;
   }
   case 11: {
       if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_INSERT,&foo)) {
           error("Invalid argument %s for --max-insert: should be an integer >=0\n",options.vars()->optarg);
       return USAGE_ERROR_CODE;
       }
       break;
   }
   case 12: {
       if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_SUPPR,&foo)) {
           error("Invalid argument %s for --max-suppr: should be an integer >=0\n",options.vars()->optarg);
       return USAGE_ERROR_CODE;
       }
       break;
   }
   case 13: {
       if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_CHANGE,&foo)) {
           error("Invalid argument %s for --max-change: should be an integer >=0\n",options.vars()->optarg);
       return USAGE_ERROR_CODE;
       }
       break;
   }
   case 14: {
       if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_SWAP,&foo)) {
           error("Invalid argument %s for --max-swap: should be an integer >=0\n",options.vars()->optarg);
       return USAGE_ERROR_CODE;
       }
       break;
   }
   case 20: {
       int* scores=config.score;
       if (N_SPSubOp!=sscanf(options.vars()->optarg,"%d,%d,%d,%d,%d,%d,%d,%d,%d%c",
            scores,scores+1,scores+2,scores+3,scores+4,scores+5,
            scores+6,scores+7,scores+8,&foo)) {
            error("Invalid argument %s for option --scores. See --help-scores\n",options.vars()->optarg);
        return USAGE_ERROR_CODE;
       }
       break;
   }
   case 21: {
       usage_scores();
       return SUCCESS_RETURN_CODE;
   }
   case 22: {
       if (3!=sscanf(options.vars()->optarg,"%u,%u,%u%c",
            &config.min_length1,&config.min_length2,&config.min_length3,&foo)) {
            error("Invalid argument %s for option --min-lengths\n",options.vars()->optarg);
        return USAGE_ERROR_CODE;
       }
       break;
   }
   case 23: {
       if (!strcmp(options.vars()->optarg,"yes")) {
           config.allow_uppercase_initial=1;
       } else if (!strcmp(options.vars()->optarg,"no")) {
           config.allow_uppercase_initial=0;
       } else {
           error("Invalid argument %s for option --upper-initial\n",options.vars()->optarg);
       return USAGE_ERROR_CODE;
       }
       break;
   }
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage();
             return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_SpellCheck[index].name);
             return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
   }
   index=-1;
}

if (options.vars()->optind==argc) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}

if (mode==0) {
  error("You must use either --snt or --file\n");
  return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  return SUCCESS_RETURN_CODE;
}

config.n_dics=argc-options.vars()->optind;
config.dics=(Dictionary**)malloc(config.n_dics*sizeof(Dictionary*));
if (config.dics==NULL) {
    alloc_error("main_SpellCheck");
  return ALLOC_ERROR_CODE;
}

for (int i=0;i<config.n_dics;i++) {
    config.dics[i]=new_Dictionary(&vec,argv[i+options.vars()->optind]);
    if (config.dics[i]==NULL) {
        error("Cannot load dictionary %s\n",argv[i+options.vars()->optind]);
    }
}

config.out=U_STDOUT;
config.n_input_lines=0;
config.n_output_lines=0;

if (mode=='s') {
    /* When working with a .snt, we actually want to work on its err file */
    get_snt_path(snt,txt);
    strcat(txt,"err");
    /* the output must be dlf, and we note the number of lines in the existing
     * dlf file, if any */
    get_snt_path(snt,output);
    strcat(output,"dlf.n");
    U_FILE* f=u_fopen(&vec,output,U_READ);
    if (f!=NULL) {
        u_fscanf(f,"%d",&(config.n_output_lines));
        u_fclose(f);
    }
    get_snt_path(snt,output);
    strcat(output,"dlf");
    output_set=1;
    /* and we force the values for -I and -O */
    config.input_op='U';
    output_op='A';
} else {
    /* If mode=='f', we don't have anything to do since we already
     * defined the default output to stdout */
}

if (output_set) {
    if (output_op=='O') {
        config.out=u_fopen(&vec,output,U_WRITE);
    } else {
        config.out=u_fopen(&vec,output,U_APPEND);
    }
    if (config.out==NULL) {
        error("Cannot open output file %s\n",output);
    for (int i=0;i<config.n_dics;i++) {
      free_Dictionary(config.dics[i]);
    }
    free(config.dics);
    return DEFAULT_ERROR_CODE;
    }
}

config.modified_input=NULL;
char modified_input[FILENAME_MAX]="";
if (config.input_op!='D') {
    strcpy(modified_input,txt);
    strcat(modified_input,".tmp");
    config.modified_input=u_fopen(&vec,modified_input,U_WRITE);
    if (config.modified_input==NULL) {
        error("Cannot open tmp file %s\n",modified_input);
    if (config.out!=U_STDOUT) {
      u_fclose(config.out);
    }
    for (int i=0;i<config.n_dics;i++) {
      free_Dictionary(config.dics[i]);
    }
    free(config.dics);
    return DEFAULT_ERROR_CODE;
    }
}

config.in=u_fopen(&vec,txt,U_READ);
if (config.in==NULL) {
    error("Cannot open file %s\n",txt);
  u_fclose(config.modified_input);
  if (config.out!=U_STDOUT) {
    u_fclose(config.out);
  }
  for (int i=0;i<config.n_dics;i++) {
    free_Dictionary(config.dics[i]);
  }
  free(config.dics);
  return DEFAULT_ERROR_CODE;
}

/* We perform spellchecking */
spellcheck(&config);

/* And we clean */

u_fclose(config.in);

if (config.modified_input!=NULL) {
  /* If we used a tmp file because the input file has to be modified,
   * it's now time to actually modify it */
  u_fclose(config.modified_input);
  af_remove(txt);
  af_rename(modified_input,txt);
}

if (config.out!=U_STDOUT) {
  u_fclose(config.out);
}

for (int i=0;i<config.n_dics;i++) {
  free_Dictionary(config.dics[i]);
}
free(config.dics);

/* Finally, we update the dlf.n and err.n files if mode=='s' */
if (mode=='s') {
    get_snt_path(snt,output);
    strcat(output,"err.n");
    U_FILE* f=u_fopen(&vec,output,U_WRITE);
    if (f!=NULL) {
        u_fprintf(f,"%d",config.n_input_lines);
        u_fclose(f);
    }
    if (config.input_op!='D') {
        get_snt_path(snt,output);
        strcat(output,"dlf.n");
        U_FILE* fw=u_fopen(&vec,output,U_WRITE);
        if (fw!=NULL) {
            u_fprintf(fw,"%d",config.n_output_lines);
            u_fclose(fw);
        }
    }
}

return SUCCESS_RETURN_CODE;
}
コード例 #2
0
ファイル: Evamb.cpp プロジェクト: anukat2015/unitex-core
int main_Evamb(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

int val,index=-1;
int sentence_number=-1;
const char* outfilename=NULL;
char output_name_buffer[FILENAME_MAX]="";
VersatileEncodingConfig vec=VEC_DEFAULT;
bool only_verify_arguments = false;
UnitexGetOpt options;

while (EOF!=(val=options.parse_long(argc,argv,optstring_Evamb,lopts_Evamb,&index))) {
   switch(val) {
     case 's':   {  char c_foo;
                    if (1!=sscanf(options.vars()->optarg,"%d%c",&sentence_number,&c_foo) || sentence_number<=0) {
                      /* foo is used to check that the sentence number is not like "45gjh" */
                      error("Invalid sentence number: %s\n",options.vars()->optarg);
                      return USAGE_ERROR_CODE;
                    }
                  }
               break;
     case 'o': if (options.vars()->optarg[0]=='\0') {
                  error("You must specify a non empty output file name\n");
                  return USAGE_ERROR_CODE;
               }
               strcpy(output_name_buffer,options.vars()->optarg);
               outfilename=output_name_buffer;
               break;
     case 'k': if (options.vars()->optarg[0]=='\0') {
                 error("Empty input_encoding argument\n");
                 return USAGE_ERROR_CODE;
               }
               decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
               break;
     case 'q': if (options.vars()->optarg[0]=='\0') {
                 error("Empty output_encoding argument\n");
                 return USAGE_ERROR_CODE;
               }
               decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
               break;
     case 'V': only_verify_arguments = true;
               break;                
     case 'h': usage(); 
               return SUCCESS_RETURN_CODE;
     case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                           error("Missing argument for option --%s\n",lopts_Evamb[index].name);
               return USAGE_ERROR_CODE;                         
     case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                           error("Invalid option --%s\n",options.vars()->optarg);
               return USAGE_ERROR_CODE;
   }
   index=-1;
}

if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  return SUCCESS_RETURN_CODE;
}

u_printf("Loading '%s'...\n",argv[options.vars()->optind]);
Tfst* tfst=open_text_automaton(&vec,argv[options.vars()->optind]);
if (tfst==NULL) {
   error("Unable to load '%s'\n",argv[options.vars()->optind]);
   return DEFAULT_ERROR_CODE;
}
if (sentence_number>tfst->N) {
   error("Invalid sentence number %d: should be in [1;%d]\n",sentence_number,tfst->N);
   close_text_automaton(tfst);
   return DEFAULT_ERROR_CODE;
}
U_FILE* outfile = (outfilename == NULL) ? U_STDOUT : u_fopen(&vec,outfilename,U_WRITE);
if (outfile==NULL) {
    error("Cannot create file %s\n",outfilename);
    close_text_automaton(tfst);
    return DEFAULT_ERROR_CODE;
}
if (sentence_number==-1) {
   /* If we have to evaluate the ambiguity rate of the whole automaton */
   double lognp_total=0.0;
   double lmoy_total=0.0;
   double maxlogamb=0.0;
   double minlogamb=(double)INT_MAX;
   /* This is the number of bad automata in the text .fst2 */
   int n_bad_automata=0;
   int maxambno=-1;
   int minambno=-1;
   for (sentence_number=1;sentence_number<=tfst->N;sentence_number++) {
      load_sentence(tfst,sentence_number);
      SingleGraph graph=tfst->automaton;
      if (graph->number_of_states==0 || graph->states[0]->outgoing_transitions==NULL) {
         n_bad_automata++;
         error("Sentence %d: empty automaton\n",sentence_number);
      } else {
         /* log(number of paths) */
         double lognp;
         /* minimum/maximum path length */
         int lmin,lmax;
         /* Approximation of the sentence length */
         double lmoy;
         /* log(ambiguity rate) */
         double logamb;
         lognp=evaluate_ambiguity(graph,&lmin,&lmax);
         lmoy=(double)(lmin+lmax)/2.0;
         logamb=lognp/lmoy;
         if (maxlogamb<logamb) {
            maxlogamb=logamb;
            maxambno=sentence_number;
         }
         if (minlogamb>logamb) {
            minlogamb=logamb;
            minambno=sentence_number;
         }
         u_printf("Sentence %d            \r",sentence_number);
         lognp_total=lognp_total+lognp;
         lmoy_total=lmoy_total+lmoy;
      }
   }
   if (n_bad_automata>=tfst->N) {
      error("No stats to print because no non-empty sentence automata were found.\n");
   } else {
      u_fprintf(outfile,"%d/%d sentence%s taken into account\n",tfst->N-n_bad_automata,tfst->N,(tfst->N>1)?"s":"");
      u_fprintf(outfile,"Average ambiguity rate=%.3f\n",exp(lognp_total/lmoy_total));
      u_fprintf(outfile,"Minimum ambiguity rate=%.3f (sentence %d)\n",exp(minlogamb),minambno);
      u_fprintf(outfile,"Maximum ambiguity rate=%.3f (sentence %d)\n",exp(maxlogamb),maxambno);
   }
} else {
   /* If we have to evaluate the ambiguity rate of a single sentence automaton */
   load_sentence(tfst,sentence_number);
   SingleGraph graph=tfst->automaton;
   if (graph->number_of_states==0) {
      error("Sentence %d: empty automaton\n",sentence_number);
   } else {
      int min;
      int max;
      double lognp=evaluate_ambiguity(graph,&min,&max);
      double lmoy=(double)(min+max)/2.0;
      u_fprintf(outfile,"Sentence %d: ambiguity rate=%.3f\n",sentence_number,exp(lognp/lmoy));
   }
}

if (outfile!=U_STDOUT) {
  u_fclose(outfile);
}

close_text_automaton(tfst);
return SUCCESS_RETURN_CODE;
}
コード例 #3
0
ファイル: Fst2Txt.cpp プロジェクト: anukat2015/unitex-core
int main_Fst2Txt(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

struct fst2txt_parameters* p=new_fst2txt_parameters();
char in_offsets[FILENAME_MAX]="";
char out_offsets[FILENAME_MAX]="";
int val,index=-1;
bool only_verify_arguments = false;
UnitexGetOpt options;

while (EOF!=(val=options.parse_long(argc,argv,optstring_Fst2Txt,lopts_Fst2Txt,&index))) {
   switch(val) {
   case 't': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty text file name\n");
                free_fst2txt_parameters(p);
                return USAGE_ERROR_CODE;
             }
             p->input_text_file=strdup(options.vars()->optarg);
             if (p->input_text_file==NULL) {
                alloc_error("main_Fst2Txt");
                free_fst2txt_parameters(p);
                return ALLOC_ERROR_CODE;
             }
             break;
   case 'o': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty text output file name\n");
                free_fst2txt_parameters(p);
                return USAGE_ERROR_CODE;
             }
             p->output_text_file=strdup(options.vars()->optarg);
			 p->output_text_file_is_temp=0;
             if (p->output_text_file==NULL) {
                alloc_error("main_Fst2Txt");
                free_fst2txt_parameters(p);
                return ALLOC_ERROR_CODE;
             }
             break;
   case 'a': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty alphabet file name\n");
                free_fst2txt_parameters(p);
                return USAGE_ERROR_CODE;                
             }
             p->alphabet_file=strdup(options.vars()->optarg);
             if (p->alphabet_file==NULL) {
               alloc_error("main_Fst2Txt");
               free_fst2txt_parameters(p);
               return ALLOC_ERROR_CODE;               
             }
             break;
   case 'M': p->output_policy=MERGE_OUTPUTS; break;
   case 'R': p->output_policy=REPLACE_OUTPUTS; break;
   case 'c': p->tokenization_policy=CHAR_BY_CHAR_TOKENIZATION; break;
   case 'w': p->tokenization_policy=WORD_BY_WORD_TOKENIZATION; break;
   case 's': p->space_policy=START_WITH_SPACE; break;
   case 'x': p->space_policy=DONT_START_WITH_SPACE; break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage();
             free_fst2txt_parameters(p);
             return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_Fst2Txt[index].name);
             free_fst2txt_parameters(p);
             return USAGE_ERROR_CODE; 
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                free_fst2txt_parameters(p);
                return USAGE_ERROR_CODE;                 
             }
             decode_reading_encoding_parameter(&(p->vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                free_fst2txt_parameters(p);
                return USAGE_ERROR_CODE; 
             }
             decode_writing_encoding_parameter(&(p->vec.encoding_output),&(p->vec.bom_output),options.vars()->optarg);
             break;
   case '$': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_offsets argument\n");
                free_fst2txt_parameters(p);
                return USAGE_ERROR_CODE; 
             }
             strcpy(in_offsets,options.vars()->optarg);
             break;
   case '@': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_offsets argument\n");
                free_fst2txt_parameters(p);
                return USAGE_ERROR_CODE; 
             }
             strcpy(out_offsets,options.vars()->optarg);
             break;
   case 'l': p->convLFtoCRLF=0; break;
   case 'r': p->keepCR = 1; break;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             free_fst2txt_parameters(p);
             return USAGE_ERROR_CODE;
   }
   index=-1;
}

if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   free_fst2txt_parameters(p);
   return USAGE_ERROR_CODE;
}

if (p->input_text_file==NULL) {
   error("You must specify the text file\n");
   free_fst2txt_parameters(p);
   return USAGE_ERROR_CODE;   
}

if (only_verify_arguments) {
  // freeing all allocated memory
  free_fst2txt_parameters(p);
  return SUCCESS_RETURN_CODE;
}

if (out_offsets[0]!='\0') {
	/* We deal with offsets only if the program is expected to produce some */
	if (in_offsets[0]!='\0') {
		p->v_in_offsets=load_offsets(&(p->vec),in_offsets);
		if (p->v_in_offsets==NULL) {
			error("Cannot load offset file %s\n",in_offsets);
      free_fst2txt_parameters(p);
      return DEFAULT_ERROR_CODE;      
		}
	} else {
		/* If there is no input offset file, we create an empty offset vector
		 * in order to avoid testing whether the vector is NULL or not */
		p->v_in_offsets=new_vector_offset(1);
	}
	p->f_out_offsets=u_fopen(&(p->vec),out_offsets,U_WRITE);
	if (p->f_out_offsets==NULL) {
		error("Cannot create file %s\n",out_offsets);
    free_fst2txt_parameters(p);
    return DEFAULT_ERROR_CODE;     
	}
}

if (p->output_text_file == NULL) {
	char tmp[FILENAME_MAX];
	remove_extension(p->input_text_file, tmp);
	strcat(tmp, ".tmp");
	p->output_text_file_is_temp=1;
	p->output_text_file = strdup(tmp);
	if (p->output_text_file == NULL) {
		alloc_error("main_Fst2Txt");
		free_fst2txt_parameters(p);
		return ALLOC_ERROR_CODE;
	}
}
p->fst_file=strdup(argv[options.vars()->optind]);
if (p->fst_file==NULL) {
   alloc_error("main_Fst2Txt");
   free_fst2txt_parameters(p);
   return ALLOC_ERROR_CODE;   
}

int result=main_fst2txt(p);

free_fst2txt_parameters(p);
return result;
}
コード例 #4
0
InstallLogger::InstallLogger(int argc,char* const argv[]) :
  ule(ule_default_init), init_done(0) {
  ClearUniLoggerSpaceStruct(0);
  if (argc==1) {
    usage();
    return;
  }

  Encoding encoding_output = DEFAULT_ENCODING_OUTPUT;
  int bom_output = DEFAULT_BOM_OUTPUT;
  int mask_encoding_compatibility_input = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT;
  int val,index=-1;
  bool only_verify_arguments = false;
  UnitexGetOpt options;
  while (EOF!=(val=options.parse_long(argc,argv,optstring_CreateLog,lopts_CreateLog,&index))) {
     switch(val) {
       case 'V': only_verify_arguments = true;
                 break;
       case 'h': usage();
                 return;
       case 'n': ule.store_file_in_content = 0;
                 break;
       case 'i': ule.store_file_in_content = 1;
                 break;
       case 'o': ule.store_file_out_content = 1;
                 break;
       case 'u': ule.store_file_out_content = 0;
                 break;
       case 's': ule.store_list_file_in_content = 1;
                 break;
       case 't': ule.store_list_file_in_content = 0;
                 break;
       case 'r': ule.store_list_file_out_content = 1;
                 break;
       case 'f': ule.store_list_file_out_content = 0;
                 break;
       case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                             error("Missing argument for option --%s\n",lopts_CreateLog[index].name);
                 return;            
       case 'k': if (options.vars()->optarg[0]=='\0') {
                    error("Empty input_encoding argument\n");
                    return;
                 }
                 decode_reading_encoding_parameter(&mask_encoding_compatibility_input,options.vars()->optarg);
                 break;
       case 'q': if (options.vars()->optarg[0]=='\0') {
                    error("Empty output_encoding argument\n");
                    return;
                 }
                 decode_writing_encoding_parameter(&encoding_output,&bom_output,options.vars()->optarg);
                 break;
       case 'g': ClearUniLoggerSpaceStruct(1);
                 return;
       case 'p': if (options.vars()->optarg[0]=='\0') {
                    error("You must specify a non empty param file\n");
                    return;
                 }
                 ClearUniLoggerSpaceStruct(1);
                 LoadParamFile(options.vars()->optarg);
                 return;
       case 'l': if (options.vars()->optarg[0]=='\0') {
                    error("You must specify a non empty log filename\n");
                    return;
                 }
                 if (ule.szNameLog != NULL) {
                     free((void*)ule.szNameLog);
                 }
                 ule.szNameLog = strdup(options.vars()->optarg);
                 break;
     
       case 'd': if (options.vars()->optarg[0]=='\0') {
                    error("You must specify a non empty directory\n");
                    return;
                 }
                 if (ule.szPathLog != NULL) {
                     free((void*)ule.szPathLog);
                 }    
                 ule.szPathLog = strdup(options.vars()->optarg);
                 break;

       case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                             error("Invalid option --%s\n",options.vars()->optarg);
                 return;
     }
     index=-1;
  }

  if (options.vars()->optind!=argc-1) {
  }

  if (only_verify_arguments) {
    // freeing all allocated memory
    return;
  }

  if (AddActivityLogger(&ule) != 0) {
    init_done = 1;
  } else {
    ClearUniLoggerSpaceStruct(1);
  }
}
コード例 #5
0
int main_Normalize(int argc,char* const argv[]) {
if (argc==1) {
  usage();
  return SUCCESS_RETURN_CODE;
}
int mode=KEEP_CARRIAGE_RETURN;
int separator_normalization=1;
char rules[FILENAME_MAX]="";
char input_offsets[FILENAME_MAX]="";
char output_offsets[FILENAME_MAX]="";
VersatileEncodingConfig vec=VEC_DEFAULT;
int convLFtoCRLF=1;
int val,index=-1;
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_Normalize,lopts_Normalize,&index))) {
   switch(val) {
   case 'l': convLFtoCRLF=0; break;
   case 'n': mode=REMOVE_CARRIAGE_RETURN; break;
   case 'r': if (options.vars()->optarg[0]=='\0') {
              error("You must specify a non empty replacement rule file name\n");
              return USAGE_ERROR_CODE;
             }
             strcpy(rules,options.vars()->optarg);
             break;
   case 1: separator_normalization=0; break;
   case 'k': if (options.vars()->optarg[0]=='\0') {
              error("Empty input_encoding argument\n");
              return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
              error("Empty output_encoding argument\n");
              return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case '$': if (options.vars()->optarg[0]=='\0') {
              error("You must specify a non empty input offset file name\n");
              return USAGE_ERROR_CODE;
             }
             strcpy(input_offsets,options.vars()->optarg);
             break;
   case '@': if (options.vars()->optarg[0]=='\0') {
              error("You must specify a non empty output offset file name\n");
              return USAGE_ERROR_CODE;
             }
             strcpy(output_offsets,options.vars()->optarg);
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage();
             return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_Normalize[index].name);
             return USAGE_ERROR_CODE;
             break;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
             break;
   }
   index=-1;
}

if (options.vars()->optind!=argc-1) {
  error("Invalid arguments: rerun with --help\n");
  return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  return SUCCESS_RETURN_CODE;
}

vector_offset* v_input_offsets=NULL;
vector_offset* v_output_offsets=NULL;
U_FILE* f_output_offsets=NULL;

if (output_offsets[0]!='\0') {
  /* We deal with offsets only if we have to produce output offsets */
  if (input_offsets[0]!='\0') {
    v_input_offsets=load_offsets(&vec,input_offsets);
  }
  f_output_offsets=u_fopen(&vec, output_offsets, U_WRITE);
  if (f_output_offsets==NULL) {
    error("Cannot create offset file %s\n",output_offsets);
    return DEFAULT_ERROR_CODE;
  }
  v_output_offsets=new_vector_offset();
}
char tmp_file[FILENAME_MAX];
get_extension(argv[options.vars()->optind],tmp_file);
if (!strcmp(tmp_file, ".snt")) {
   /* If the file to process has already the .snt extension, we temporary rename it to
   * .snt.normalizing */
  strcpy(tmp_file,argv[options.vars()->optind]);
  strcat(tmp_file,".normalizing");
  af_rename(argv[options.vars()->optind],tmp_file);
} else {
   strcpy(tmp_file,argv[options.vars()->optind]);
}
/* We set the destination file */
char dest_file[FILENAME_MAX];
remove_extension(argv[options.vars()->optind],dest_file);
strcat(dest_file,".snt");
u_printf("Normalizing %s...\n",argv[options.vars()->optind]);

int return_value = normalize(tmp_file,
                             dest_file,
                             &vec,
                             mode,
                             convLFtoCRLF,
                             rules,
                             v_output_offsets,
                             separator_normalization);
u_printf("\n");
/* If we have used a temporary file, we delete it */
if (strcmp(tmp_file,argv[options.vars()->optind])) {
   af_remove(tmp_file);
}
process_offsets(v_input_offsets,v_output_offsets,f_output_offsets);
u_fclose(f_output_offsets);
free_vector_offset(v_input_offsets);
free_vector_offset(v_output_offsets);
u_printf((return_value==SUCCESS_RETURN_CODE) ? "Done.\n" : "Unsuccessfull.\n");

return return_value;
}
コード例 #6
0
int main_MultiFlex(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

char output[FILENAME_MAX]="";
char config_dir[FILENAME_MAX]="";
char alphabet[FILENAME_MAX]="";
char pkgdir[FILENAME_MAX]="";
char* named=NULL;
int is_korean=0;
// default policy is to compile only out of date graphs
GraphRecompilationPolicy graph_recompilation_policy = ONLY_OUT_OF_DATE;
//Current language's alphabet
int error_check_status=SIMPLE_AND_COMPOUND_WORDS;
VersatileEncodingConfig vec=VEC_DEFAULT;
int val,index=-1;
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_MultiFlex,lopts_MultiFlex,&index))) {
   switch(val) {
   case 'o': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty DELAF file name\n");
                free(named);
                return USAGE_ERROR_CODE;
             }
             strcpy(output,options.vars()->optarg);
             break;
   case 'a': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty alphabet file name\n");
                free(named);
                return USAGE_ERROR_CODE;
             }
             strcpy(alphabet,options.vars()->optarg);
             break;
   case 'd': strcpy(config_dir,options.vars()->optarg); break;
   case 'K': is_korean=1;
             break;
   case 's': error_check_status=ONLY_SIMPLE_WORDS; break;
   case 'c': error_check_status=ONLY_COMPOUND_WORDS; break;
   case 'f': graph_recompilation_policy = ALWAYS_RECOMPILE; break;
   case 'n': graph_recompilation_policy = NEVER_RECOMPILE;  break;
   case 't': graph_recompilation_policy = ONLY_OUT_OF_DATE; break;
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                free(named);
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                free(named);
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case 'p': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty package directory name\n");
                free(named);
                return USAGE_ERROR_CODE;
             }
             strcpy(pkgdir,options.vars()->optarg);
             break;
   case 'r': if (named==NULL) {
                  named=strdup(options.vars()->optarg);
                  if (named==NULL) {
                     alloc_error("main_Grf2Fst2");
                     return ALLOC_ERROR_CODE;
                  }
             } else {
                   char* more_names = (char*)realloc((void*)named,strlen(named)+strlen(options.vars()->optarg)+2);
                 if (more_names) {
                  named = more_names;
                 } else {
                  alloc_error("main_MultiFlex");
                  free(named);
                  return ALLOC_ERROR_CODE;
                 }
                 strcat(named,";");
                 strcat(named,options.vars()->optarg);
             }
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage();
             free(named);
             return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_MultiFlex[index].name);
             free(named);
             return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             free(named);
             return USAGE_ERROR_CODE;
   }
   index=-1;
}

if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   free(named);
   return USAGE_ERROR_CODE;
}

if (output[0]=='\0') {
   error("You must specify the output DELAF name\n");
   free(named);
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  free(named);
  return SUCCESS_RETURN_CODE;
}

//Load morphology description
char morphology[FILENAME_MAX];
new_file(config_dir,"Morphology.txt",morphology);
//int config_files_status=CONFIG_FILES_OK;
Alphabet* alph=NULL;
if (alphabet[0]!='\0') {
   //Load alphabet
   alph=load_alphabet(&vec,alphabet,1);  //To be done once at the beginning of the inflection
   if (alph==NULL) {
      error("Cannot open alphabet file %s\n",alphabet);
      free(named);
      return DEFAULT_ERROR_CODE;
   }
}
//Init equivalence files
char equivalences[FILENAME_MAX];
new_file(config_dir,"Equivalences.txt",equivalences);

/* Korean */
Korean* korean=NULL;
if (is_korean) {
   if (alph==NULL) {
      error("Cannot initialize Korean data with a NULL alphabet\n");
      free(named);
      return DEFAULT_ERROR_CODE;
   }
    korean=new Korean(alph);
}
MultiFlex_ctx* p_multiFlex_ctx=new_MultiFlex_ctx(config_dir,
                                                 morphology,
                                                 equivalences,
                                                 &vec,
                                                 korean,
                                                 pkgdir,
                                                 named,
                                                 graph_recompilation_policy);

//DELAC inflection
int return_value = inflect(argv[options.vars()->optind],output,p_multiFlex_ctx,alph,error_check_status);

free(named);

for (int count_free_fst2=0;count_free_fst2<p_multiFlex_ctx->n_fst2;count_free_fst2++) {
    free_abstract_Fst2(p_multiFlex_ctx->fst2[count_free_fst2],&(p_multiFlex_ctx->fst2_free[count_free_fst2]));
    p_multiFlex_ctx->fst2[count_free_fst2] = NULL;
}

free_alphabet(alph);

free_MultiFlex_ctx(p_multiFlex_ctx);

if (korean!=NULL) {
    delete korean;
}

u_printf("Done.\n");
return return_value;
}
コード例 #7
0
ファイル: PolyLex.cpp プロジェクト: UnitexGramLab/unitex-core
int main_PolyLex(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

int language=-1;
char alphabet[FILENAME_MAX]="";
char name_bin[FILENAME_MAX]="";
char output[FILENAME_MAX]="";
char info[FILENAME_MAX]="";
VersatileEncodingConfig vec=VEC_DEFAULT;
int val,index=-1;
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_PolyLex,lopts_PolyLex,&index))) {
   switch(val) {
   case 'D': language=DUTCH; break;
   case 'G': language=GERMAN; break;
   case 'N': language=NORWEGIAN; break;
   case 'R': language=RUSSIAN; break;
   case 'a': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty alphabet file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(alphabet,options.vars()->optarg);
             break;
   case 'd': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty dictionary file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(name_bin,options.vars()->optarg);
             break;
   case 'o': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty output file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(output,options.vars()->optarg);
             break;
   case 'i': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty information file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(info,options.vars()->optarg);
             break;
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage();
             return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_PolyLex[index].name);
             return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
   }
   index=-1;
}

if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}

if (name_bin[0]=='\0') {
   error("You must specify the .bin dictionary to use\n");
   return USAGE_ERROR_CODE;
}

if (output[0]=='\0') {
   error("You must specify the output dictionary file name\n");
   return USAGE_ERROR_CODE;
}

if (language==-1) {
   error("You must specify the language\n");
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  return SUCCESS_RETURN_CODE;
}

Alphabet* alph=NULL;
if (alphabet[0]!='\0') {
   u_printf("Loading alphabet...\n");
   alph=load_alphabet(&vec,alphabet);
   if (alph==NULL) {
      error("Cannot load alphabet file %s\n",alphabet);
      return USAGE_ERROR_CODE;
   }
}

char name_inf[FILENAME_MAX];
struct string_hash* forbiddenWords=NULL;
if (language==DUTCH || language==NORWEGIAN) {
   get_path(name_bin,name_inf);
   strcat(name_inf,"ForbiddenWords.txt");
   forbiddenWords=load_key_list(&vec,name_inf);
   if (forbiddenWords==NULL) {
       /* If there was no file, we don't want to block the process */
       forbiddenWords=new_string_hash(DONT_USE_VALUES);
   }
}

strcpy(name_inf,name_bin);
name_inf[strlen(name_bin)-3]='\0';
strcat(name_inf,"inf");
Dictionary* d=new_Dictionary(&vec,name_bin,name_inf);
if (d==NULL) {
    error("Cannot load dictionary %s\n",name_bin);
    free_string_hash(forbiddenWords);
    free_alphabet(alph);
    return DEFAULT_ERROR_CODE;
}

char tmp[FILENAME_MAX];
strcpy(tmp,argv[options.vars()->optind]);
strcat(tmp,".tmp");

U_FILE* words=u_fopen(&vec,argv[options.vars()->optind],U_READ);
if (words==NULL) {
   error("Cannot open word list file %s\n",argv[options.vars()->optind]);
   free_Dictionary(d);
   free_string_hash(forbiddenWords);
   free_alphabet(alph);
   // here we return 0 in order to do not block the preprocessing
   // in the Unitex/GramLab IDE interface, if no dictionary was applied
   // so that there is no "err" file
   return SUCCESS_RETURN_CODE;
}

U_FILE* new_unknown_words=u_fopen(&vec,tmp,U_WRITE);
if (new_unknown_words==NULL) {
   error("Cannot open temporary word list file %s\n",tmp);
   u_fclose(words);
   free_Dictionary(d);
   free_string_hash(forbiddenWords);
   free_alphabet(alph);
   return DEFAULT_ERROR_CODE;
}

U_FILE* res=u_fopen(&vec,output,U_APPEND);
if (res==NULL) {
   error("Cannot open result file %s\n",output);
   u_fclose(new_unknown_words);
   u_fclose(words);
   free_Dictionary(d);
   free_string_hash(forbiddenWords);
   free_alphabet(alph);
   u_fclose(words);
   return DEFAULT_ERROR_CODE;
}

U_FILE* debug=NULL;
if ((*info)!='\0') {
   debug=u_fopen(&vec,info,U_WRITE);
   if (debug==NULL) {
      error("Cannot open debug file %s\n",info);
   }
}
struct utags UTAG;

switch(language) {
  case DUTCH:
    analyse_dutch_unknown_words(alph,
                                d,
                                words,
                                res,
                                debug,
                                new_unknown_words,
                                forbiddenWords);
    break;
  case GERMAN:
    analyse_german_compounds(alph,
                             d,
                             words,
                             res,
                             debug,
                             new_unknown_words);
    break;
  case NORWEGIAN:
    analyse_norwegian_unknown_words(alph,
                                    d,
                                    words,
                                    res,
                                    debug,
                                    new_unknown_words,
                                    forbiddenWords);
    break;
  case RUSSIAN:
     init_russian(&UTAG);
     analyse_compounds(alph,
                       d,
                       words,
                       res,
                       debug,
                       new_unknown_words,
                       UTAG);
     break;
}

free_alphabet(alph);
free_Dictionary(d);
u_fclose(words);
u_fclose(new_unknown_words);
free_string_hash(forbiddenWords);
af_remove(argv[options.vars()->optind]);
af_rename(tmp,argv[options.vars()->optind]);
u_fclose(res);

if (debug!=NULL) {
   u_fclose(debug);
}

return SUCCESS_RETURN_CODE;
}
コード例 #8
0
int main_PackFile(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

char junk_prefix[FILENAME_MAX+0x20]="";
char include_filename[FILENAME_MAX+0x20]="";
char global_comment[FILENAME_MAX+0x20]="";

Encoding encoding_output = DEFAULT_ENCODING_OUTPUT;
int bom_output = DEFAULT_BOM_OUTPUT;
int mask_encoding_compatibility_input = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT;
int val,index=-1;
int quiet=0;
int add_one_file_only=1;
int append=0;
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_PackFile,lopts_PackFile,&index))) {
   switch(val) {

   case 'm': quiet=1; break;
   case 'p': add_one_file_only=0; break;
   case 'a': append=1; break;
   case 'i': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty include file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(include_filename,options.vars()->optarg);
             break;
   case 'j': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty junk prefix file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(junk_prefix,options.vars()->optarg);
             break;
   case 'g': if (options.vars()->optarg[0]=='\0') {
                   error("You must specify a non empty global comment\n");
                   return USAGE_ERROR_CODE;
                }
                strcpy(global_comment,options.vars()->optarg);
                break;
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&mask_encoding_compatibility_input,options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&encoding_output,&bom_output,options.vars()->optarg);
             break;
   case 'V': only_verify_arguments = true;
             break;             
   case 'h': usage(); 
             return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_PackFile[index].name);
             return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
   }
   index=-1;
}

if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}

const char* ulpFile=argv[options.vars()->optind];

if (ulpFile == NULL) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}

if ((*ulpFile)=='\0') {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  return SUCCESS_RETURN_CODE;
}

int retValue = buildPackFile(ulpFile,append,
	                           global_comment,
                             include_filename,
                             add_one_file_only,
                             junk_prefix,
				                     quiet);
if (retValue == 0) {
	error("Error creating %s\n", ulpFile);
	return DEFAULT_ERROR_CODE;
} else {
	return SUCCESS_RETURN_CODE;
}

}
コード例 #9
0
/**
 * The same than main, but no call to setBufferMode.
 */
int main_KeyWords(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

VersatileEncodingConfig vec=VEC_DEFAULT;

char tokens[FILENAME_MAX];
char output[FILENAME_MAX]="";
char alph[FILENAME_MAX]="";
char cdic[FILENAME_MAX]="";
unichar* code=u_strdup("XXX");
int val,index=-1;
bool only_verify_arguments = false;
UnitexGetOpt options;

while (EOF!=(val=options.parse_long(argc,argv,optstring_KeyWords,lopts_KeyWords,&index))) {
   switch(val) {
   case 'o': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty output\n");
                free(code);
                return USAGE_ERROR_CODE;
             }
             strcpy(output,options.vars()->optarg);
             break;
   case 'a': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty alphabet file name\n");
                free(code);
                return USAGE_ERROR_CODE;                
             }
             strcpy(alph,options.vars()->optarg);
             break;
   case 'f': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty forbidden code\n");
                free(code);
                return USAGE_ERROR_CODE;                
             }
             free(code);
             code=u_strdup(options.vars()->optarg);
             break;
   case 'c': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty file name\n");
                free(code);
                return USAGE_ERROR_CODE;                
             }
             strcpy(cdic,options.vars()->optarg);
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage();
             free(code);
             return SUCCESS_RETURN_CODE;
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                free(code);
                return USAGE_ERROR_CODE;                
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                free(code);
                return USAGE_ERROR_CODE;                
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_KeyWords[index].name);
             free(code);
             return USAGE_ERROR_CODE;
             break;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             free(code);
             return USAGE_ERROR_CODE;
             break;
   }
   index=-1;
}

if (options.vars()->optind==argc || options.vars()->optind==argc-1) {
   error("Invalid arguments: rerun with --help\n");
   free(code);
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  free(code);
  return SUCCESS_RETURN_CODE;
}

Alphabet* alphabet=NULL;
if (alph[0]!='\0') {
  alphabet=load_alphabet(&vec,alph);
  if (alphabet==NULL) {
    error("Cannot load alphabet file %s\n",alph);
    free(code);
    return DEFAULT_ERROR_CODE;    
  }
}

strcpy(tokens,argv[(options.vars()->optind++)]);
if (output[0]=='\0') {
	get_path(tokens,output);
	strcat(output,"keywords.txt");
}
struct string_hash_ptr* keywords=load_tokens_by_freq(tokens,&vec);
filter_non_letter_keywords(keywords,alphabet);
if (cdic[0]!='\0') {
	load_compound_words(cdic,&vec,keywords);
}

for (;options.vars()->optind!=argc;(options.vars()->optind)++) {
	filter_keywords_with_dic(keywords,argv[options.vars()->optind],&vec,alphabet);
}
merge_case_equivalent_unknown_words(keywords,alphabet);
struct string_hash* forbidden_lemmas=compute_forbidden_lemmas(keywords,code);
remove_keywords_with_forbidden_lemma(keywords,forbidden_lemmas);
free_string_hash(forbidden_lemmas);
vector_ptr* sorted=sort_keywords(keywords);
U_FILE* f_output=u_fopen(&vec,output,U_WRITE);

if (f_output==NULL) {
	error("Cannot write in file %s\n",output);
  free_vector_ptr(sorted,(void(*)(void*))free_KeyWord_list);
  free_string_hash_ptr(keywords,(void(*)(void*))free_KeyWord_list);
  free_alphabet(alphabet);
  free(code);
  return DEFAULT_ERROR_CODE;  
}

dump_keywords(sorted,f_output);

u_fclose(f_output);
free_vector_ptr(sorted,(void(*)(void*))free_KeyWord_list);
free_string_hash_ptr(keywords,(void(*)(void*))free_KeyWord_list);
free_alphabet(alphabet);
free(code);

return SUCCESS_RETURN_CODE;
}
コード例 #10
0
int main_PersistResource(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

const char *resource_file = NULL;
int res_graph=0;
int res_alphabet=0;
int res_dico=0;
int unpersist=0;
int verbose=0;
VersatileEncodingConfig vec = VEC_DEFAULT;
int val,index=-1;
bool only_verify_arguments = false;
const char*output_file = NULL;
const char*resource_type = NULL;
UnitexGetOpt options;

while (EOF!=(val=options.parse_long(argc,argv,optstring_PersistResource,lopts_PersistResource,&index))) {
   switch(val) {
   case 'a': res_alphabet = 1; resource_type = "alphabet"; break;
   case 'g': res_graph = 1; resource_type = "graph"; break;
   case 'd': res_dico = 1; resource_type = "dictionary"; break;
   case 'u': unpersist = 1; break;
   case 'v': verbose = 1; break;
   case 'o': if (options.vars()->optarg[0]=='\0') {
                error("Empty output argument\n");
                return USAGE_ERROR_CODE;
             }
             output_file = options.vars()->optarg; // FIXME(gvollant)
             break;
   case 'V': only_verify_arguments = true;
             break;   
   case 'h': usage(); 
             return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt):
                         error("Missing argument for option --%s\n",lopts_PersistResource[index].name);
             return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
   case 'k':
   case 'q': /* ignore -k and -q parameter instead to raise an error */
             break;
   }
   index=-1;
}

if ((res_graph+res_alphabet+res_dico) != 1) {
	error("Invalid arguments: rerun with --help\n");
	return USAGE_ERROR_CODE;
}

if ((output_file!=NULL) && (unpersist!=0)) {
	error("Invalid arguments: rerun with --help\n");
	return USAGE_ERROR_CODE;
}

if (options.vars()->optind!=argc-1) {
	error("Invalid arguments: rerun with --help\n");
	return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
	// freeing all allocated memory
	return SUCCESS_RETURN_CODE;
}

resource_file = argv[options.vars()->optind];
size_t size_buf_persisted_filename = strlen(resource_file) + 0x200;
char* buf_persisted_filename = (char*)malloc(size_buf_persisted_filename +1);
if (buf_persisted_filename == NULL) {
	alloc_error("PersistResource's main");
	return ALLOC_ERROR_CODE;
}
*buf_persisted_filename='\0';
if (unpersist == 0) {
	int result = 0;
	if (res_alphabet)
		result = standard_load_persistence_alphabet(resource_file, buf_persisted_filename, size_buf_persisted_filename);
	if (res_graph)
		result = standard_load_persistence_fst2(resource_file, buf_persisted_filename, size_buf_persisted_filename);
	if (res_dico)
		result = standard_load_persistence_dictionary(resource_file, buf_persisted_filename, size_buf_persisted_filename);
	if (result && verbose)
		u_printf("Success on persist %s resource %s to persisted name %s\n", resource_type, resource_file, buf_persisted_filename);
	if (!result)
		error("The %s resource %s cannnot be persisted\n", resource_type, resource_file);


	if (result && (output_file != NULL)) {
		U_FILE* text = u_fopen(&vec, output_file, U_WRITE);
		if (text == NULL) {
			error("Cannot create text file %s\n", output_file);
			free(buf_persisted_filename);
			return DEFAULT_ERROR_CODE;
		}
		u_fprintf(text, "%s", buf_persisted_filename);
		u_fclose(text);
	}

	free(buf_persisted_filename);
	return result ? SUCCESS_RETURN_CODE : DEFAULT_ERROR_CODE;
}
else
{
	if (res_alphabet)
		standard_unload_persistence_alphabet(resource_file);
	if (res_graph)
		standard_unload_persistence_fst2(resource_file);
	if (res_dico)
		standard_unload_persistence_dictionary(resource_file);
	u_printf("The %s resource %s unpersisted\n", resource_type, resource_file);
	return SUCCESS_RETURN_CODE;
}

}
コード例 #11
0
int main_XMLizer(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

int output_style=TEI;
char output[FILENAME_MAX]="";
char alphabet[FILENAME_MAX]="";
char normalization[FILENAME_MAX]="";
char segmentation[FILENAME_MAX]="";
VersatileEncodingConfig vec=VEC_DEFAULT;
int convLFtoCRLF=1;
int val,index=-1;
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_XMLizer,lopts_XMLizer,&index))) {
   switch(val) {
   case 'x': output_style=XML; break;
   case 't': output_style=TEI; break;
   case 'n': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty normalization grammar name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(normalization,options.vars()->optarg);
             break;
   case 'o': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty output file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(output,options.vars()->optarg);
             break;
   case 'a': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty alphabet file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(alphabet,options.vars()->optarg);
             break;
   case 's': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty segmentation grammar name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(segmentation,options.vars()->optarg);
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage(); 
             return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_XMLizer[index].name);
             return USAGE_ERROR_CODE;                         
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case '?': index==-1  ? error("Invalid option -%c\n",options.vars()->optopt) :
                          error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;             
  
   }
   index=-1;
}

if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}

if (segmentation[0]=='\0') {
   error("You must specify the segmentation grammar to use\n");
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  return SUCCESS_RETURN_CODE;
}

char input[FILENAME_MAX];
strcpy(input,argv[options.vars()->optind]);
char snt[FILENAME_MAX];
remove_extension(input,snt);
strcat(snt,"_tmp.snt");
char tmp[FILENAME_MAX];
remove_extension(input,tmp);
strcat(tmp,".tmp");
normalize(input,snt,&vec,KEEP_CARRIAGE_RETURN,convLFtoCRLF,normalization,NULL,1);
struct fst2txt_parameters* p=new_fst2txt_parameters();
p->vec=vec;
p->input_text_file=strdup(snt);
if (p->input_text_file ==NULL) {
   alloc_error("main_XMLizer");
   free_fst2txt_parameters(p);
   return ALLOC_ERROR_CODE;
}

p->output_text_file_is_temp=1;
p->output_text_file=strdup(tmp);
if (p->output_text_file==NULL) {
   alloc_error("main_XMLizer");
   free_fst2txt_parameters(p);
   return ALLOC_ERROR_CODE;
}
p->fst_file=strdup(segmentation);
if (p->fst_file==NULL) {
   alloc_error("main_XMLizer");
   free_fst2txt_parameters(p);
   return ALLOC_ERROR_CODE;
}
p->alphabet_file=strdup(alphabet);
if (p->alphabet_file==NULL) {
   alloc_error("main_XMLizer");
   free_fst2txt_parameters(p);
   return ALLOC_ERROR_CODE;
}

p->output_policy=MERGE_OUTPUTS;
p->tokenization_policy=WORD_BY_WORD_TOKENIZATION;
p->space_policy=DONT_START_WITH_SPACE;

main_fst2txt(p);

free_fst2txt_parameters(p);

if (output[0]=='\0') {
  remove_extension(input,output);
	strcat(output,".xml");
}

int return_value = xmlize(&vec,snt,output,output_style);

af_remove(snt);
af_remove(tmp);

return return_value;
}
コード例 #12
0
ファイル: TEI2Txt.cpp プロジェクト: UnitexGramLab/unitex-core
int main_TEI2Txt(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

char output[FILENAME_MAX]="";
VersatileEncodingConfig vec=VEC_DEFAULT;
int val,index=-1;
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_TEI2Txt,lopts_TEI2Txt,&index))) {
   switch(val) {
   case 'o': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty output file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(output,options.vars()->optarg);
             break;
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage();
             return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_TEI2Txt[index].name);
             return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
   }
   index=-1;
}

if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  return SUCCESS_RETURN_CODE;
}

if(output[0]=='\0') {
  remove_extension(argv[options.vars()->optind],output);
    strcat(output,".txt");
}

int return_value = tei2txt(argv[options.vars()->optind],output,&vec);

return return_value;
}
コード例 #13
0
/**
 * The same than main, but no call to setBufferMode.
 */
int main_BuildKrMwuDic(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

int val,index=-1;
char output[FILENAME_MAX]="";
char inflection_dir[FILENAME_MAX]="";
char alphabet[FILENAME_MAX]="";
char dic_bin[FILENAME_MAX]="";
char dic_inf[FILENAME_MAX]="";

// default policy is to compile only out of date graphs
GraphRecompilationPolicy graph_recompilation_policy = ONLY_OUT_OF_DATE;

VersatileEncodingConfig vec=VEC_DEFAULT;

bool only_verify_arguments = false;

UnitexGetOpt options;

while (EOF!=(val=options.parse_long(argc,argv,optstring_BuildKrMwuDic,lopts_BuildKrMwuDic,&index))) {
   switch(val) {
   case 'o': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty output file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(output,options.vars()->optarg);
             break;
   case 'd': if (options.vars()->optarg[0]=='\0') {
                error("Empty inflection directory\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(inflection_dir,options.vars()->optarg);
             break;
   case 'a': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty alphabet file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(alphabet,options.vars()->optarg);
             break;
   case 'b': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty binary dictionary name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(dic_bin,options.vars()->optarg);
             remove_extension(dic_bin,dic_inf);
             strcat(dic_inf,".inf");
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage(); 
             return SUCCESS_RETURN_CODE;
   case 'f': graph_recompilation_policy = ALWAYS_RECOMPILE; break;
   case 'n': graph_recompilation_policy = NEVER_RECOMPILE;  break;
   case 't': graph_recompilation_policy = ONLY_OUT_OF_DATE; break;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_BuildKrMwuDic[index].name);
             return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   }
   index=-1;
}
if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}
if (output[0]=='\0') {
   error("Output file must be specified\n");
   return USAGE_ERROR_CODE;
}
if (inflection_dir[0]=='\0') {
   error("Inflection directory must be specified\n");
   return USAGE_ERROR_CODE;
}
if (alphabet[0]=='\0') {
   error("Alphabet file must be specified\n");
   return USAGE_ERROR_CODE;
}
if (dic_bin[0]=='\0') {
   error("Binary dictionary must be specified\n");
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory 
  return SUCCESS_RETURN_CODE;
}

U_FILE* delas=u_fopen(&vec,argv[options.vars()->optind],U_READ);
if (delas==NULL) {
   error("Cannot open %s\n",argv[options.vars()->optind]);
   return DEFAULT_ERROR_CODE;
}

U_FILE* grf=u_fopen(&vec,output,U_WRITE);
if (grf==NULL) {
   error("Cannot open %s\n",output);
   u_fclose(delas);  
   return DEFAULT_ERROR_CODE;
}

Alphabet* alph=load_alphabet(&vec,alphabet,1);
if (alph==NULL) {
   u_fclose(grf);
   u_fclose(delas);
   error("Cannot open alphabet file %s\n",alphabet);
   return DEFAULT_ERROR_CODE;
}
Korean* korean=new Korean(alph);

MultiFlex_ctx* multiFlex_ctx=new_MultiFlex_ctx(inflection_dir,
                                               NULL,
                                               NULL,
                                               &vec,
                                               korean,
                                               NULL,
                                               NULL,
                                               graph_recompilation_policy);

Dictionary* d=new_Dictionary(&vec,dic_bin,dic_inf);

create_mwu_dictionary(delas,grf,multiFlex_ctx,d);

free_Dictionary(d);
u_fclose(delas);
u_fclose(grf);
free_alphabet(alph);
delete korean;
for (int count_free_fst2=0;count_free_fst2<multiFlex_ctx->n_fst2;count_free_fst2++) {
    free_abstract_Fst2(multiFlex_ctx->fst2[count_free_fst2],&(multiFlex_ctx->fst2_free[count_free_fst2]));
    multiFlex_ctx->fst2[count_free_fst2]=NULL;
}
free_MultiFlex_ctx(multiFlex_ctx);
u_printf("Done.\n");
return SUCCESS_RETURN_CODE;
}
コード例 #14
0
int main_ElagComp(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

VersatileEncodingConfig vec=VEC_DEFAULT;
int val,index=-1;
char compilename[FILENAME_MAX]="";
char directory[FILENAME_MAX]="";
char grammar[FILENAME_MAX]="";
char rule_file[FILENAME_MAX]="";
char lang[FILENAME_MAX]="";
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_ElagComp,lopts_ElagComp,&index))) {
   switch(val) {
   case 'l': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty language definition file\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(lang,options.vars()->optarg);
             break;
   case 'r': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty rule file\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(rule_file,options.vars()->optarg);
             get_path(rule_file,directory);
             break;
   case 'g': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty grammar file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(grammar,options.vars()->optarg);
             get_path(grammar,directory);
             break;
   case 'o': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty output file\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(compilename,options.vars()->optarg);
             break;
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage();
             return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_ElagComp[index].name);
             return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
   }
   index=-1;
}

if (lang[0]=='\0') {
   error("You must define the language definition file\n");
   return USAGE_ERROR_CODE;
}
if ((rule_file[0]=='\0' && grammar[0]=='\0')
     || (rule_file[0]!='\0' && grammar[0]!='\0')) {
   error("You must define a rule list OR a grammar\n");
   return USAGE_ERROR_CODE;
}
if (options.vars()->optind!=argc) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}

if (rule_file[0]=='\0' && grammar[0]=='\0') {
   error("You must specified a grammar or a rule file name\n");
   return USAGE_ERROR_CODE;
}

if (rule_file[0]!='\0' && grammar[0]!='\0') {
   error("Cannot handle both a rule file and a grammar\n");
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  return SUCCESS_RETURN_CODE;
}

language_t* language = load_language_definition(&vec,lang);
if (rule_file[0]!='\0') {
   /* If we work with a rule list */
   if (compilename[0]=='\0') {
      int l=(int)strlen(rule_file);
      if (strcmp(rule_file+l-4,".lst")==0) {
         strcpy(compilename,rule_file);
         strcpy(compilename+l-4,".rul");
      } else {
         sprintf(compilename,"%s.rul",rule_file);
      }
   }
   if (compile_elag_rules(rule_file,compilename,&vec,language)==-1) {
      error("An error occurred while compiling %s\n",compilename);
      free_language_t(language);
      return DEFAULT_ERROR_CODE;
   }
   u_printf("\nElag grammars are compiled in %s.\n",compilename);
} else {
   /* If we must compile a single grammar */
   char elg_file[FILENAME_MAX];
   get_extension(grammar,elg_file);
   if (strcmp(elg_file,".fst2")) {
     error("Grammar '%s' should be a .fst2 file\n");
     free_language_t(language);
     return DEFAULT_ERROR_CODE;
   }
   remove_extension(grammar,elg_file);
   strcat(elg_file,".elg");
   if (compile_elag_grammar(grammar,elg_file,&vec,language)==-1) {
     error("An error occured while compiling %s\n",grammar);
     free_language_t(language);
     return DEFAULT_ERROR_CODE;
   }
   u_printf("Elag grammar is compiled into %s.\n",elg_file);
}
free_language_t(language);
return SUCCESS_RETURN_CODE;
}
コード例 #15
0
int main_Untokenize(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

char alphabet[FILENAME_MAX]="";
char token_file[FILENAME_MAX]="";
char dynamicSntDir[FILENAME_MAX]="";
VersatileEncodingConfig vec=VEC_DEFAULT;
int val,index=-1;
int range_start,range_stop,use_range;
int token_step_number=0;
range_start=range_stop=use_range=0;
char foo=0;
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_Untokenize,lopts_Untokenize,&index))) {
   switch(val) {
   case 'a': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty alphabet file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(alphabet,options.vars()->optarg);
             break;
   case 'd': if (options.vars()->optarg[0]=='\0') {
                   error("You must specify a non empty snt dir name\n");
                   return USAGE_ERROR_CODE;
                }
                strcpy(dynamicSntDir,options.vars()->optarg);
                break;
   case 't': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty token file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(token_file,options.vars()->optarg);
             break;
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;

   case 'n': if (1!=sscanf(options.vars()->optarg,"%d%c",&token_step_number,&foo) || token_step_number<=0) {
                /* foo is used to check that the search limit is not like "45gjh" */
                error("Invalid token numbering argument: %s\n",options.vars()->optarg);
                return USAGE_ERROR_CODE;
             }
             break;
   case 'r': {
                int param1 = 0;
                int param2 = 0;
                int ret_scan = sscanf(options.vars()->optarg,"%d,%d%c",&param1,&param2,&foo);
                if (ret_scan == 2) {
                    range_start = param1;
                    range_stop  = param2;
                    use_range=1;
                    if (((range_start < -1)) || (range_stop < -1)) {
                        /* foo is used to check that the search limit is not like "45gjh" */
                        error("Invalid stop count argument: %s\n",options.vars()->optarg);
                        return USAGE_ERROR_CODE;
                    }
                }
                else
                    if (1!=sscanf(options.vars()->optarg,"%d%c",&range_start,&foo) || (range_start < -1)) {
                        /* foo is used to check that the search limit is not like "45gjh" */
                        error("Invalid stop count argument: %s\n",options.vars()->optarg);
                        return USAGE_ERROR_CODE;
                    }
                    use_range=1;
             }
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage(); 
             return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_Untokenize[index].name);
             return USAGE_ERROR_CODE;            
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
   }
   index=-1;
}

if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  return SUCCESS_RETURN_CODE;
}

char tokens_txt[FILENAME_MAX];
char text_cod[FILENAME_MAX];
char enter_pos[FILENAME_MAX];

if (dynamicSntDir[0]=='\0') {
    get_snt_path(argv[options.vars()->optind],dynamicSntDir);
}

strcpy(text_cod,dynamicSntDir);
strcat(text_cod,"text.cod");
strcpy(enter_pos,dynamicSntDir);
strcat(enter_pos,"enter.pos");
strcpy(tokens_txt,dynamicSntDir);
strcat(tokens_txt,"tokens.txt");

Alphabet* alph=NULL;
if (alphabet[0]!='\0') {
   alph=load_alphabet(&vec,alphabet);
   if (alph==NULL) {
      error("Cannot load alphabet file %s\n",alphabet);
      return DEFAULT_ERROR_CODE;
   }
}

ABSTRACTMAPFILE* af_text_cod=af_open_mapfile(text_cod,MAPFILE_OPTION_READ,0);
if (af_text_cod==NULL) {
  error("Cannot open file %s\n",text_cod);
  free_alphabet(alph);
  return DEFAULT_ERROR_CODE;
}

ABSTRACTMAPFILE* af_enter_pos=af_open_mapfile(enter_pos,MAPFILE_OPTION_READ,0);
if (af_enter_pos==NULL) {
  error("Cannot open file %s\n",enter_pos);
  af_close_mapfile(af_text_cod);
  free_alphabet(alph);
  return DEFAULT_ERROR_CODE;
}

U_FILE* text = u_fopen(&vec,argv[options.vars()->optind],U_WRITE);
if (text==NULL) {
  error("Cannot create text file %s\n",argv[options.vars()->optind]);
  af_close_mapfile(af_enter_pos);
  af_close_mapfile(af_text_cod);
  free_alphabet(alph);
  return DEFAULT_ERROR_CODE;
}

struct text_tokens* tok=load_text_tokens(&vec,tokens_txt);
u_printf("Untokenizing text...\n");
size_t nb_item = af_get_mapfile_size(af_text_cod)/sizeof(int);
const int* buf=(const int*)af_get_mapfile_pointer(af_text_cod);

size_t nb_item_enter_pos=0;
const int* buf_enter=NULL;

if (af_enter_pos!=NULL) {
    buf_enter=(const int*)af_get_mapfile_pointer(af_enter_pos);
    if (buf_enter!=NULL) {
        nb_item_enter_pos=af_get_mapfile_size(af_enter_pos)/sizeof(int);
    }
}

size_t count_pos=0;
for (size_t i=0;i<nb_item;i++) {
    int is_in_range=1;
    if ((use_range!=0) && (i<(size_t)range_start)) {
        is_in_range=0;
    }
    if ((use_range!=0) && (range_stop!=0) && (i>(size_t)range_stop)) {
        is_in_range=0;
    }
    int is_newline=0;
    if (count_pos<nb_item_enter_pos) {
        if (i==(size_t)(*(buf_enter+count_pos))) {
            is_newline = 1;
            count_pos++;
        }
    }

    if (is_in_range!=0) {
        if (token_step_number != 0)
            if ((i%token_step_number)==0)
                u_fprintf(text,"\n\nToken %d : ", (int)i);

        if (is_newline!=0) {
            u_fprintf(text,"\n", tok->token[*(buf+i)]);
        }
        else {
			u_fputs(tok->token[*(buf+i)], text);
        }
    }
}

af_release_mapfile_pointer(af_text_cod,buf);
af_release_mapfile_pointer(af_enter_pos,buf_enter);
af_close_mapfile(af_enter_pos);
af_close_mapfile(af_text_cod);
free_text_tokens(tok);
u_fclose(text);
free_alphabet(alph);

u_printf("\nDone.\n");
return SUCCESS_RETURN_CODE;
}
コード例 #16
0
/**
 * This is the customized diff program designed to compare grf files.
 */
int main_SelectOutput(int argc,char* const argv[]) {
if (argc==1) {
	usage();
	return SUCCESS_RETURN_CODE;
}

VersatileEncodingConfig vec=VEC_DEFAULT;
int val,index=-1;
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_SelectOutput,lopts_SelectOutput,&index))) {
   switch(val) {
   case 'V': only_verify_arguments = true;
             break;    
   case 'h': usage(); 
             return SUCCESS_RETURN_CODE;
   case 'e':
   case 'o':
	   {
		   enum stdwrite_kind swk = (val == 'o') ? stdwrite_kind_out : stdwrite_kind_err;
		   if (strcmp(options.vars()->optarg,"on") == 0)
		   {
		       SetStdWriteCB(swk,0,NULL,NULL);
		   }
		   else
		   if (strcmp(options.vars()->optarg,"off") == 0)
		   {
		       SetStdWriteCB(swk,1,NULL,NULL);
		   }
		   else 
		   {
		       error("Invalid option --%s, must be 'on' or 'off'\n",options.vars()->optarg);
		       return USAGE_ERROR_CODE;
		   }
		   break;
	   }
   
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_SelectOutput[index].name);
             return USAGE_ERROR_CODE;            
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
   }
   index=-1;
}

  // keep this for further modifications
  if (only_verify_arguments) {
    // freeing all allocated memory
    return SUCCESS_RETURN_CODE;
  }


	return SUCCESS_RETURN_CODE;
}
コード例 #17
0
ファイル: SortTxt.cpp プロジェクト: UnitexGramLab/unitex-core
int main_SortTxt(int argc, char* const argv[]) {
  if (argc == 1) {
    usage();
    return SUCCESS_RETURN_CODE;
  }

  struct sort_infos* inf = new_sort_infos();
  if(!inf) {
    return ALLOC_ERROR_CODE;
  }

  int mode = DEFAULT;
  char line_info[FILENAME_MAX] = "";
  char sort_order[FILENAME_MAX] = "";
  VersatileEncodingConfig vec = { DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT,
      DEFAULT_ENCODING_OUTPUT, DEFAULT_BOM_OUTPUT };
  int val, index = -1;
  bool only_verify_arguments = false;
  UnitexGetOpt options;
  while (EOF != (val = options.parse_long(argc, argv, optstring_SortTxt,
      lopts_SortTxt, &index))) {
    switch (val) {
    case 'n':
      inf->REMOVE_DUPLICATES = 1;
      break;
    case 'd':
      inf->REMOVE_DUPLICATES = 0;
      break;
    case 'r':
      inf->REVERSE = -1;
      break;
    case 'o':
      if (options.vars()->optarg[0] == '\0') {
        error("You must specify a non empty sort order file name\n");
        free_sort_infos(inf);
        return USAGE_ERROR_CODE;
      }
      strcpy(sort_order, options.vars()->optarg);
      break;
    case 'l':
      if (options.vars()->optarg[0] == '\0') {
        error("You must specify a non empty information file name\n");
        free_sort_infos(inf);
        return USAGE_ERROR_CODE;
      }
      strcpy(line_info, options.vars()->optarg);
      break;
    case 't':
      mode = THAI;
      break;
    case 'f':
      inf->factorize_inflectional_codes = 1;
      break;
    case 'V': only_verify_arguments = true;
      break;
    case 'h':
      usage();
      free_sort_infos(inf);
      return SUCCESS_RETURN_CODE;
    case 'k':
      if (options.vars()->optarg[0] == '\0') {
        error("Empty input_encoding argument\n");
        free_sort_infos(inf);
        return USAGE_ERROR_CODE;
      }
      decode_reading_encoding_parameter(
          &(vec.mask_encoding_compatibility_input), options.vars()->optarg);
      break;
    case 'q':
      if (options.vars()->optarg[0] == '\0') {
        error("Empty output_encoding argument\n");
        free_sort_infos(inf);
        return USAGE_ERROR_CODE;
      }
      decode_writing_encoding_parameter(&(vec.encoding_output),
          &(vec.bom_output), options.vars()->optarg);
      break;
    case ':':
        index == -1 ? error("Missing argument for option -%c\n", options.vars()->optopt) :
                      error("Missing argument for option --%s\n",lopts_SortTxt[index].name);
        free_sort_infos(inf);
        return USAGE_ERROR_CODE;
    case '?':
        index == -1 ? error("Invalid option -%c\n", options.vars()->optopt) :
                      error("Invalid option --%s\n", options.vars()->optarg);
        free_sort_infos(inf);
        return USAGE_ERROR_CODE;
    }
    index = -1;
  }

  if (options.vars()->optind != argc - 1) {
    error("Invalid arguments: rerun with --help\n");
    free_sort_infos(inf);
    return USAGE_ERROR_CODE;
  }

  if (only_verify_arguments) {
    // freeing all allocated memory
    free_sort_infos(inf);
    return SUCCESS_RETURN_CODE;
  }

  if (sort_order[0] != '\0') {
    read_char_order(&vec, sort_order, inf);
  }

  char new_name[FILENAME_MAX];
  strcpy(new_name, argv[options.vars()->optind]);
  strcat(new_name, ".new");

  inf->f = u_fopen(&vec, argv[options.vars()->optind], U_READ);
  if (inf->f == NULL) {
    error("Cannot open file %s\n", argv[options.vars()->optind]);
    free_sort_infos(inf);
    return DEFAULT_ERROR_CODE;
  }

  inf->f_out = u_fopen(&vec, new_name, U_WRITE);
  if (inf->f_out == NULL) {
    error("Cannot open temporary file %s\n", new_name);
    u_fclose(inf->f);
    free_sort_infos(inf);
    return DEFAULT_ERROR_CODE;
  }

  switch (mode) {
  case DEFAULT:
    sort(inf);
    break;
  case THAI:
    sort_thai(inf);
    break;
  }
  if (line_info[0] != '\0') {
    U_FILE* F = u_fopen(&vec, line_info, U_WRITE);
    if (F == NULL) {
      error("Cannot write %s\n", line_info);
    } else {
      u_fprintf(F, "%d\n", inf->resulting_line_number);
      u_fclose(F);
    }
  }

  u_fclose(inf->f_out);
  u_fclose(inf->f);
  af_remove(argv[options.vars()->optind]);
  af_rename(new_name, argv[options.vars()->optind]);
  free_sort_infos(inf);

  u_printf("Done.\n");
  return SUCCESS_RETURN_CODE;
}
コード例 #18
0
ファイル: GrfDiff.cpp プロジェクト: UnitexGramLab/unitex-core
/**
 * This is the customized diff program designed to compare grf files.
 */
int main_GrfDiff(int argc,char* const argv[]) {
if (argc==1) {
    usage();
    return SUCCESS_RETURN_CODE;
}

VersatileEncodingConfig vec=VEC_DEFAULT;
int val,index=-1;
char output[FILENAME_MAX]="";
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_GrfDiff,lopts_GrfDiff,&index))) {
   switch(val) {
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage();
             return SUCCESS_RETURN_CODE;
   case 1: {
       strcpy(output,options.vars()->optarg);
       break;
   }
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_GrfDiff[index].name);
             return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
   }
   index=-1;
}

if (options.vars()->optind!=argc-2) {
  error("Invalid arguments: rerun with --help\n");
  return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  return SUCCESS_RETURN_CODE;
}

U_FILE* f=U_STDOUT;
if (output[0]!='\0') {
    /* Since the output is supposed to be a diff-like one, there is no point
     * in outputing in a variable encoding, so we force UTF8 */
    f=u_fopen(UTF8,output,U_WRITE);
    if (f==NULL) {
        error("Cannot create file %s\n",output);
    return DEFAULT_ERROR_CODE;
    }
}

Grf* a=load_Grf(&vec,argv[options.vars()->optind]);
if (a==NULL) {
  if (f!=U_STDOUT) {
    u_fclose(f);
  }
  return DEFAULT_ERROR_CODE;
}

Grf* b=load_Grf(&vec,argv[options.vars()->optind+1]);
if (b==NULL) {
    free_Grf(a);
  if (f!=U_STDOUT) {
    u_fclose(f);
  }
  return DEFAULT_ERROR_CODE;
}

GrfDiff* diff=grf_diff(a,b);
free_Grf(a);
free_Grf(b);
print_diff(f,diff);
if (f!=U_STDOUT) {
  u_fclose(f);
}
int different=diff->diff_ops->nbelems!=0;
free_GrfDiff(diff);
return different;
}
コード例 #19
0
int main_Flatten(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

int RTN=1;
int depth=10;
VersatileEncodingConfig vec=VEC_DEFAULT;
int val,index=-1;
char foo;
bool only_verify_arguments = false;
UnitexGetOpt options;

while (EOF!=(val=options.parse_long(argc,argv,optstring_Flatten,lopts_Flatten,&index))) {
   switch(val) {
   case 'f': RTN=0; break;
   case 'r': RTN=1; break;
   case 'd': if (1!=sscanf(options.vars()->optarg,"%d%c",&depth,&foo) || depth<=0) {
                /* foo is used to check that the depth is not like "45gjh" */
                error("Invalid depth argument: %s\n",options.vars()->optarg);
                return USAGE_ERROR_CODE;
             }
             break;
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case 'h': usage(); return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_Flatten[index].name);
             return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
             break;
   }
   index=-1;
}

if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  return SUCCESS_RETURN_CODE;
}

u_printf("Loading %s...\n",argv[options.vars()->optind]);
struct FST2_free_info fst2_free;
Fst2* origin=load_abstract_fst2(&vec,argv[options.vars()->optind],1,&fst2_free);
if (origin==NULL) {
   error("Cannot load %s\n",argv[options.vars()->optind]);
   return DEFAULT_ERROR_CODE;
}

char temp[FILENAME_MAX];
strcpy(temp,argv[options.vars()->optind]);
strcat(temp,".tmp.fst2");

switch (flatten_fst2(origin,depth,temp,&vec,RTN)) {
   case EQUIVALENT_FST:
      u_printf("The resulting grammar is an equivalent finite-state transducer.\n");
      break;
   case APPROXIMATIVE_FST:
      u_printf("The resulting grammar is a finite-state approximation.\n");
      break;
   case EQUIVALENT_RTN:
      u_printf("The resulting grammar is an equivalent FST2 (RTN).\n");
      break;
   default: 
      error("Internal state error in Flatten's main\n");
      free_abstract_Fst2(origin,&fst2_free);
      return DEFAULT_ERROR_CODE;
}

free_abstract_Fst2(origin,&fst2_free);
af_remove(argv[options.vars()->optind]);
af_rename(temp,argv[options.vars()->optind]);

return SUCCESS_RETURN_CODE;
}
コード例 #20
0
/**
 * The same than main, but no call to setBufferMode.
 */
int main_Concord(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

int val,index=-1;
struct conc_opt* concord_options = new_conc_opt();
char foo;
VersatileEncodingConfig vec=VEC_DEFAULT;
int ret;
char offset_file[FILENAME_MAX]="";
char PRLG[FILENAME_MAX]="";
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_Concord,lopts_Concord,&index))) {
   switch(val) {
   case 'f': if (options.vars()->optarg[0]=='\0') {
                error("Empty font name argument\n");
                free_conc_opt(concord_options);
                return USAGE_ERROR_CODE;
             }
             concord_options->fontname=strdup(options.vars()->optarg);
             if (concord_options->fontname==NULL) {
                alloc_error("main_Concord");
                free_conc_opt(concord_options);
                return ALLOC_ERROR_CODE;
             }
             break;
   case 's': if (1!=sscanf(options.vars()->optarg,"%d%c",&(concord_options->fontsize),&foo)) {
                /* foo is used to check that the font size is not like "45gjh" */
                error("Invalid font size argument: %s\n",options.vars()->optarg);
                free_conc_opt(concord_options);
                return USAGE_ERROR_CODE;
             }
             break;
   case 'l': ret=sscanf(options.vars()->optarg,"%d%c%c",&(concord_options->left_context),&foo,&foo);
             if (ret==0 || ret==3 || (ret==2 && foo!='s') || concord_options->left_context<0) {
                error("Invalid left context argument: %s\n",options.vars()->optarg);
                free_conc_opt(concord_options);
                return USAGE_ERROR_CODE;                
             }
             if (ret==2) {
                concord_options->left_context_until_eos=1;
             }
             break;
   case 'r': ret=sscanf(options.vars()->optarg,"%d%c%c",&(concord_options->right_context),&foo,&foo);
             if (ret==0 || ret==3 || (ret==2 && foo!='s') || concord_options->right_context<0) {
                error("Invalid right context argument: %s\n",options.vars()->optarg);
                free_conc_opt(concord_options);
                return USAGE_ERROR_CODE;                
             }
             if (ret==2) {
                concord_options->right_context_until_eos=1;
             }
             break;
   case 'L': concord_options->convLFtoCRLF=0; break;
   case 0: concord_options->sort_mode=TEXT_ORDER; break;
   case 1: concord_options->sort_mode=LEFT_CENTER; break;
   case 2: concord_options->sort_mode=LEFT_RIGHT; break;
   case 3: concord_options->sort_mode=CENTER_LEFT; break;
   case 4: concord_options->sort_mode=CENTER_RIGHT; break;
   case 5: concord_options->sort_mode=RIGHT_LEFT; break;
   case 6: concord_options->sort_mode=RIGHT_CENTER; break;
   case 7: concord_options->result_mode=DIFF_; break;
   case 8: concord_options->only_ambiguous=1; break;
   case 9: {
     strcpy(PRLG,options.vars()->optarg);
     char* pos=strchr(PRLG,',');
     if (pos==NULL || pos==PRLG || *(pos+1)=='\0') {
       error("Invalid argument for option --PRLG: %s\n",options.vars()->optarg);
       free_conc_opt(concord_options);
       return USAGE_ERROR_CODE;
     }
     *pos='\0';
     strcpy(offset_file,pos+1);
     break;
   }
   case 10: concord_options->only_matches=1; break;
   case 11: concord_options->result_mode=LEMMATIZE_; break;
   case 12: concord_options->result_mode=CSV_; break;
   case 'H': concord_options->result_mode=HTML_; break;
   case 't': {
     concord_options->result_mode=TEXT_;
     if (options.vars()->optarg!=NULL) {
       strcpy(concord_options->output,options.vars()->optarg);
     }
     break;
   }
   case 'g': concord_options->result_mode=GLOSSANET_;
             if (options.vars()->optarg[0]=='\0') {
                error("Empty glossanet script argument\n");
                free_conc_opt(concord_options);
                return USAGE_ERROR_CODE;                   
             }
             concord_options->script=strdup(options.vars()->optarg);
             if (concord_options->script==NULL) {
                alloc_error("main_Concord");
                free_conc_opt(concord_options);
                return ALLOC_ERROR_CODE;                   
             }
             break;
   case 'p': concord_options->result_mode=SCRIPT_;
             if (options.vars()->optarg[0]=='\0') {
                error("Empty script argument\n");
                free_conc_opt(concord_options);
                return USAGE_ERROR_CODE;                
             }
             concord_options->script=strdup(options.vars()->optarg);
             if (concord_options->script==NULL) {
                alloc_error("main_Concord");
                free_conc_opt(concord_options);
                return ALLOC_ERROR_CODE;                 
             }
             break;
   case 'i': concord_options->result_mode=INDEX_; break;
   case 'u': concord_options->result_mode=UIMA_;
             if (options.vars()->optarg!=NULL) {
                strcpy(offset_file,options.vars()->optarg);
             }
             concord_options->original_file_offsets=1;
             break;
   case 'e': concord_options->result_mode=XML_;
             if (options.vars()->optarg!=NULL) {
                strcpy(offset_file, options.vars()->optarg);
                concord_options->original_file_offsets=1;
             }
             break;
   case 'w': concord_options->result_mode=XML_WITH_HEADER_; 
             if (options.vars()->optarg!=NULL) {
                strcpy(offset_file, options.vars()->optarg);
                concord_options->original_file_offsets = 1;
             }
             break;
   case '$': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_offsets argument\n");
                free_conc_opt(concord_options);
                return USAGE_ERROR_CODE;
             }
             strcpy(concord_options->input_offsets,options.vars()->optarg);
             break;
   case '@': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_offsets argument\n");
                free_conc_opt(concord_options);
                return USAGE_ERROR_CODE;
             }
             strcpy(concord_options->output_offsets,options.vars()->optarg);
             break;
   case 'A': concord_options->result_mode=AXIS_; break;
   case 'x': concord_options->result_mode=XALIGN_; break;
   case 'm': concord_options->result_mode=MERGE_;
             if (options.vars()->optarg[0]=='\0') {
                error("Empty output file name argument\n");
                free_conc_opt(concord_options);
                return USAGE_ERROR_CODE;
             }
             strcpy(concord_options->output,options.vars()->optarg);
             break;
   case 'a': if (options.vars()->optarg[0]=='\0') {
                error("Empty alphabet argument\n");
                free_conc_opt(concord_options);
                return USAGE_ERROR_CODE;                
             }
             concord_options->sort_alphabet=strdup(options.vars()->optarg);
             if (concord_options->sort_alphabet==NULL) {
                alloc_error("main_Concord");
                free_conc_opt(concord_options);
                return ALLOC_ERROR_CODE;            }
             break;
   case 'T': concord_options->thai_mode=1; break;
   case 'd': if (options.vars()->optarg[0]=='\0') {
                error("Empty snt directory argument\n");
                free_conc_opt(concord_options);
                return USAGE_ERROR_CODE;
             }
             strcpy(concord_options->working_directory,options.vars()->optarg);
             break;
   case 'V': only_verify_arguments = true;
             break;             
   case 'h': usage();
             free_conc_opt(concord_options); 
             return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_Concord[index].name);
             free_conc_opt(concord_options);
             return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             free_conc_opt(concord_options);
             return USAGE_ERROR_CODE;
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                free_conc_opt(concord_options);
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                free_conc_opt(concord_options);
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   }
   index=-1;
}

if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   free_conc_opt(concord_options);
   return USAGE_ERROR_CODE;
}
if (concord_options->fontname==NULL || concord_options->fontsize<=0) {
   if (concord_options->result_mode==HTML_ || concord_options->result_mode==GLOSSANET_) {
      error("The specified output mode is an HTML file: you must specify font parameters\n");
      free_conc_opt(concord_options);
      return USAGE_ERROR_CODE;      
   }
}

if (only_verify_arguments) {
  // freeing all allocated memory
  free_conc_opt(concord_options);
  return SUCCESS_RETURN_CODE;
}

U_FILE* concor=u_fopen(&vec,argv[options.vars()->optind],U_READ);
if (concor==NULL) {
   error("Cannot open concordance index file %s\n",argv[options.vars()->optind]);
   free_conc_opt(concord_options);
   return DEFAULT_ERROR_CODE;   
}

if (concord_options->working_directory[0]=='\0') {
   get_path(argv[options.vars()->optind],concord_options->working_directory);
}
if (concord_options->only_matches) {
  concord_options->left_context=0;
  concord_options->right_context=0;
}
/* We compute the name of the files associated to the text */
struct snt_files* snt_files=new_snt_files_from_path(concord_options->working_directory);
ABSTRACTMAPFILE* text=af_open_mapfile(snt_files->text_cod,MAPFILE_OPTION_READ,0);
if (text==NULL) {
  error("Cannot open file %s\n",snt_files->text_cod);
  free_snt_files(snt_files);
  u_fclose(concor);
  free_conc_opt(concord_options);
  return DEFAULT_ERROR_CODE;
}
struct text_tokens* tok=load_text_tokens(&vec,snt_files->tokens_txt);
if (tok==NULL) {
  error("Cannot load text token file %s\n",snt_files->tokens_txt);
  af_close_mapfile(text);
  free_snt_files(snt_files);
  u_fclose(concor);
  free_conc_opt(concord_options);
  return DEFAULT_ERROR_CODE;
}

U_FILE* f_enter=u_fopen(BINARY,snt_files->enter_pos,U_READ);
int n_enter_char=0;
int* enter_pos=NULL;
/* New lines are encoded in 'enter.pos' files. Those files will disappear in the future */
if (f_enter==NULL) {
  error("Cannot open file %s\n",snt_files->enter_pos);
}
else {
  long size=get_file_size(f_enter);
  enter_pos=(int*)malloc(size);
  if (enter_pos==NULL) {
    alloc_error("main_Concord");
    u_fclose(f_enter);
    free_text_tokens(tok);
    af_close_mapfile(text);
    free_snt_files(snt_files);
    u_fclose(concor);
    free_conc_opt(concord_options);
    return ALLOC_ERROR_CODE;     
  }
  n_enter_char=(int)fread(enter_pos,sizeof(int),size/sizeof(int),f_enter);
  if (n_enter_char!=(int)(size/sizeof(int))) {
    error("Read error on enter.pos file in main_Concord\n");
    u_fclose(f_enter);
    free(enter_pos);
    free_text_tokens(tok);
    af_close_mapfile(text);
    free_snt_files(snt_files);
    u_fclose(concor);
    free_conc_opt(concord_options);
    return DEFAULT_ERROR_CODE;
  }
  u_fclose(f_enter);
}
if (concord_options->result_mode==INDEX_ || concord_options->result_mode==UIMA_ || 
    concord_options->result_mode==XML_ || concord_options->result_mode==XML_WITH_HEADER_ ||
    concord_options->result_mode==AXIS_) {
   /* We force some options for index, uima and axis files */
   concord_options->left_context=0;
   concord_options->right_context=0;
   concord_options->sort_mode=TEXT_ORDER;
}
if (concord_options->only_ambiguous && concord_options->result_mode!=LEMMATIZE_) {
  /* We force text order when displaying only ambiguous outputs */
  concord_options->sort_mode=TEXT_ORDER;
}
if (concord_options->result_mode==HTML_ || concord_options->result_mode==DIFF_ || concord_options->result_mode==LEMMATIZE_) {
  /* We need the offset file if and only if we have to produce
   * an html concordance with positions in .snt file */
  concord_options->snt_offsets=load_snt_offsets(snt_files->snt_offsets_pos);
  if (concord_options->snt_offsets==NULL) {
    error("Cannot read snt offset file %s\n",snt_files->snt_offsets_pos);
    free(enter_pos);    
    free_text_tokens(tok);
    af_close_mapfile(text);
    free_snt_files(snt_files);
    u_fclose(concor);
    free_conc_opt(concord_options);
    return DEFAULT_ERROR_CODE;    
  }
}
if (offset_file[0]!='\0') {
  concord_options->uima_offsets=load_uima_offsets(&vec,offset_file);
  if (concord_options->uima_offsets==NULL) {
    error("Cannot read offset file %s\n",offset_file);
    free(enter_pos); 
    free_text_tokens(tok);
    af_close_mapfile(text);
    free_snt_files(snt_files);
    u_fclose(concor);
    free_conc_opt(concord_options);
    return DEFAULT_ERROR_CODE;    
  }
}
if (PRLG[0]!='\0') {
  concord_options->PRLG_data=load_PRLG_data(&vec,PRLG);
  if (concord_options->PRLG_data==NULL) {
    error("Cannot read PRLG file %s\n",PRLG);
    free(enter_pos);
    free_text_tokens(tok);
    af_close_mapfile(text);
    free_snt_files(snt_files);
    u_fclose(concor);
    free_conc_opt(concord_options);
    return DEFAULT_ERROR_CODE;    
  }
}
if (concord_options->result_mode==CSV_) {
  concord_options->sort_mode=TEXT_ORDER;
  concord_options->only_matches=1;
}

/* Once we have set all parameters, we call the function that
 * will actually create the concordance. */
create_concordance(&vec,concor,text,tok,n_enter_char,enter_pos,concord_options);

free(enter_pos);
free_text_tokens(tok);
af_close_mapfile(text);
free_snt_files(snt_files);
u_fclose(concor);
free_conc_opt(concord_options);

u_printf("Done.\n");

return SUCCESS_RETURN_CODE;
}
コード例 #21
0
int main_Uncompress(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

VersatileEncodingConfig vec=VEC_DEFAULT;
int val,index=-1;
char output[FILENAME_MAX]="";
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_Uncompress,lopts_Uncompress,&index))) {
   switch(val) {
   case 'o': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty output file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(output,options.vars()->optarg);
             break;
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case 'V': only_verify_arguments = true;
             break;             
   case 'h': usage(); 
             return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_Uncompress[index].name);
             return USAGE_ERROR_CODE;                         
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
   }
   index=-1;
}

if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  return SUCCESS_RETURN_CODE;
}

if (output[0]=='\0') {
   remove_extension(argv[options.vars()->optind],output);
   strcat(output,".dic");
}

U_FILE* f=u_fopen(&vec,output,U_WRITE);
if (f==NULL) {
   error("Cannot open file %s\n",output);
   return DEFAULT_ERROR_CODE;
}

char inf_file[FILENAME_MAX];
remove_extension(argv[options.vars()->optind],inf_file);
strcat(inf_file,".inf");
u_printf("Uncompressing %s...\n",argv[options.vars()->optind]);
Dictionary* d=new_Dictionary(&vec,argv[options.vars()->optind],inf_file);

if (d!=NULL) {
  rebuild_dictionary(d,f);
}

u_fclose(f);
free_Dictionary(d);
u_printf("Done.\n");

return SUCCESS_RETURN_CODE;
}
コード例 #22
0
int main_RebuildTfst(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

VersatileEncodingConfig vec=VEC_DEFAULT;
int val, index=-1;
bool only_verify_arguments = false;
UnitexGetOpt options;
int save_statistics=1;
while (EOF!=(val=options.parse_long(argc,argv,optstring_RebuildTfst,lopts_RebuildTfst,&index))) {
   switch (val) {
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case 'S': save_statistics = 0;
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h':
      usage();
      return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n", options.vars()->optopt) :
                         error("Missing argument for option --%s\n", lopts_RebuildTfst[index].name);
     return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n", options.vars()->optopt) :
                         error("Invalid option --%s\n", options.vars()->optarg);
     return USAGE_ERROR_CODE;
   }
   index=-1;
}

if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  return SUCCESS_RETURN_CODE;
}

char input_tfst[FILENAME_MAX];
char input_tind[FILENAME_MAX];
strcpy(input_tfst,argv[options.vars()->optind]);
remove_extension(input_tfst,input_tind);
strcat(input_tind,".tind");

u_printf("Loading %s...\n",input_tfst);

Tfst* tfst = open_text_automaton(&vec,input_tfst);
if (tfst==NULL) {
   error("Unable to load %s automaton\n",input_tfst);
   return DEFAULT_ERROR_CODE;
}

char basedir[FILENAME_MAX];
get_path(input_tfst,basedir);
char output_tfst[FILENAME_MAX];
sprintf(output_tfst, "%s.new.tfst",input_tfst);
char output_tind[FILENAME_MAX];
sprintf(output_tind, "%s.new.tind",input_tfst);

U_FILE* f_tfst;
if ((f_tfst = u_fopen(&vec,output_tfst,U_WRITE)) == NULL) {
   error("Unable to open %s for writing\n", output_tfst);
   close_text_automaton(tfst);
   return DEFAULT_ERROR_CODE;
}

U_FILE* f_tind;
if ((f_tind = u_fopen(BINARY,output_tind,U_WRITE)) == NULL) {
   u_fclose(f_tfst);
   close_text_automaton(tfst);
   error("Unable to open %s for writing\n", output_tind);
   return DEFAULT_ERROR_CODE;
}
/* We use this hash table to rebuild files tfst_tags_by_freq/alph.txt */
struct hash_table* form_frequencies=new_hash_table((HASH_FUNCTION)hash_unichar,(EQUAL_FUNCTION)u_equal,
        (FREE_FUNCTION)free,NULL,(KEYCOPY_FUNCTION)keycopy);

u_fprintf(f_tfst,"%010d\n",tfst->N);
for (int i = 1; i <= tfst->N; i++) {
   if ((i % 100) == 0) {
      u_printf("%d/%d sentences rebuilt...\n", i, tfst->N);
   }
   load_sentence(tfst,i);

   char grfname[FILENAME_MAX];
   sprintf(grfname, "%ssentence%d.grf", basedir, i);
   unichar** tags=NULL;
   int n_tags=-1;
   if (fexists(grfname)) {
      /* If there is a .grf for the current sentence, then we must
       * take it into account */
      if (0==pseudo_main_Grf2Fst2(&vec,grfname,0,NULL,1,1,NULL,NULL,0)) {
         /* We proceed only if the graph compilation was a success */
         char fst2name[FILENAME_MAX];
         sprintf(fst2name, "%ssentence%d.fst2", basedir, i);
         struct FST2_free_info fst2_free;
         Fst2* fst2=load_abstract_fst2(&vec,fst2name,0,&fst2_free);
         af_remove(fst2name);
         free_SingleGraph(tfst->automaton,NULL);
         tfst->automaton=create_copy_of_fst2_subgraph(fst2,1);
         tags=create_tfst_tags(fst2,&n_tags);
         free_abstract_Fst2(fst2,&fst2_free);
      } else {
         error("Error: %s is not a valid sentence automaton\n",grfname);
      }
   }
   save_current_sentence(tfst,f_tfst,f_tind,tags,n_tags,form_frequencies);
   if (tags!=NULL) {
      /* If necessary, we free the tags we created */
      for (int count_tags=0;count_tags<n_tags;count_tags++) {
         free(tags[count_tags]);
      }
      free(tags);
   }
}

u_printf("Text automaton rebuilt.\n");

u_fclose(f_tind);
u_fclose(f_tfst);
close_text_automaton(tfst);

/* Finally, we save statistics */
if (save_statistics) {
    char tfst_tags_by_freq[FILENAME_MAX];
    char tfst_tags_by_alph[FILENAME_MAX];
    strcpy(tfst_tags_by_freq, basedir);
    strcat(tfst_tags_by_freq, "tfst_tags_by_freq.txt");
    strcpy(tfst_tags_by_alph, basedir);
    strcat(tfst_tags_by_alph, "tfst_tags_by_alph.txt");
    U_FILE* f_tfst_tags_by_freq = u_fopen(&vec, tfst_tags_by_freq, U_WRITE);
    if (f_tfst_tags_by_freq == NULL) {
        error("Cannot open %s\n", tfst_tags_by_freq);
    }
    U_FILE* f_tfst_tags_by_alph = u_fopen(&vec, tfst_tags_by_alph, U_WRITE);
    if (f_tfst_tags_by_alph == NULL) {
        error("Cannot open %s\n", tfst_tags_by_alph);
    }
    sort_and_save_tfst_stats(form_frequencies, f_tfst_tags_by_freq, f_tfst_tags_by_alph);
    u_fclose(f_tfst_tags_by_freq);
    u_fclose(f_tfst_tags_by_alph);
}
free_hash_table(form_frequencies);

/* make a backup and replace old automaton with new */
char backup_tfst[FILENAME_MAX];
char backup_tind[FILENAME_MAX];
sprintf(backup_tfst,"%s.bck",input_tfst);
sprintf(backup_tind,"%s.bck",input_tind);
/* We remove the existing backup files, if any */
af_remove(backup_tfst);
af_remove(backup_tind);
af_rename(input_tfst,backup_tfst);
af_rename(input_tind,backup_tind);
af_rename(output_tfst,input_tfst);
af_rename(output_tind,input_tind);
u_printf("\nYou can find a backup of the original files in:\n    %s\nand %s\n",
         backup_tfst,backup_tind);

return SUCCESS_RETURN_CODE;
}
コード例 #23
0
/*
 * This function behaves in the same way that a main one, except that it does
 * not invoke the setBufferMode function.
 */
int main_LocateTfst(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

VersatileEncodingConfig vec=VEC_DEFAULT;
int val,index=-1;
char text[FILENAME_MAX]="";
char alphabet[FILENAME_MAX]="";
int is_korean=0;
int tilde_negation_operator=1;
int selected_negation_operator=0;
int tagging=0;
int single_tags_only=0;
int match_word_boundaries=1;
MatchPolicy match_policy=LONGEST_MATCHES;
OutputPolicy output_policy=IGNORE_OUTPUTS;
AmbiguousOutputPolicy ambiguous_output_policy=ALLOW_AMBIGUOUS_OUTPUTS;
VariableErrorPolicy variable_error_policy=IGNORE_VARIABLE_ERRORS;
int search_limit=NO_MATCH_LIMIT;
char foo;
vector_ptr* injected=new_vector_ptr();
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_LocateTfst,lopts_LocateTfst,&index))) {
   switch(val) {
   case 't': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty .tfst name\n");
                free_vector_ptr(injected);
                return USAGE_ERROR_CODE;
             }
             strcpy(text,options.vars()->optarg);
             break;
   case 'a': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty alphabet name\n");
                free_vector_ptr(injected);
                return USAGE_ERROR_CODE;
             }
             strcpy(alphabet,options.vars()->optarg);
             break;
   case 'K': is_korean=1;
   	   	   	  match_word_boundaries=0;
              break;
   case 'l': search_limit=NO_MATCH_LIMIT; break;
   case 'g': if (options.vars()->optarg[0]=='\0') {
                error("You must specify an argument for negation operator\n");
                free_vector_ptr(injected);
                return USAGE_ERROR_CODE;
             }
             selected_negation_operator=1;
             if ((strcmp(options.vars()->optarg,"minus")==0) || (strcmp(options.vars()->optarg,"-")==0)) {
                 tilde_negation_operator=0;
             }
             else
             if ((strcmp(options.vars()->optarg,"tilde")!=0) && (strcmp(options.vars()->optarg,"~")!=0)) {
                 error("You must specify a valid argument for negation operator\n");
                 free_vector_ptr(injected);
                 return USAGE_ERROR_CODE;                 
             }
             break;
   case 'n': if (1!=sscanf(options.vars()->optarg,"%d%c",&search_limit,&foo) || search_limit<=0) {
                /* foo is used to check that the search limit is not like "45gjh" */
                error("Invalid search limit argument: %s\n",options.vars()->optarg);
                free_vector_ptr(injected);
                return USAGE_ERROR_CODE;                
             }
             break;
   case 'S': match_policy=SHORTEST_MATCHES; break;
   case 'L': match_policy=LONGEST_MATCHES; break;
   case 'A': match_policy=ALL_MATCHES; break;
   case 'I': output_policy=IGNORE_OUTPUTS; break;
   case 'M': output_policy=MERGE_OUTPUTS; break;
   case 'R': output_policy=REPLACE_OUTPUTS; break;
   case 'X': variable_error_policy=EXIT_ON_VARIABLE_ERRORS; break;
   case 'Y': variable_error_policy=IGNORE_VARIABLE_ERRORS; break;
   case 'Z': variable_error_policy=BACKTRACK_ON_VARIABLE_ERRORS; break;
   case 'b': ambiguous_output_policy=ALLOW_AMBIGUOUS_OUTPUTS; break;
   case 'z': ambiguous_output_policy=IGNORE_AMBIGUOUS_OUTPUTS; break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage(); 
             return SUCCESS_RETURN_CODE;
   case 1: tagging=1; break;
   case 2: single_tags_only=1; break;
   case 3: match_word_boundaries=0; break;
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                free_vector_ptr(injected);
                return USAGE_ERROR_CODE;                
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                free_vector_ptr(injected);
                return USAGE_ERROR_CODE;                
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case 'v': {
	   unichar* key=u_strdup(options.vars()->optarg);
	   unichar* value=u_strchr(key,'=');
	   if (value==NULL) {
		   error("Invalid variable injection: %s\n",options.vars()->optarg);
       free_vector_ptr(injected);
       return USAGE_ERROR_CODE;       
	   }
	   (*value)='\0';
	   value++;
	   value=u_strdup(value);
	   vector_ptr_add(injected,key);
	   vector_ptr_add(injected,value);
	   break;
   }
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_LocateTfst[index].name);
             free_vector_ptr(injected);
             return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             free_vector_ptr(injected);
             return USAGE_ERROR_CODE;
             break;
   }
   index=-1;
}

if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   free_vector_ptr(injected);
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  free_vector_ptr(injected);
  return SUCCESS_RETURN_CODE;
}

if (selected_negation_operator==0) {
    get_graph_compatibility_mode_by_file(&vec,&tilde_negation_operator);
}

char grammar[FILENAME_MAX];
char output[FILENAME_MAX];
strcpy(grammar,argv[options.vars()->optind]);
get_path(text,output);
strcat(output,"concord.ind");

int OK=locate_tfst(text,
                   grammar,
                   alphabet,
                   output,
                   &vec,
                   match_policy,
                   output_policy,
                   ambiguous_output_policy,
                   variable_error_policy,
                   search_limit,
                   is_korean,
                   tilde_negation_operator,
                   injected,
                   tagging,
                   single_tags_only,
                   match_word_boundaries);

free_vector_ptr(injected);

return (!OK);
}
コード例 #24
0
int main_DuplicateFile(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

const char *input_file = NULL;
const char *output_file = NULL;
int do_delete=0;
int do_recursive_delete=0;
int do_move=0;
int do_make_dir=0;
int do_make_dir_parent=0;
int val,index=-1;
bool only_verify_arguments = false;
UnitexGetOpt options;

while (EOF!=(val=options.parse_long(argc,argv,optstring_DuplicateFile,lopts_DuplicateFile,&index))) {
   switch(val) {
   case 'a': do_make_dir = 1; break;
   case 'p': do_make_dir_parent = 1; break;
   case 'd': do_delete = 1; break;
   case 'r': do_delete = do_recursive_delete = 1; break;
   case 'i': if (options.vars()->optarg[0]=='\0') {
                error("Empty input argument\n");
                return USAGE_ERROR_CODE;
             }
             input_file = options.vars()->optarg;
             break;
   case 'm': if (options.vars()->optarg[0]=='\0') {
                error("Empty move argument\n");
                return USAGE_ERROR_CODE;
             }
             input_file = options.vars()->optarg;
             do_move=1;
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage();
             return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt):
                         error("Missing argument for option --%s\n",lopts_DuplicateFile[index].name);
             return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
   case 'k':
   case 'q': /* ignore -k and -q parameter instead to raise an error */
             break;
   }
   index=-1;
}

if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}

output_file = argv[options.vars()->optind];

if ((input_file==NULL) && (do_delete==0) && (do_make_dir==0) && (do_make_dir_parent ==0)) {
   error("You must specify the input_file file\n");
   return USAGE_ERROR_CODE;
}

if ((input_file!=NULL) && (do_delete==1)) {
   error("You cannot specify input_file when delete\n");
   return USAGE_ERROR_CODE;
}
if (output_file==NULL) {
   error("You must specify the output_file file\n");
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  return SUCCESS_RETURN_CODE;
}

int result = 0;
if (input_file != NULL) {
    if (do_move == 0) {
        u_printf("copy file %s to %s\n",input_file,output_file);
        /* af_copy return 0 if success, -1 with reading problem, 1 writing problem */
        result=af_copy(input_file,output_file);
    } else {
        u_printf("move file %s to %s\n",input_file,output_file);
        result=af_rename(input_file,output_file);
    }
} else if (do_make_dir != 0) {
    u_printf("make dir %s\n", output_file);
    result = mkDirPortable(output_file);
} else if (do_make_dir_parent != 0) {
    u_printf("make dir %s with parent\n", output_file);
    result = mkDirRecursiveIfNeeded(output_file);
} else {
    if (do_recursive_delete == 0) {
        u_printf("remove file %s\n",output_file);
        result=af_remove(output_file);
    } else {
        u_printf("remove folder %s\n", output_file);
        af_remove_folder(output_file);
        result=0;
    }
}
u_printf((result==0) ? "Done.\n" : "Unsucessfull.\n");
return result;
}