Exemplo n.º 1
0
int initialize_working_directory(const char *text,int must_create_directory){
	char path[FILENAME_MAX];
	get_path(text,path);

	char canonical_name[FILENAME_MAX];
	remove_path_and_extension(text, canonical_name);

	char extension[FILENAME_MAX];
	get_extension(text,extension);

	char working_directory[FILENAME_MAX];
	sprintf(working_directory, "%s%s%s%c",path, canonical_name, CASSYS_DIRECTORY_EXTENSION, PATH_SEPARATOR_CHAR);

	if (must_create_directory != 0) {
        make_directory(working_directory);
    }

	char text_in_wd[FILENAME_MAX];
	sprintf(text_in_wd, "%s%s_0%s",working_directory,canonical_name,extension );
	copy_file(text_in_wd,text);

	char snt_dir_text_in_wd[FILENAME_MAX];
	get_snt_path(text_in_wd, snt_dir_text_in_wd);
    if (must_create_directory != 0) {
        make_directory(snt_dir_text_in_wd);
    }

	char original_snt_dir[FILENAME_MAX];
	get_snt_path(text,original_snt_dir);
	copy_directory_snt_content(snt_dir_text_in_wd, original_snt_dir);

	return 0;
}
Exemplo n.º 2
0
int initialize_working_directory_before_tokenize(const char*text, int must_create_directory)
{
	char snt_dir[FILENAME_MAX];
	get_snt_path(text, snt_dir);
	if (must_create_directory != 0) {
		make_cassys_directory(snt_dir);
	}

	return 0;
}
Exemplo n.º 3
0
int main_Untokenize(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

char alphabet[FILENAME_MAX]="";
char token_file[FILENAME_MAX]="";
char dynamicSntDir[FILENAME_MAX]="";
VersatileEncodingConfig vec=VEC_DEFAULT;
int val,index=-1;
int range_start,range_stop,use_range;
int token_step_number=0;
range_start=range_stop=use_range=0;
char foo=0;
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_Untokenize,lopts_Untokenize,&index))) {
   switch(val) {
   case 'a': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty alphabet file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(alphabet,options.vars()->optarg);
             break;
   case 'd': if (options.vars()->optarg[0]=='\0') {
                   error("You must specify a non empty snt dir name\n");
                   return USAGE_ERROR_CODE;
                }
                strcpy(dynamicSntDir,options.vars()->optarg);
                break;
   case 't': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty token file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(token_file,options.vars()->optarg);
             break;
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;

   case 'n': if (1!=sscanf(options.vars()->optarg,"%d%c",&token_step_number,&foo) || token_step_number<=0) {
                /* foo is used to check that the search limit is not like "45gjh" */
                error("Invalid token numbering argument: %s\n",options.vars()->optarg);
                return USAGE_ERROR_CODE;
             }
             break;
   case 'r': {
                int param1 = 0;
                int param2 = 0;
                int ret_scan = sscanf(options.vars()->optarg,"%d,%d%c",&param1,&param2,&foo);
                if (ret_scan == 2) {
                    range_start = param1;
                    range_stop  = param2;
                    use_range=1;
                    if (((range_start < -1)) || (range_stop < -1)) {
                        /* foo is used to check that the search limit is not like "45gjh" */
                        error("Invalid stop count argument: %s\n",options.vars()->optarg);
                        return USAGE_ERROR_CODE;
                    }
                }
                else
                    if (1!=sscanf(options.vars()->optarg,"%d%c",&range_start,&foo) || (range_start < -1)) {
                        /* foo is used to check that the search limit is not like "45gjh" */
                        error("Invalid stop count argument: %s\n",options.vars()->optarg);
                        return USAGE_ERROR_CODE;
                    }
                    use_range=1;
             }
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage(); 
             return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_Untokenize[index].name);
             return USAGE_ERROR_CODE;            
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
   }
   index=-1;
}

if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  return SUCCESS_RETURN_CODE;
}

char tokens_txt[FILENAME_MAX];
char text_cod[FILENAME_MAX];
char enter_pos[FILENAME_MAX];

if (dynamicSntDir[0]=='\0') {
    get_snt_path(argv[options.vars()->optind],dynamicSntDir);
}

strcpy(text_cod,dynamicSntDir);
strcat(text_cod,"text.cod");
strcpy(enter_pos,dynamicSntDir);
strcat(enter_pos,"enter.pos");
strcpy(tokens_txt,dynamicSntDir);
strcat(tokens_txt,"tokens.txt");

Alphabet* alph=NULL;
if (alphabet[0]!='\0') {
   alph=load_alphabet(&vec,alphabet);
   if (alph==NULL) {
      error("Cannot load alphabet file %s\n",alphabet);
      return DEFAULT_ERROR_CODE;
   }
}

ABSTRACTMAPFILE* af_text_cod=af_open_mapfile(text_cod,MAPFILE_OPTION_READ,0);
if (af_text_cod==NULL) {
  error("Cannot open file %s\n",text_cod);
  free_alphabet(alph);
  return DEFAULT_ERROR_CODE;
}

ABSTRACTMAPFILE* af_enter_pos=af_open_mapfile(enter_pos,MAPFILE_OPTION_READ,0);
if (af_enter_pos==NULL) {
  error("Cannot open file %s\n",enter_pos);
  af_close_mapfile(af_text_cod);
  free_alphabet(alph);
  return DEFAULT_ERROR_CODE;
}

U_FILE* text = u_fopen(&vec,argv[options.vars()->optind],U_WRITE);
if (text==NULL) {
  error("Cannot create text file %s\n",argv[options.vars()->optind]);
  af_close_mapfile(af_enter_pos);
  af_close_mapfile(af_text_cod);
  free_alphabet(alph);
  return DEFAULT_ERROR_CODE;
}

struct text_tokens* tok=load_text_tokens(&vec,tokens_txt);
u_printf("Untokenizing text...\n");
size_t nb_item = af_get_mapfile_size(af_text_cod)/sizeof(int);
const int* buf=(const int*)af_get_mapfile_pointer(af_text_cod);

size_t nb_item_enter_pos=0;
const int* buf_enter=NULL;

if (af_enter_pos!=NULL) {
    buf_enter=(const int*)af_get_mapfile_pointer(af_enter_pos);
    if (buf_enter!=NULL) {
        nb_item_enter_pos=af_get_mapfile_size(af_enter_pos)/sizeof(int);
    }
}

size_t count_pos=0;
for (size_t i=0;i<nb_item;i++) {
    int is_in_range=1;
    if ((use_range!=0) && (i<(size_t)range_start)) {
        is_in_range=0;
    }
    if ((use_range!=0) && (range_stop!=0) && (i>(size_t)range_stop)) {
        is_in_range=0;
    }
    int is_newline=0;
    if (count_pos<nb_item_enter_pos) {
        if (i==(size_t)(*(buf_enter+count_pos))) {
            is_newline = 1;
            count_pos++;
        }
    }

    if (is_in_range!=0) {
        if (token_step_number != 0)
            if ((i%token_step_number)==0)
                u_fprintf(text,"\n\nToken %d : ", (int)i);

        if (is_newline!=0) {
            u_fprintf(text,"\n", tok->token[*(buf+i)]);
        }
        else {
			u_fputs(tok->token[*(buf+i)], text);
        }
    }
}

af_release_mapfile_pointer(af_text_cod,buf);
af_release_mapfile_pointer(af_enter_pos,buf_enter);
af_close_mapfile(af_enter_pos);
af_close_mapfile(af_text_cod);
free_text_tokens(tok);
u_fclose(text);
free_alphabet(alph);

u_printf("\nDone.\n");
return SUCCESS_RETURN_CODE;
}
Exemplo n.º 4
0
int main_SpellCheck(int argc,char* const argv[]) {
if (argc==1) {
    usage();
    return SUCCESS_RETURN_CODE;
}

VersatileEncodingConfig vec=VEC_DEFAULT;
int val,index=-1;
char mode=0;
char snt[FILENAME_MAX]="";
char txt[FILENAME_MAX]="";
char output[FILENAME_MAX]="";
char output_set=0;
char output_op='A';
SpellCheckConfig config;
config.max_errors=1;
config.max_SP_INSERT=1;
config.max_SP_SUPPR=1;
config.max_SP_SWAP=1;
config.max_SP_CHANGE=1;
for (int i=0;i<N_SPSubOp;i++) {
    config.score[i]=default_scores[i];
}
config.min_length1=4;
config.min_length2=6;
config.min_length3=12;
config.input_op='D';
config.keyboard=NULL;
config.allow_uppercase_initial=0;
char foo;
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_SpellCheck,lopts_SpellCheck,&index))) {
   switch(val) {
   case 's': {
       strcpy(snt,options.vars()->optarg);
       mode='s';
       break;
   }
   case 'f': {
       strcpy(txt,options.vars()->optarg);
       mode='f';
       break;
   }
   case 'o': {
       if (options.vars()->optarg!=NULL) {
           strcpy(output,options.vars()->optarg);
       }
       output_set=1;
       break;
   }
   case 'I': {
       if (!strcmp(options.vars()->optarg,"D") || !strcmp(options.vars()->optarg,"M") || !strcmp(options.vars()->optarg,"U")) {
           config.input_op=options.vars()->optarg[0];
       } else {
       error("Invalid argument %s for option --input-op: should in [DMU]\n",options.vars()->optarg);
       return USAGE_ERROR_CODE;
       }
       break;
   }
   case 'O': {
       if (!strcmp(options.vars()->optarg,"O") || !strcmp(options.vars()->optarg,"A")) {
           output_op=options.vars()->optarg[0];
       } else {
           error("Invalid argument %s for option --output-op: should in [OA]\n",options.vars()->optarg);
       return USAGE_ERROR_CODE;
       }
       break;
   }
   case 1: {
       config.keyboard=get_Keyboard(options.vars()->optarg);
       if (config.keyboard==NULL) {
           error("Invalid argument %s for option --keyboard:\nUse --show-keyboards to see possible values\n",options.vars()->optarg);
       return USAGE_ERROR_CODE;
       }
       break;
   }
   case 2: {
       print_available_keyboards(U_STDOUT);
       return SUCCESS_RETURN_CODE;
   }
   case 10: {
       if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_errors,&foo)) {
           error("Invalid argument %s for --max-errors: should be an integer >=0\n",options.vars()->optarg);
       return USAGE_ERROR_CODE;
       }
       break;
   }
   case 11: {
       if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_INSERT,&foo)) {
           error("Invalid argument %s for --max-insert: should be an integer >=0\n",options.vars()->optarg);
       return USAGE_ERROR_CODE;
       }
       break;
   }
   case 12: {
       if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_SUPPR,&foo)) {
           error("Invalid argument %s for --max-suppr: should be an integer >=0\n",options.vars()->optarg);
       return USAGE_ERROR_CODE;
       }
       break;
   }
   case 13: {
       if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_CHANGE,&foo)) {
           error("Invalid argument %s for --max-change: should be an integer >=0\n",options.vars()->optarg);
       return USAGE_ERROR_CODE;
       }
       break;
   }
   case 14: {
       if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_SWAP,&foo)) {
           error("Invalid argument %s for --max-swap: should be an integer >=0\n",options.vars()->optarg);
       return USAGE_ERROR_CODE;
       }
       break;
   }
   case 20: {
       int* scores=config.score;
       if (N_SPSubOp!=sscanf(options.vars()->optarg,"%d,%d,%d,%d,%d,%d,%d,%d,%d%c",
            scores,scores+1,scores+2,scores+3,scores+4,scores+5,
            scores+6,scores+7,scores+8,&foo)) {
            error("Invalid argument %s for option --scores. See --help-scores\n",options.vars()->optarg);
        return USAGE_ERROR_CODE;
       }
       break;
   }
   case 21: {
       usage_scores();
       return SUCCESS_RETURN_CODE;
   }
   case 22: {
       if (3!=sscanf(options.vars()->optarg,"%u,%u,%u%c",
            &config.min_length1,&config.min_length2,&config.min_length3,&foo)) {
            error("Invalid argument %s for option --min-lengths\n",options.vars()->optarg);
        return USAGE_ERROR_CODE;
       }
       break;
   }
   case 23: {
       if (!strcmp(options.vars()->optarg,"yes")) {
           config.allow_uppercase_initial=1;
       } else if (!strcmp(options.vars()->optarg,"no")) {
           config.allow_uppercase_initial=0;
       } else {
           error("Invalid argument %s for option --upper-initial\n",options.vars()->optarg);
       return USAGE_ERROR_CODE;
       }
       break;
   }
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage();
             return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_SpellCheck[index].name);
             return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
   }
   index=-1;
}

if (options.vars()->optind==argc) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}

if (mode==0) {
  error("You must use either --snt or --file\n");
  return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  return SUCCESS_RETURN_CODE;
}

config.n_dics=argc-options.vars()->optind;
config.dics=(Dictionary**)malloc(config.n_dics*sizeof(Dictionary*));
if (config.dics==NULL) {
    alloc_error("main_SpellCheck");
  return ALLOC_ERROR_CODE;
}

for (int i=0;i<config.n_dics;i++) {
    config.dics[i]=new_Dictionary(&vec,argv[i+options.vars()->optind]);
    if (config.dics[i]==NULL) {
        error("Cannot load dictionary %s\n",argv[i+options.vars()->optind]);
    }
}

config.out=U_STDOUT;
config.n_input_lines=0;
config.n_output_lines=0;

if (mode=='s') {
    /* When working with a .snt, we actually want to work on its err file */
    get_snt_path(snt,txt);
    strcat(txt,"err");
    /* the output must be dlf, and we note the number of lines in the existing
     * dlf file, if any */
    get_snt_path(snt,output);
    strcat(output,"dlf.n");
    U_FILE* f=u_fopen(&vec,output,U_READ);
    if (f!=NULL) {
        u_fscanf(f,"%d",&(config.n_output_lines));
        u_fclose(f);
    }
    get_snt_path(snt,output);
    strcat(output,"dlf");
    output_set=1;
    /* and we force the values for -I and -O */
    config.input_op='U';
    output_op='A';
} else {
    /* If mode=='f', we don't have anything to do since we already
     * defined the default output to stdout */
}

if (output_set) {
    if (output_op=='O') {
        config.out=u_fopen(&vec,output,U_WRITE);
    } else {
        config.out=u_fopen(&vec,output,U_APPEND);
    }
    if (config.out==NULL) {
        error("Cannot open output file %s\n",output);
    for (int i=0;i<config.n_dics;i++) {
      free_Dictionary(config.dics[i]);
    }
    free(config.dics);
    return DEFAULT_ERROR_CODE;
    }
}

config.modified_input=NULL;
char modified_input[FILENAME_MAX]="";
if (config.input_op!='D') {
    strcpy(modified_input,txt);
    strcat(modified_input,".tmp");
    config.modified_input=u_fopen(&vec,modified_input,U_WRITE);
    if (config.modified_input==NULL) {
        error("Cannot open tmp file %s\n",modified_input);
    if (config.out!=U_STDOUT) {
      u_fclose(config.out);
    }
    for (int i=0;i<config.n_dics;i++) {
      free_Dictionary(config.dics[i]);
    }
    free(config.dics);
    return DEFAULT_ERROR_CODE;
    }
}

config.in=u_fopen(&vec,txt,U_READ);
if (config.in==NULL) {
    error("Cannot open file %s\n",txt);
  u_fclose(config.modified_input);
  if (config.out!=U_STDOUT) {
    u_fclose(config.out);
  }
  for (int i=0;i<config.n_dics;i++) {
    free_Dictionary(config.dics[i]);
  }
  free(config.dics);
  return DEFAULT_ERROR_CODE;
}

/* We perform spellchecking */
spellcheck(&config);

/* And we clean */

u_fclose(config.in);

if (config.modified_input!=NULL) {
  /* If we used a tmp file because the input file has to be modified,
   * it's now time to actually modify it */
  u_fclose(config.modified_input);
  af_remove(txt);
  af_rename(modified_input,txt);
}

if (config.out!=U_STDOUT) {
  u_fclose(config.out);
}

for (int i=0;i<config.n_dics;i++) {
  free_Dictionary(config.dics[i]);
}
free(config.dics);

/* Finally, we update the dlf.n and err.n files if mode=='s' */
if (mode=='s') {
    get_snt_path(snt,output);
    strcat(output,"err.n");
    U_FILE* f=u_fopen(&vec,output,U_WRITE);
    if (f!=NULL) {
        u_fprintf(f,"%d",config.n_input_lines);
        u_fclose(f);
    }
    if (config.input_op!='D') {
        get_snt_path(snt,output);
        strcat(output,"dlf.n");
        U_FILE* fw=u_fopen(&vec,output,U_WRITE);
        if (fw!=NULL) {
            u_fprintf(fw,"%d",config.n_output_lines);
            u_fclose(fw);
        }
    }
}

return SUCCESS_RETURN_CODE;
}
Exemplo n.º 5
0
char* create_labeled_files_and_directory(const char *text, int next_transducer_label,int must_create_directory,int must_copy_file) {
	char path[FILENAME_MAX];
	get_path(text, path);

	char canonical_text_name[FILENAME_MAX];
	remove_path_and_extension(text, canonical_text_name);

	char extension[FILENAME_MAX];
	get_extension(text, extension);

	char working_directory[FILENAME_MAX];
	sprintf(working_directory, "%s%s%s%c", path, canonical_text_name,
			CASSYS_DIRECTORY_EXTENSION, PATH_SEPARATOR_CHAR);

	// copy the text label i- to i
	char old_labeled_text_name[FILENAME_MAX];
	sprintf(old_labeled_text_name, "%s%s_%d%s", working_directory,
			canonical_text_name, next_transducer_label - 1, extension);

	char new_labeled_text_name[FILENAME_MAX];
	sprintf(new_labeled_text_name, "%s%s_%d%s", working_directory,
			canonical_text_name, next_transducer_label, extension);

	char new_labeled_snt_directory[FILENAME_MAX];
	get_snt_path(new_labeled_text_name, new_labeled_snt_directory);
    if (must_create_directory != 0) {
        make_directory(new_labeled_snt_directory);
    }

    if (must_copy_file != 0)
    {
	    copy_file(new_labeled_text_name, old_labeled_text_name);

	    // create snt directory labeled i
	    char old_labeled_snt_directory[FILENAME_MAX];
	    get_snt_path(old_labeled_text_name, old_labeled_snt_directory);


	    // copy dictionary files in the new snt directory
	    struct snt_files *old_snt_ = new_snt_files(old_labeled_text_name);
	    struct snt_files *new_snt_ = new_snt_files(new_labeled_text_name);

	    if (fexists(old_snt_->dlc)) {
		    copy_file(new_snt_->dlc, old_snt_->dlc);
	    }
	    if (fexists(old_snt_-> dlf)) {
		    copy_file(new_snt_->dlf, old_snt_->dlf);
	    }
	    if (fexists(old_snt_-> err)) {
		    copy_file(new_snt_->err, old_snt_->err);
	    }
	    if (fexists(old_snt_->dlc_n)) {
		    copy_file(new_snt_->dlc_n, old_snt_->dlc_n);
	    }
	    if (fexists(old_snt_->dlf_n)) {
		    copy_file(new_snt_->dlf_n, old_snt_->dlf_n);
	    }
	    if (fexists(old_snt_-> err_n)) {
		    copy_file(new_snt_->err_n, old_snt_->err_n);
	    }
	    if (fexists(old_snt_->stat_dic_n)) {
		    copy_file(new_snt_->stat_dic_n, old_snt_->stat_dic_n);
	    }
	    free_snt_files(old_snt_);
	    free_snt_files(new_snt_);
    }
	char *labeled_text_name;
	labeled_text_name = (char*)malloc(sizeof(char)*(strlen(new_labeled_text_name)+1));
	if(labeled_text_name == NULL){
		perror("malloc\n");
		fprintf(stderr,"Impossible to allocate memory\n");
		exit(1);
	}
	strcpy(labeled_text_name, new_labeled_text_name);
	return labeled_text_name;
}
Exemplo n.º 6
0
int main_Tokenize(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return 0;
}

char alphabet[FILENAME_MAX]="";
char token_file[FILENAME_MAX]="";

Encoding encoding_output = DEFAULT_ENCODING_OUTPUT;
int bom_output = DEFAULT_BOM_OUTPUT;
int mask_encoding_compatibility_input = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT;
int val,index=-1;
int mode=NORMAL;
struct OptVars* vars=new_OptVars();
while (EOF!=(val=getopt_long_TS(argc,argv,optstring_Tokenize,lopts_Tokenize,&index,vars))) {
   switch(val) {
   case 'a': if (vars->optarg[0]=='\0') {
                fatal_error("You must specify a non empty alphabet file name\n");
             }
             strcpy(alphabet,vars->optarg);
             break;
   case 'c': mode=CHAR_BY_CHAR; break;
   case 'w': mode=NORMAL; break;
   case 't': if (vars->optarg[0]=='\0') {
                fatal_error("You must specify a non empty token file name\n");
             }
             strcpy(token_file,vars->optarg);
             break;
   case 'k': if (vars->optarg[0]=='\0') {
                fatal_error("Empty input_encoding argument\n");
             }
             decode_reading_encoding_parameter(&mask_encoding_compatibility_input,vars->optarg);
             break;
   case 'q': if (vars->optarg[0]=='\0') {
                fatal_error("Empty output_encoding argument\n");
             }
             decode_writing_encoding_parameter(&encoding_output,&bom_output,vars->optarg);
             break;
   case 'h': usage(); return 0;
   case ':': if (index==-1) fatal_error("Missing argument for option -%c\n",vars->optopt);
             else fatal_error("Missing argument for option --%s\n",lopts_Tokenize[index].name);
   case '?': if (index==-1) fatal_error("Invalid option -%c\n",vars->optopt);
             else fatal_error("Invalid option --%s\n",vars->optarg);
             break;
   }
   index=-1;
}

if (vars->optind!=argc-1) {
   fatal_error("Invalid arguments: rerun with --help\n");
}
U_FILE* text;
U_FILE* out;
U_FILE* output;
U_FILE* enter;
char tokens_txt[FILENAME_MAX];
char text_cod[FILENAME_MAX];
char enter_pos[FILENAME_MAX];
Alphabet* alph=NULL;

get_snt_path(argv[vars->optind],text_cod);
strcat(text_cod,"text.cod");
get_snt_path(argv[vars->optind],tokens_txt);
strcat(tokens_txt,"tokens.txt");
get_snt_path(argv[vars->optind],enter_pos);
strcat(enter_pos,"enter.pos");
text=u_fopen_existing_versatile_encoding(mask_encoding_compatibility_input,argv[vars->optind],U_READ);
if (text==NULL) {
   fatal_error("Cannot open text file %s\n",argv[vars->optind]);
}
if (alphabet[0]!='\0') {
   alph=load_alphabet(alphabet);
   if (alph==NULL) {
      error("Cannot load alphabet file %s\n",alphabet);
      u_fclose(text);
      return 1;
   }
}
out=u_fopen(BINARY,text_cod,U_WRITE);
if (out==NULL) {
   error("Cannot create file %s\n",text_cod);
   u_fclose(text);
   if (alph!=NULL) {
      free_alphabet(alph);
   }
   return 1;
}
enter=u_fopen(BINARY,enter_pos,U_WRITE);
if (enter==NULL) {
   error("Cannot create file %s\n",enter_pos);
   u_fclose(text);
   u_fclose(out);
   if (alph!=NULL) {
      free_alphabet(alph);
   }
   return 1;
}


vector_ptr* tokens=new_vector_ptr(4096);
vector_int* n_occur=new_vector_int(4096);
vector_int* n_enter_pos=new_vector_int(4096);
struct hash_table* hashtable=new_hash_table((HASH_FUNCTION)hash_unichar,(EQUAL_FUNCTION)u_equal,
                                            (FREE_FUNCTION)free,NULL,(KEYCOPY_FUNCTION)keycopy);
if (token_file[0]!='\0') {
   load_token_file(token_file,mask_encoding_compatibility_input,tokens,hashtable,n_occur);
}

output=u_fopen_creating_versatile_encoding(encoding_output,bom_output,tokens_txt,U_WRITE);
if (output==NULL) {
   error("Cannot create file %s\n",tokens_txt);
   u_fclose(text);
   u_fclose(out);
   u_fclose(enter);
   if (alph!=NULL) {
      free_alphabet(alph);
   }

   free_hash_table(hashtable);
   free_vector_ptr(tokens,free);
   free_vector_int(n_occur);
   free_vector_int(n_enter_pos);

   return 1;
}
u_fprintf(output,"0000000000\n");

int SENTENCES=0;
int TOKENS_TOTAL=0;
int WORDS_TOTAL=0;
int DIGITS_TOTAL=0;
u_printf("Tokenizing text...\n");
if (mode==NORMAL) {
   normal_tokenization(text,out,output,alph,tokens,hashtable,n_occur,n_enter_pos,
		   &SENTENCES,&TOKENS_TOTAL,&WORDS_TOTAL,&DIGITS_TOTAL);
}
else {
   char_by_char_tokenization(text,out,output,alph,tokens,hashtable,n_occur,n_enter_pos,
		   &SENTENCES,&TOKENS_TOTAL,&WORDS_TOTAL,&DIGITS_TOTAL);
}
u_printf("\nDone.\n");
save_new_line_positions(enter,n_enter_pos);
u_fclose(enter);
u_fclose(text);
u_fclose(out);
u_fclose(output);
write_number_of_tokens(tokens_txt,encoding_output,bom_output,tokens->nbelems);
// we compute some statistics
get_snt_path(argv[vars->optind],tokens_txt);
strcat(tokens_txt,"stats.n");
output=u_fopen_creating_versatile_encoding(encoding_output,bom_output,tokens_txt,U_WRITE);
if (output==NULL) {
   error("Cannot write %s\n",tokens_txt);
}
else {
   compute_statistics(output,tokens,alph,SENTENCES,TOKENS_TOTAL,WORDS_TOTAL,DIGITS_TOTAL);
   u_fclose(output);
}
// we save the tokens by frequence
get_snt_path(argv[vars->optind],tokens_txt);
strcat(tokens_txt,"tok_by_freq.txt");
output=u_fopen_creating_versatile_encoding(encoding_output,bom_output,tokens_txt,U_WRITE);
if (output==NULL) {
   error("Cannot write %s\n",tokens_txt);
}
else {
   sort_and_save_by_frequence(output,tokens,n_occur);
   u_fclose(output);
}
// we save the tokens by alphabetical order
get_snt_path(argv[vars->optind],tokens_txt);
strcat(tokens_txt,"tok_by_alph.txt");
output=u_fopen_creating_versatile_encoding(encoding_output,bom_output,tokens_txt,U_WRITE);
if (output==NULL) {
   error("Cannot write %s\n",tokens_txt);
}
else {
   sort_and_save_by_alph_order(output,tokens,n_occur);
   u_fclose(output);
}
free_hash_table(hashtable);
free_vector_ptr(tokens,free);
free_vector_int(n_occur);
free_vector_int(n_enter_pos);
if (alph!=NULL) {
   free_alphabet(alph);
}
free_OptVars(vars);
return 0;
}