int main_Fst2Txt(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } struct fst2txt_parameters* p=new_fst2txt_parameters(); char in_offsets[FILENAME_MAX]=""; char out_offsets[FILENAME_MAX]=""; int val,index=-1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_Fst2Txt,lopts_Fst2Txt,&index))) { switch(val) { case 't': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty text file name\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } p->input_text_file=strdup(options.vars()->optarg); if (p->input_text_file==NULL) { alloc_error("main_Fst2Txt"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } break; case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty text output file name\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } p->output_text_file=strdup(options.vars()->optarg); p->output_text_file_is_temp=0; if (p->output_text_file==NULL) { alloc_error("main_Fst2Txt"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } break; case 'a': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty alphabet file name\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } p->alphabet_file=strdup(options.vars()->optarg); if (p->alphabet_file==NULL) { alloc_error("main_Fst2Txt"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } break; case 'M': p->output_policy=MERGE_OUTPUTS; break; case 'R': p->output_policy=REPLACE_OUTPUTS; break; case 'c': p->tokenization_policy=CHAR_BY_CHAR_TOKENIZATION; break; case 'w': p->tokenization_policy=WORD_BY_WORD_TOKENIZATION; break; case 's': p->space_policy=START_WITH_SPACE; break; case 'x': p->space_policy=DONT_START_WITH_SPACE; break; case 'V': only_verify_arguments = true; break; case 'h': usage(); free_fst2txt_parameters(p); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_Fst2Txt[index].name); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(p->vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(p->vec.encoding_output),&(p->vec.bom_output),options.vars()->optarg); break; case '$': if (options.vars()->optarg[0]=='\0') { error("Empty input_offsets argument\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } strcpy(in_offsets,options.vars()->optarg); break; case '@': if (options.vars()->optarg[0]=='\0') { error("Empty output_offsets argument\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } strcpy(out_offsets,options.vars()->optarg); break; case 'l': p->convLFtoCRLF=0; break; case 'r': p->keepCR = 1; break; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } if (p->input_text_file==NULL) { error("You must specify the text file\n"); free_fst2txt_parameters(p); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory free_fst2txt_parameters(p); return SUCCESS_RETURN_CODE; } if (out_offsets[0]!='\0') { /* We deal with offsets only if the program is expected to produce some */ if (in_offsets[0]!='\0') { p->v_in_offsets=load_offsets(&(p->vec),in_offsets); if (p->v_in_offsets==NULL) { error("Cannot load offset file %s\n",in_offsets); free_fst2txt_parameters(p); return DEFAULT_ERROR_CODE; } } else { /* If there is no input offset file, we create an empty offset vector * in order to avoid testing whether the vector is NULL or not */ p->v_in_offsets=new_vector_offset(1); } p->f_out_offsets=u_fopen(&(p->vec),out_offsets,U_WRITE); if (p->f_out_offsets==NULL) { error("Cannot create file %s\n",out_offsets); free_fst2txt_parameters(p); return DEFAULT_ERROR_CODE; } } if (p->output_text_file == NULL) { char tmp[FILENAME_MAX]; remove_extension(p->input_text_file, tmp); strcat(tmp, ".tmp"); p->output_text_file_is_temp=1; p->output_text_file = strdup(tmp); if (p->output_text_file == NULL) { alloc_error("main_Fst2Txt"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } } p->fst_file=strdup(argv[options.vars()->optind]); if (p->fst_file==NULL) { alloc_error("main_Fst2Txt"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } int result=main_fst2txt(p); free_fst2txt_parameters(p); return result; }
int main_XMLizer(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } int output_style=TEI; char output[FILENAME_MAX]=""; char alphabet[FILENAME_MAX]=""; char normalization[FILENAME_MAX]=""; char segmentation[FILENAME_MAX]=""; VersatileEncodingConfig vec=VEC_DEFAULT; int convLFtoCRLF=1; int val,index=-1; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_XMLizer,lopts_XMLizer,&index))) { switch(val) { case 'x': output_style=XML; break; case 't': output_style=TEI; break; case 'n': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty normalization grammar name\n"); return USAGE_ERROR_CODE; } strcpy(normalization,options.vars()->optarg); break; case 'o': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty output file name\n"); return USAGE_ERROR_CODE; } strcpy(output,options.vars()->optarg); break; case 'a': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty alphabet file name\n"); return USAGE_ERROR_CODE; } strcpy(alphabet,options.vars()->optarg); break; case 's': if (options.vars()->optarg[0]=='\0') { error("You must specify a non empty segmentation grammar name\n"); return USAGE_ERROR_CODE; } strcpy(segmentation,options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_XMLizer[index].name); return USAGE_ERROR_CODE; case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind!=argc-1) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (segmentation[0]=='\0') { error("You must specify the segmentation grammar to use\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } char input[FILENAME_MAX]; strcpy(input,argv[options.vars()->optind]); char snt[FILENAME_MAX]; remove_extension(input,snt); strcat(snt,"_tmp.snt"); char tmp[FILENAME_MAX]; remove_extension(input,tmp); strcat(tmp,".tmp"); normalize(input,snt,&vec,KEEP_CARRIAGE_RETURN,convLFtoCRLF,normalization,NULL,1); struct fst2txt_parameters* p=new_fst2txt_parameters(); p->vec=vec; p->input_text_file=strdup(snt); if (p->input_text_file ==NULL) { alloc_error("main_XMLizer"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } p->output_text_file_is_temp=1; p->output_text_file=strdup(tmp); if (p->output_text_file==NULL) { alloc_error("main_XMLizer"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } p->fst_file=strdup(segmentation); if (p->fst_file==NULL) { alloc_error("main_XMLizer"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } p->alphabet_file=strdup(alphabet); if (p->alphabet_file==NULL) { alloc_error("main_XMLizer"); free_fst2txt_parameters(p); return ALLOC_ERROR_CODE; } p->output_policy=MERGE_OUTPUTS; p->tokenization_policy=WORD_BY_WORD_TOKENIZATION; p->space_policy=DONT_START_WITH_SPACE; main_fst2txt(p); free_fst2txt_parameters(p); if (output[0]=='\0') { remove_extension(input,output); strcat(output,".xml"); } int return_value = xmlize(&vec,snt,output,output_style); af_remove(snt); af_remove(tmp); return return_value; }