Beispiel #1
0
int main_XMLizer(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

int output_style=TEI;
char output[FILENAME_MAX]="";
char alphabet[FILENAME_MAX]="";
char normalization[FILENAME_MAX]="";
char segmentation[FILENAME_MAX]="";
VersatileEncodingConfig vec=VEC_DEFAULT;
int convLFtoCRLF=1;
int val,index=-1;
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_XMLizer,lopts_XMLizer,&index))) {
   switch(val) {
   case 'x': output_style=XML; break;
   case 't': output_style=TEI; break;
   case 'n': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty normalization grammar name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(normalization,options.vars()->optarg);
             break;
   case 'o': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty output file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(output,options.vars()->optarg);
             break;
   case 'a': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty alphabet file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(alphabet,options.vars()->optarg);
             break;
   case 's': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty segmentation grammar name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(segmentation,options.vars()->optarg);
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage(); 
             return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_XMLizer[index].name);
             return USAGE_ERROR_CODE;                         
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case '?': index==-1  ? error("Invalid option -%c\n",options.vars()->optopt) :
                          error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;             
  
   }
   index=-1;
}

if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}

if (segmentation[0]=='\0') {
   error("You must specify the segmentation grammar to use\n");
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  return SUCCESS_RETURN_CODE;
}

char input[FILENAME_MAX];
strcpy(input,argv[options.vars()->optind]);
char snt[FILENAME_MAX];
remove_extension(input,snt);
strcat(snt,"_tmp.snt");
char tmp[FILENAME_MAX];
remove_extension(input,tmp);
strcat(tmp,".tmp");
normalize(input,snt,&vec,KEEP_CARRIAGE_RETURN,convLFtoCRLF,normalization,NULL,1);
struct fst2txt_parameters* p=new_fst2txt_parameters();
p->vec=vec;
p->input_text_file=strdup(snt);
if (p->input_text_file ==NULL) {
   alloc_error("main_XMLizer");
   free_fst2txt_parameters(p);
   return ALLOC_ERROR_CODE;
}

p->output_text_file_is_temp=1;
p->output_text_file=strdup(tmp);
if (p->output_text_file==NULL) {
   alloc_error("main_XMLizer");
   free_fst2txt_parameters(p);
   return ALLOC_ERROR_CODE;
}
p->fst_file=strdup(segmentation);
if (p->fst_file==NULL) {
   alloc_error("main_XMLizer");
   free_fst2txt_parameters(p);
   return ALLOC_ERROR_CODE;
}
p->alphabet_file=strdup(alphabet);
if (p->alphabet_file==NULL) {
   alloc_error("main_XMLizer");
   free_fst2txt_parameters(p);
   return ALLOC_ERROR_CODE;
}

p->output_policy=MERGE_OUTPUTS;
p->tokenization_policy=WORD_BY_WORD_TOKENIZATION;
p->space_policy=DONT_START_WITH_SPACE;

main_fst2txt(p);

free_fst2txt_parameters(p);

if (output[0]=='\0') {
  remove_extension(input,output);
	strcat(output,".xml");
}

int return_value = xmlize(&vec,snt,output,output_style);

af_remove(snt);
af_remove(tmp);

return return_value;
}
int main_RebuildTfst(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

VersatileEncodingConfig vec=VEC_DEFAULT;
int val, index=-1;
bool only_verify_arguments = false;
UnitexGetOpt options;
int save_statistics=1;
while (EOF!=(val=options.parse_long(argc,argv,optstring_RebuildTfst,lopts_RebuildTfst,&index))) {
   switch (val) {
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case 'S': save_statistics = 0;
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h':
      usage();
      return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n", options.vars()->optopt) :
                         error("Missing argument for option --%s\n", lopts_RebuildTfst[index].name);
     return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n", options.vars()->optopt) :
                         error("Invalid option --%s\n", options.vars()->optarg);
     return USAGE_ERROR_CODE;
   }
   index=-1;
}

if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  return SUCCESS_RETURN_CODE;
}

char input_tfst[FILENAME_MAX];
char input_tind[FILENAME_MAX];
strcpy(input_tfst,argv[options.vars()->optind]);
remove_extension(input_tfst,input_tind);
strcat(input_tind,".tind");

u_printf("Loading %s...\n",input_tfst);

Tfst* tfst = open_text_automaton(&vec,input_tfst);
if (tfst==NULL) {
   error("Unable to load %s automaton\n",input_tfst);
   return DEFAULT_ERROR_CODE;
}

char basedir[FILENAME_MAX];
get_path(input_tfst,basedir);
char output_tfst[FILENAME_MAX];
sprintf(output_tfst, "%s.new.tfst",input_tfst);
char output_tind[FILENAME_MAX];
sprintf(output_tind, "%s.new.tind",input_tfst);

U_FILE* f_tfst;
if ((f_tfst = u_fopen(&vec,output_tfst,U_WRITE)) == NULL) {
   error("Unable to open %s for writing\n", output_tfst);
   close_text_automaton(tfst);
   return DEFAULT_ERROR_CODE;
}

U_FILE* f_tind;
if ((f_tind = u_fopen(BINARY,output_tind,U_WRITE)) == NULL) {
   u_fclose(f_tfst);
   close_text_automaton(tfst);
   error("Unable to open %s for writing\n", output_tind);
   return DEFAULT_ERROR_CODE;
}
/* We use this hash table to rebuild files tfst_tags_by_freq/alph.txt */
struct hash_table* form_frequencies=new_hash_table((HASH_FUNCTION)hash_unichar,(EQUAL_FUNCTION)u_equal,
        (FREE_FUNCTION)free,NULL,(KEYCOPY_FUNCTION)keycopy);

u_fprintf(f_tfst,"%010d\n",tfst->N);
for (int i = 1; i <= tfst->N; i++) {
   if ((i % 100) == 0) {
      u_printf("%d/%d sentences rebuilt...\n", i, tfst->N);
   }
   load_sentence(tfst,i);

   char grfname[FILENAME_MAX];
   sprintf(grfname, "%ssentence%d.grf", basedir, i);
   unichar** tags=NULL;
   int n_tags=-1;
   if (fexists(grfname)) {
      /* If there is a .grf for the current sentence, then we must
       * take it into account */
      if (0==pseudo_main_Grf2Fst2(&vec,grfname,0,NULL,1,1,NULL,NULL,0)) {
         /* We proceed only if the graph compilation was a success */
         char fst2name[FILENAME_MAX];
         sprintf(fst2name, "%ssentence%d.fst2", basedir, i);
         struct FST2_free_info fst2_free;
         Fst2* fst2=load_abstract_fst2(&vec,fst2name,0,&fst2_free);
         af_remove(fst2name);
         free_SingleGraph(tfst->automaton,NULL);
         tfst->automaton=create_copy_of_fst2_subgraph(fst2,1);
         tags=create_tfst_tags(fst2,&n_tags);
         free_abstract_Fst2(fst2,&fst2_free);
      } else {
         error("Error: %s is not a valid sentence automaton\n",grfname);
      }
   }
   save_current_sentence(tfst,f_tfst,f_tind,tags,n_tags,form_frequencies);
   if (tags!=NULL) {
      /* If necessary, we free the tags we created */
      for (int count_tags=0;count_tags<n_tags;count_tags++) {
         free(tags[count_tags]);
      }
      free(tags);
   }
}

u_printf("Text automaton rebuilt.\n");

u_fclose(f_tind);
u_fclose(f_tfst);
close_text_automaton(tfst);

/* Finally, we save statistics */
if (save_statistics) {
    char tfst_tags_by_freq[FILENAME_MAX];
    char tfst_tags_by_alph[FILENAME_MAX];
    strcpy(tfst_tags_by_freq, basedir);
    strcat(tfst_tags_by_freq, "tfst_tags_by_freq.txt");
    strcpy(tfst_tags_by_alph, basedir);
    strcat(tfst_tags_by_alph, "tfst_tags_by_alph.txt");
    U_FILE* f_tfst_tags_by_freq = u_fopen(&vec, tfst_tags_by_freq, U_WRITE);
    if (f_tfst_tags_by_freq == NULL) {
        error("Cannot open %s\n", tfst_tags_by_freq);
    }
    U_FILE* f_tfst_tags_by_alph = u_fopen(&vec, tfst_tags_by_alph, U_WRITE);
    if (f_tfst_tags_by_alph == NULL) {
        error("Cannot open %s\n", tfst_tags_by_alph);
    }
    sort_and_save_tfst_stats(form_frequencies, f_tfst_tags_by_freq, f_tfst_tags_by_alph);
    u_fclose(f_tfst_tags_by_freq);
    u_fclose(f_tfst_tags_by_alph);
}
free_hash_table(form_frequencies);

/* make a backup and replace old automaton with new */
char backup_tfst[FILENAME_MAX];
char backup_tind[FILENAME_MAX];
sprintf(backup_tfst,"%s.bck",input_tfst);
sprintf(backup_tind,"%s.bck",input_tind);
/* We remove the existing backup files, if any */
af_remove(backup_tfst);
af_remove(backup_tind);
af_rename(input_tfst,backup_tfst);
af_rename(input_tind,backup_tind);
af_rename(output_tfst,input_tfst);
af_rename(output_tind,input_tind);
u_printf("\nYou can find a backup of the original files in:\n    %s\nand %s\n",
         backup_tfst,backup_tind);

return SUCCESS_RETURN_CODE;
}
int main_Uncompress(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

VersatileEncodingConfig vec=VEC_DEFAULT;
int val,index=-1;
char output[FILENAME_MAX]="";
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_Uncompress,lopts_Uncompress,&index))) {
   switch(val) {
   case 'o': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty output file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(output,options.vars()->optarg);
             break;
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case 'V': only_verify_arguments = true;
             break;             
   case 'h': usage(); 
             return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_Uncompress[index].name);
             return USAGE_ERROR_CODE;                         
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
   }
   index=-1;
}

if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  return SUCCESS_RETURN_CODE;
}

if (output[0]=='\0') {
   remove_extension(argv[options.vars()->optind],output);
   strcat(output,".dic");
}

U_FILE* f=u_fopen(&vec,output,U_WRITE);
if (f==NULL) {
   error("Cannot open file %s\n",output);
   return DEFAULT_ERROR_CODE;
}

char inf_file[FILENAME_MAX];
remove_extension(argv[options.vars()->optind],inf_file);
strcat(inf_file,".inf");
u_printf("Uncompressing %s...\n",argv[options.vars()->optind]);
Dictionary* d=new_Dictionary(&vec,argv[options.vars()->optind],inf_file);

if (d!=NULL) {
  rebuild_dictionary(d,f);
}

u_fclose(f);
free_Dictionary(d);
u_printf("Done.\n");

return SUCCESS_RETURN_CODE;
}
Beispiel #4
0
int main_TrainingTagger(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return 0;
}

int val,index=-1,binaries=1,r_forms=1,i_forms=1;
int semitic=0;
struct OptVars* vars=new_OptVars();
char text[FILENAME_MAX]="";
char raw_forms[FILENAME_MAX]="";
char inflected_forms[FILENAME_MAX]="";
char output[FILENAME_MAX]="";
Encoding encoding_output = DEFAULT_ENCODING_OUTPUT;
int bom_output = DEFAULT_BOM_OUTPUT;
int mask_encoding_compatibility_input = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT;
while (EOF!=(val=getopt_long_TS(argc,argv,optstring_TrainingTagger,lopts_TrainingTagger,&index,vars))) {
   switch(val) {
   case 'o': if (vars->optarg[0]=='\0') {
                fatal_error("You must specify a non empty pattern\n");
             }
             strcpy(output,vars->optarg);
             break;
   case 'b': binaries = 1;
			 break;
   case 'n': binaries = 0;
			 break;
   case 'a': break;
   case 'c': i_forms = 0;
			 break;
   case 'm': r_forms = 0;
   			 break;
   case 'S': semitic=1;
   			 break;
   case 'k': if (vars->optarg[0]=='\0') {
                fatal_error("Empty input_encoding argument\n");
             }
             decode_reading_encoding_parameter(&mask_encoding_compatibility_input,vars->optarg);
             break;
   case 'q': if (vars->optarg[0]=='\0') {
                fatal_error("Empty output_encoding argument\n");
             }
             decode_writing_encoding_parameter(&encoding_output,&bom_output,vars->optarg);
             break;
   case 'h': usage(); return 0;
   case ':': if (index==-1) fatal_error("Missing argument for option -%c\n",vars->optopt);
             else fatal_error("Missing argument for option --%s\n",lopts_TrainingTagger[index].name);
   case '?': if (index==-1) fatal_error("Invalid option -%c\n",vars->optopt);
             else fatal_error("Invalid option --%s\n",vars->optarg);
             break;
   }
   index=-1;
}

if (vars->optind!=argc-1) {
   free_OptVars(vars);
   error("Invalid arguments: rerun with --help\n");
   return 1;
}
strcpy(text,argv[vars->optind]);
U_FILE* input_text=u_fopen_existing_versatile_encoding(mask_encoding_compatibility_input,text,U_READ);
if (input_text==NULL) {
   free_OptVars(vars);
   fatal_error("cannot open file %s\n",text);
   return 1;
}

if(output[0]=='\0'){
	remove_path_and_extension(text,output);
}

char path[FILENAME_MAX],filename[FILENAME_MAX];
get_path(text,path);
if(strlen(path) == 0){
	strcpy(path,".");
}
/* we create files which will contain statistics extracted from the tagged corpus */
U_FILE* rforms_file = NULL, *iforms_file = NULL;
if(r_forms == 1){
	sprintf(filename,"%s_data_cat.dic",output);
	new_file(path,filename,raw_forms);
	rforms_file=u_fopen_creating_versatile_encoding(encoding_output,bom_output,raw_forms,U_WRITE);
}
if(i_forms == 1){
	sprintf(filename,"%s_data_morph.dic",output);
	new_file(path,filename,inflected_forms);
	iforms_file=u_fopen_creating_versatile_encoding(encoding_output,bom_output,inflected_forms,U_WRITE);
}

u_printf("Gathering statistics from tagged corpus...\n");
do_training(input_text,rforms_file,iforms_file);

/* we close all files and then we sort text dictionaries */
u_fclose(input_text);
char disclaimer[FILENAME_MAX];
if(rforms_file != NULL){
	u_fclose(rforms_file);
	pseudo_main_SortTxt(DEFAULT_ENCODING_OUTPUT,DEFAULT_BOM_OUTPUT,ALL_ENCODING_BOM_POSSIBLE,0,0,NULL,NULL,0,raw_forms);
	strcpy(disclaimer,raw_forms);
	remove_extension(disclaimer);
	strcat(disclaimer,".txt");
	create_disclaimer(disclaimer);
}
if(iforms_file != NULL){
	u_fclose(iforms_file);
	pseudo_main_SortTxt(DEFAULT_ENCODING_OUTPUT,DEFAULT_BOM_OUTPUT,ALL_ENCODING_BOM_POSSIBLE,0,0,NULL,NULL,0,inflected_forms);
	strcpy(disclaimer,inflected_forms);
	remove_extension(disclaimer);
	strcat(disclaimer,".txt");
	create_disclaimer(disclaimer);
}

/* we compress dictionaries if option is specified by user (output is ".bin") */
if(binaries == 1){
/* simple forms dictionary */
if(r_forms == 1){
	pseudo_main_Compress(DEFAULT_ENCODING_OUTPUT,DEFAULT_BOM_OUTPUT,ALL_ENCODING_BOM_POSSIBLE,0,semitic,raw_forms);
}
/* compound forms dictionary */
if(i_forms == 1){
	pseudo_main_Compress(DEFAULT_ENCODING_OUTPUT,DEFAULT_BOM_OUTPUT,ALL_ENCODING_BOM_POSSIBLE,0,semitic,inflected_forms);
}
}
free_OptVars(vars);
u_printf("Done.\n");
return 0;
}
Beispiel #5
0
int main_Extract(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return 0;
}


int val,index=-1;
char extract_matching_units=1;
char text_name[FILENAME_MAX]="";
char concord_ind[FILENAME_MAX]="";
char output[FILENAME_MAX]="";
Encoding encoding_output = DEFAULT_ENCODING_OUTPUT;
int bom_output = DEFAULT_BOM_OUTPUT;
int mask_encoding_compatibility_input = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT;
struct OptVars* vars=new_OptVars();
while (EOF!=(val=getopt_long_TS(argc,argv,optstring_Extract,lopts_Extract,&index,vars))) {
   switch(val) {
   case 'y': extract_matching_units=1; break;
   case 'n': extract_matching_units=0; break;
   case 'o': if (vars->optarg[0]=='\0') {
                fatal_error("You must specify a non empty output file name\n");
             }
             strcpy(output,vars->optarg);
             break;
   case 'i': if (vars->optarg[0]=='\0') {
                fatal_error("You must specify a non empty concordance file name\n");
             }
             strcpy(concord_ind,vars->optarg);
             break;
   case 'h': usage(); return 0;
   case ':': if (index==-1) fatal_error("Missing argument for option -%c\n",vars->optopt);
             else fatal_error("Missing argument for option --%s\n",lopts_Extract[index].name);
   case '?': if (index==-1) fatal_error("Invalid option -%c\n",vars->optopt);
             else fatal_error("Invalid option --%s\n",vars->optarg);
             break;
   case 'k': if (vars->optarg[0]=='\0') {
                fatal_error("Empty input_encoding argument\n");
             }
             decode_reading_encoding_parameter(&mask_encoding_compatibility_input,vars->optarg);
             break;
   case 'q': if (vars->optarg[0]=='\0') {
                fatal_error("Empty output_encoding argument\n");
             }
             decode_writing_encoding_parameter(&encoding_output,&bom_output,vars->optarg);
             break;
   }
   index=-1;
}

if (output[0]=='\0') {
   fatal_error("You must specify the output text file\n");
}
if (vars->optind!=argc-1) {
   fatal_error("Invalid arguments: rerun with --help\n");
}
strcpy(text_name,argv[vars->optind]);

struct snt_files* snt_files=new_snt_files(text_name);
ABSTRACTMAPFILE* text=af_open_mapfile(snt_files->text_cod,MAPFILE_OPTION_READ,0);
if (text==NULL) {
   error("Cannot open %s\n",snt_files->text_cod);
   return 1;
}
struct text_tokens* tok=load_text_tokens(snt_files->tokens_txt,mask_encoding_compatibility_input);
if (tok==NULL) {
   error("Cannot load token list %s\n",snt_files->tokens_txt);
   af_close_mapfile(text);
   return 1;
}
if (tok->SENTENCE_MARKER==-1) {
   error("The text does not contain any sentence marker {S}\n");
   af_close_mapfile(text);
   free_text_tokens(tok);
   return 1;
}

if (concord_ind[0]=='\0') {
   char tmp[FILENAME_MAX];
   get_extension(text_name,tmp);
   if (strcmp(tmp,"snt")) {
      fatal_error("Unable to find the concord.ind file. Please explicit it\n");
   }
   remove_extension(text_name,concord_ind);
   strcat(concord_ind,"_snt");
   strcat(concord_ind,PATH_SEPARATOR_STRING);
   strcat(concord_ind,"concord.ind");
}
U_FILE* concord=u_fopen_existing_versatile_encoding(mask_encoding_compatibility_input,concord_ind,U_READ);
if (concord==NULL) {
   error("Cannot open concordance %s\n",concord_ind);
   af_close_mapfile(text);
   free_text_tokens(tok);
   return 1;
}
U_FILE* result=u_fopen_creating_versatile_encoding(encoding_output,bom_output,output,U_WRITE);
if (result==NULL) {
   error("Cannot write output file %s\n",output);
   af_close_mapfile(text);
   u_fclose(concord);
   free_text_tokens(tok);
   return 1;
}
free_snt_files(snt_files);
extract_units(extract_matching_units,text,tok,concord,result);
af_close_mapfile(text);
u_fclose(concord);
u_fclose(result);
free_text_tokens(tok);
free_OptVars(vars);
u_printf("Done.\n");
return 0;
}
int write_vis(std::string &nemI_out_file,
	      std::string &exoII_inp_file,
	      Machine_Description* machine,
	      Problem_Description* prob,
	      Mesh_Description<INT>* mesh,
	      LB_Description<INT>* lb)
{
  int    exid_vis, exid_inp;

  char  title[MAX_LINE_LENGTH+1];
  const char   *coord_names[] = {"X", "Y", "Z"};

  /*-----------------------------Execution Begins------------------------------*/

  /* Generate the file name for the visualization file */
  std::string vis_file_name = remove_extension(nemI_out_file);
  vis_file_name += "-vis.exoII";

  /* Generate the title for the file */
  strcpy(title, UTIL_NAME);
  strcat(title, " ");
  strcat(title, ELB_VERSION);
  strcat(title, " load balance visualization file");

  /*
   * If the vis technique is to be by element block then calculate the
   * number of element blocks.
   */
  int    vis_nelem_blks;
  if(prob->type == ELEMENTAL)
    vis_nelem_blks = machine->num_procs;
  else
    vis_nelem_blks = machine->num_procs + 1;

  /* Create the ExodusII file */
  std::cout << "Outputting load balance visualization file " << vis_file_name.c_str() << "\n";
  int cpu_ws = 0;
  int io_ws = 0;
  int mode = EX_CLOBBER;
  if (prob->int64db|prob->int64api) {
    mode |= EX_NETCDF4|EX_NOCLASSIC|prob->int64db|prob->int64api;
  }
  if((exid_vis=ex_create(vis_file_name.c_str(), mode, &cpu_ws, &io_ws)) < 0) {
    Gen_Error(0, "fatal: unable to create visualization output file");
    return 0;
  }
  ON_BLOCK_EXIT(ex_close, exid_vis);

  /*
   * Open the original input ExodusII file, read the values for the
   * element blocks and output them to the visualization file.
   */
  int icpu_ws=0;
  int iio_ws=0;
  float vers=0.0;
  mode = EX_READ | prob->int64api;
  if((exid_inp=ex_open(exoII_inp_file.c_str(), mode, &icpu_ws, &iio_ws, &vers)) < 0) {
    Gen_Error(0, "fatal: unable to open input ExodusII file");
    return 0;
  }
  ON_BLOCK_EXIT(ex_close, exid_inp);
  
  char **elem_type  = (char**)array_alloc(2, mesh->num_el_blks, MAX_STR_LENGTH+1,
					  sizeof(char));
  if(!elem_type) {
    Gen_Error(0, "fatal: insufficient memory");
    return 0;
  }
  ON_BLOCK_EXIT(free, elem_type);

  std::vector<INT> el_blk_ids(mesh->num_el_blks);
  std::vector<INT> el_cnt_blk(mesh->num_el_blks);
  std::vector<INT> node_pel_blk(mesh->num_el_blks);
  std::vector<INT> nattr_el_blk(mesh->num_el_blks);

  if(ex_get_elem_blk_ids(exid_inp, TOPTR(el_blk_ids)) < 0) {
    Gen_Error(0, "fatal: unable to get element block IDs");
    return 0;
  }

  int acc_vis = ELB_TRUE; // Output a different element block per processor
  if (prob->vis_out == 2)
    acc_vis = ELB_FALSE; // Output a nodal/element variable showing processor

  size_t nsize = 0;

  /*
   * Find out if the mesh consists of mixed elements. If not then
   * element blocks will be used to visualize the partitioning. Otherwise
   * nodal/element results will be used.
   */
  for(size_t ecnt=0; ecnt < mesh->num_el_blks; ecnt++) {
    if(ex_get_elem_block(exid_inp, el_blk_ids[ecnt], elem_type[ecnt],
			 &el_cnt_blk[ecnt], &node_pel_blk[ecnt],
			 &nattr_el_blk[ecnt]) < 0) {
      Gen_Error(0, "fatal: unable to get element block parameters");
      return 0;
    }

    nsize += el_cnt_blk[ecnt]*node_pel_blk[ecnt];

    if(strcmp(elem_type[0], elem_type[ecnt]) == 0) {
      if(node_pel_blk[0] != node_pel_blk[ecnt])
	acc_vis = ELB_FALSE;
    }
    else
      acc_vis = ELB_FALSE;
  }

  if(acc_vis == ELB_TRUE) {
    /* Output the initial information */
    if(ex_put_init(exid_vis, title, mesh->num_dims, mesh->num_nodes,
		   mesh->num_elems, vis_nelem_blks, 0, 0) < 0) {
      Gen_Error(0, "fatal: unable to output initial params to vis file");
      return 0;
    }
	
    /* Output the nodal coordinates */
    float *xptr = nullptr;
    float *yptr = nullptr;
    float *zptr = nullptr;
    switch(mesh->num_dims) {
    case 3:
      zptr = (mesh->coords) + 2*mesh->num_nodes;
      /* FALLTHRU */
    case 2:
      yptr = (mesh->coords) + mesh->num_nodes;
      /* FALLTHRU */
    case 1:
      xptr = mesh->coords;
    }
    if(ex_put_coord(exid_vis, xptr, yptr, zptr) < 0) {
      Gen_Error(0, "fatal: unable to output coords to vis file");
      return 0;
    }
	
    if(ex_put_coord_names(exid_vis, (char**)coord_names) < 0) {
      Gen_Error(0, "fatal: unable to output coordinate names");
      return 0;
    }

    std::vector<INT> elem_block(mesh->num_elems);
    std::vector<INT> elem_map(mesh->num_elems);
    std::vector<INT> tmp_connect(nsize);
    for(size_t ecnt=0; ecnt < mesh->num_elems; ecnt++) {
      elem_map[ecnt] = ecnt+1;
      if(prob->type == ELEMENTAL)
	elem_block[ecnt] = lb->vertex2proc[ecnt];
      else {
	int proc   = lb->vertex2proc[mesh->connect[ecnt][0]];
	int nnodes = get_elem_info(NNODES, mesh->elem_type[ecnt]);
	elem_block[ecnt] = proc;
	for(int ncnt=1; ncnt < nnodes; ncnt++) {
	  if(lb->vertex2proc[mesh->connect[ecnt][ncnt]] != proc) {
	    elem_block[ecnt] = machine->num_procs;
	    break;
	  }
	}
      }
    }

    int ccnt = 0;
    std::vector<INT> vis_el_blk_ptr(vis_nelem_blks+1);
    for(INT bcnt=0; bcnt < vis_nelem_blks; bcnt++) {
      vis_el_blk_ptr[bcnt] = ccnt;
      int pos = 0;
      int old_pos = 0;
      INT* el_ptr = TOPTR(elem_block);
      size_t ecnt   = mesh->num_elems;
      while(pos != -1) {
	pos = in_list(bcnt, ecnt, el_ptr);
	if(pos != -1) {
	  old_pos += pos + 1;
	  ecnt     = mesh->num_elems - old_pos;
	  el_ptr   = TOPTR(elem_block) + old_pos;
	  int nnodes = get_elem_info(NNODES, mesh->elem_type[old_pos-1]);
	  for(int ncnt=0; ncnt < nnodes; ncnt++)
	    tmp_connect[ccnt++] = mesh->connect[old_pos-1][ncnt] + 1;
	}
      }
    }
    vis_el_blk_ptr[vis_nelem_blks] = ccnt;
	
    /* Output the element map */
    if(ex_put_map(exid_vis, TOPTR(elem_map)) < 0) {
      Gen_Error(0, "fatal: unable to output element number map");
      return 0;
    }
	
    /* Output the visualization element blocks */
    for(int bcnt=0; bcnt < vis_nelem_blks; bcnt++) {
      /*
       * Note this assumes all the blocks contain the same type
       * element.
       */
      int ecnt = (vis_el_blk_ptr[bcnt+1]-vis_el_blk_ptr[bcnt])/node_pel_blk[0];
      if(ex_put_elem_block(exid_vis, bcnt+1, elem_type[0],
			   ecnt, node_pel_blk[0], 0) < 0) {
	Gen_Error(0, "fatal: unable to output element block params");
	return 0;
      }
	  
      /* Output the connectivity */
      if(ex_put_elem_conn(exid_vis, bcnt+1,
			  &tmp_connect[vis_el_blk_ptr[bcnt]]) < 0) {
	Gen_Error(0, "fatal: unable to output element connectivity");
	return 0;
      }
    }
  }

  else {	/* For nodal/element results visualization of the partioning. */
    // Copy the mesh portion to the vis file.
    ex_copy(exid_inp, exid_vis);

    /* Set up the file for nodal/element results */
    float time_val = 0.0;
    if(ex_put_time(exid_vis, 1, &time_val) < 0) {
      Gen_Error(0, "fatal: unable to output time to vis file");
      return 0;
    }

    const char  *var_names[] = {"proc"};
    if(prob->type == NODAL) {
      /* Allocate memory for the nodal values */
      std::vector<float> proc_vals(mesh->num_nodes);

      if(ex_put_variable_param(exid_vis, EX_NODAL, 1) < 0) {
	Gen_Error(0, "fatal: unable to output var params to vis file");
	return 0;
      }

      if(ex_put_variable_names(exid_vis, EX_NODAL, 1, (char**)var_names) < 0) {
	Gen_Error(0, "fatal: unable to output variable name");
	return 0;
      }

      /* Do some problem specific assignment */
      for(size_t ncnt=0; ncnt < mesh->num_nodes; ncnt++)
	proc_vals[ncnt] = lb->vertex2proc[ncnt];

      for(int pcnt=0; pcnt < machine->num_procs; pcnt++) {
	for(auto & elem : lb->bor_nodes[pcnt])
	  proc_vals[elem] = machine->num_procs + 1;
      }

      /* Output the nodal variables */
      if(ex_put_nodal_var(exid_vis, 1, 1, mesh->num_nodes, TOPTR(proc_vals)) < 0) {
	Gen_Error(0, "fatal: unable to output nodal variables");
	return 0;
      }
    }
    else if(prob->type == ELEMENTAL) {
      /* Allocate memory for the element values */
      std::vector<float> proc_vals(mesh->num_elems);

      if(ex_put_variable_param(exid_vis, EX_ELEM_BLOCK, 1) < 0) {
	Gen_Error(0, "fatal: unable to output var params to vis file");
	return 0;
      }

      if(ex_put_variable_names(exid_vis, EX_ELEM_BLOCK, 1, (char**)var_names) < 0) {
	Gen_Error(0, "fatal: unable to output variable name");
	return 0;
      }

      /* Do some problem specific assignment */
      for(int proc=0; proc < machine->num_procs; proc++) {
	for (size_t e = 0; e < lb->int_elems[proc].size(); e++) {
	  size_t ecnt = lb->int_elems[proc][e];
	  proc_vals[ecnt] = proc;
	}

	for (size_t e = 0; e < lb->bor_elems[proc].size(); e++) {
	  size_t ecnt = lb->bor_elems[proc][e];
	  proc_vals[ecnt] = proc;
	}
      }

      /* Output the element variables */
      size_t offset = 0;
      for (size_t i=0; i < mesh->num_el_blks; i++) {
	if(ex_put_var(exid_vis, 1, EX_ELEM_BLOCK, 1, el_blk_ids[i],
		      el_cnt_blk[i], &proc_vals[offset]) < 0) {
	  Gen_Error(0, "fatal: unable to output nodal variables");
	  return 0;
	}
	offset += el_cnt_blk[i];
      }
    }
  }
  return 1;
} /*---------------------------End write_vis()-------------------------------*/
Beispiel #7
0
/**
 * Takes a file name and copies it into 'result' without its extension,
 * if any.
 */
void remove_extension(const char* filename,char* result) {
strcpy(result,filename);
remove_extension(result);
}
Beispiel #8
0
bool file_utils::split_path(const char *p, dynamic_string *pDrive, dynamic_string *pDir, dynamic_string *pFilename, dynamic_string *pExt)
{
    VOGL_ASSERT(p);

#if defined(PLATFORM_WINDOWS)
    char drive_buf[_MAX_DRIVE];
    char dir_buf[_MAX_DIR];
    char fname_buf[_MAX_FNAME];
    char ext_buf[_MAX_EXT];

#if defined(COMPILER_MSVC)
    // Compiling with MSVC
    errno_t error = _splitpath_s(p,
                                 pDrive ? drive_buf : NULL, pDrive ? _MAX_DRIVE : 0,
                                 pDir ? dir_buf : NULL, pDir ? _MAX_DIR : 0,
                                 pFilename ? fname_buf : NULL, pFilename ? _MAX_FNAME : 0,
                                 pExt ? ext_buf : NULL, pExt ? _MAX_EXT : 0);
    if (error != 0)
        return false;
#elif defined(COMPILER_MINGW)
    // Compiling with MinGW
    _splitpath(p,
               pDrive ? drive_buf : NULL,
               pDir ? dir_buf : NULL,
               pFilename ? fname_buf : NULL,
               pExt ? ext_buf : NULL);
#else
#error "Need to provide splitpath functionality for this compiler / platform combo."
#endif

    if (pDrive)
        *pDrive = drive_buf;
    if (pDir)
        *pDir = dir_buf;
    if (pFilename)
        *pFilename = fname_buf;
    if (pExt)
        *pExt = ext_buf;
#else // !PLATFORM_WINDOWS
    char dirtmp[1024];
    char nametmp[1024];
    strcpy_safe(dirtmp, sizeof(dirtmp), p);
    strcpy_safe(nametmp, sizeof(nametmp), p);

    if (pDrive)
        pDrive->clear();

    const char *pDirName = dirname(dirtmp);
    if (!pDirName)
        return false;

    if (pDir)
    {
        pDir->set(pDirName);
        if ((!pDir->is_empty()) && (pDir->back() != '/'))
            pDir->append_char('/');
    }

    const char *pBaseName = basename(nametmp);
    if (!pBaseName)
        return false;

    if (pFilename)
    {
        pFilename->set(pBaseName);
        remove_extension(*pFilename);
    }

    if (pExt)
    {
        pExt->set(pBaseName);
        get_extension(*pExt);
        if (pExt->get_len())
            *pExt = "." + *pExt;
    }
#endif // #if defined(PLATFORM_WINDOWS)

    return true;
}
Beispiel #9
0
/**
 * Takes a file name, removes its extension and adds the suffix "_snt"
 * followed by the separator character.
 *
 * Example: filename="C:\English\novel.txt" => result="C:\English\novel_snt\"
 */
void get_snt_path(const char* filename,char* result) {
remove_extension(filename,result);
strcat(result,"_snt");
strcat(result,PATH_SEPARATOR_STRING);
}
Beispiel #10
0
/**
 * Takes a file name and copies it without its path and extension, if
 * any, into 'result'.
 */
void remove_path_and_extension(const char* filename,char* result) {
char temp[FILENAME_MAX];
remove_path(filename,temp);
remove_extension(temp,result);
}
Beispiel #11
0
int main_ElagComp(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

VersatileEncodingConfig vec=VEC_DEFAULT;
int val,index=-1;
char compilename[FILENAME_MAX]="";
char directory[FILENAME_MAX]="";
char grammar[FILENAME_MAX]="";
char rule_file[FILENAME_MAX]="";
char lang[FILENAME_MAX]="";
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_ElagComp,lopts_ElagComp,&index))) {
   switch(val) {
   case 'l': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty language definition file\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(lang,options.vars()->optarg);
             break;
   case 'r': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty rule file\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(rule_file,options.vars()->optarg);
             get_path(rule_file,directory);
             break;
   case 'g': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty grammar file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(grammar,options.vars()->optarg);
             get_path(grammar,directory);
             break;
   case 'o': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty output file\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(compilename,options.vars()->optarg);
             break;
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage();
             return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_ElagComp[index].name);
             return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
   }
   index=-1;
}

if (lang[0]=='\0') {
   error("You must define the language definition file\n");
   return USAGE_ERROR_CODE;
}
if ((rule_file[0]=='\0' && grammar[0]=='\0')
     || (rule_file[0]!='\0' && grammar[0]!='\0')) {
   error("You must define a rule list OR a grammar\n");
   return USAGE_ERROR_CODE;
}
if (options.vars()->optind!=argc) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}

if (rule_file[0]=='\0' && grammar[0]=='\0') {
   error("You must specified a grammar or a rule file name\n");
   return USAGE_ERROR_CODE;
}

if (rule_file[0]!='\0' && grammar[0]!='\0') {
   error("Cannot handle both a rule file and a grammar\n");
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  return SUCCESS_RETURN_CODE;
}

language_t* language = load_language_definition(&vec,lang);
if (rule_file[0]!='\0') {
   /* If we work with a rule list */
   if (compilename[0]=='\0') {
      int l=(int)strlen(rule_file);
      if (strcmp(rule_file+l-4,".lst")==0) {
         strcpy(compilename,rule_file);
         strcpy(compilename+l-4,".rul");
      } else {
         sprintf(compilename,"%s.rul",rule_file);
      }
   }
   if (compile_elag_rules(rule_file,compilename,&vec,language)==-1) {
      error("An error occurred while compiling %s\n",compilename);
      free_language_t(language);
      return DEFAULT_ERROR_CODE;
   }
   u_printf("\nElag grammars are compiled in %s.\n",compilename);
} else {
   /* If we must compile a single grammar */
   char elg_file[FILENAME_MAX];
   get_extension(grammar,elg_file);
   if (strcmp(elg_file,".fst2")) {
     error("Grammar '%s' should be a .fst2 file\n");
     free_language_t(language);
     return DEFAULT_ERROR_CODE;
   }
   remove_extension(grammar,elg_file);
   strcat(elg_file,".elg");
   if (compile_elag_grammar(grammar,elg_file,&vec,language)==-1) {
     error("An error occured while compiling %s\n",grammar);
     free_language_t(language);
     return DEFAULT_ERROR_CODE;
   }
   u_printf("Elag grammar is compiled into %s.\n",elg_file);
}
free_language_t(language);
return SUCCESS_RETURN_CODE;
}
Beispiel #12
0
int do_init(int argc, char** argv)
{
	/* setup pre-defined, #define-dependant */
	map_cache_file = std::string(db_path) + "/" + std::string(DBPATH) + "map_cache.dat";

	// Process the command-line arguments
	process_args(argc, argv);

	ShowStatus("Initializing grfio with %s\n", grf_list_file.c_str());
	grfio_init(grf_list_file.c_str());

	// Attempt to open the map cache file and force rebuild if not found
	ShowStatus("Opening map cache: %s\n", map_cache_file.c_str());
	if(!rebuild) {
		map_cache_fp = fopen(map_cache_file.c_str(), "rb");
		if(map_cache_fp == NULL) {
			ShowNotice("Existing map cache not found, forcing rebuild mode\n");
			rebuild = 1;
		} else
			fclose(map_cache_fp);
	}
	if(rebuild)
		map_cache_fp = fopen(map_cache_file.c_str(), "w+b");
	else
		map_cache_fp = fopen(map_cache_file.c_str(), "r+b");
	if(map_cache_fp == NULL) {
		ShowError("Failure when opening map cache file %s\n", map_cache_file.c_str());
		exit(EXIT_FAILURE);
	}

	// Open the map list
	FILE *list;
	std::vector<std::string> directories = { std::string(db_path) + "/",  std::string(db_path) + "/" + std::string(DBIMPORT) + "/" };

	for (const auto &directory : directories) {
		std::string filename = directory + map_list_file;

		ShowStatus("Opening map list: %s\n", filename.c_str());
		list = fopen(filename.c_str(), "r");
		if (list == NULL) {
			ShowError("Failure when opening maps list file %s\n", filename.c_str());
			exit(EXIT_FAILURE);
		}

		// Initialize the main header
		if (rebuild) {
			header.file_size = sizeof(struct main_header);
			header.map_count = 0;
		} else {
			if (fread(&header, sizeof(struct main_header), 1, map_cache_fp) != 1) { printf("An error as occured while reading map_cache_fp \n"); }
			header.file_size = GetULong((unsigned char *)&(header.file_size));
			header.map_count = GetUShort((unsigned char *)&(header.map_count));
		}

		// Read and process the map list
		char line[1024];

		while (fgets(line, sizeof(line), list))
		{
			if (line[0] == '/' && line[1] == '/')
				continue;

			char name[MAP_NAME_LENGTH_EXT];

			if (sscanf(line, "%15s", name) < 1)
				continue;

			if (strcmp("map:", name) == 0 && sscanf(line, "%*s %15s", name) < 1)
				continue;

			struct map_data map;

			name[MAP_NAME_LENGTH_EXT - 1] = '\0';
			remove_extension(name);
			if (find_map(name))
				ShowInfo("Map '" CL_WHITE "%s" CL_RESET "' already in cache.\n", name);
			else if (read_map(name, &map)) {
				cache_map(name, &map);
				ShowInfo("Map '" CL_WHITE "%s" CL_RESET "' successfully cached.\n", name);
			}
			else
				ShowError("Map '" CL_WHITE "%s" CL_RESET "' not found!\n", name);

		}

		ShowStatus("Closing map list: %s\n", filename.c_str());
		fclose(list);
	}

	// Write the main header and close the map cache
	ShowStatus("Closing map cache: %s\n", map_cache_file.c_str());
	fseek(map_cache_fp, 0, SEEK_SET);
	fwrite(&header, sizeof(struct main_header), 1, map_cache_fp);
	fclose(map_cache_fp);

	ShowStatus("Finalizing grfio\n");
	grfio_final();

	ShowInfo("%d maps now in cache\n", header.map_count);

	return 0;
}
Beispiel #13
0
int main_Normalize(int argc,char* const argv[]) {
if (argc==1) {
  usage();
  return SUCCESS_RETURN_CODE;
}
int mode=KEEP_CARRIAGE_RETURN;
int separator_normalization=1;
char rules[FILENAME_MAX]="";
char input_offsets[FILENAME_MAX]="";
char output_offsets[FILENAME_MAX]="";
VersatileEncodingConfig vec=VEC_DEFAULT;
int convLFtoCRLF=1;
int val,index=-1;
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_Normalize,lopts_Normalize,&index))) {
   switch(val) {
   case 'l': convLFtoCRLF=0; break;
   case 'n': mode=REMOVE_CARRIAGE_RETURN; break;
   case 'r': if (options.vars()->optarg[0]=='\0') {
              error("You must specify a non empty replacement rule file name\n");
              return USAGE_ERROR_CODE;
             }
             strcpy(rules,options.vars()->optarg);
             break;
   case 1: separator_normalization=0; break;
   case 'k': if (options.vars()->optarg[0]=='\0') {
              error("Empty input_encoding argument\n");
              return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
              error("Empty output_encoding argument\n");
              return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case '$': if (options.vars()->optarg[0]=='\0') {
              error("You must specify a non empty input offset file name\n");
              return USAGE_ERROR_CODE;
             }
             strcpy(input_offsets,options.vars()->optarg);
             break;
   case '@': if (options.vars()->optarg[0]=='\0') {
              error("You must specify a non empty output offset file name\n");
              return USAGE_ERROR_CODE;
             }
             strcpy(output_offsets,options.vars()->optarg);
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage();
             return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_Normalize[index].name);
             return USAGE_ERROR_CODE;
             break;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
             break;
   }
   index=-1;
}

if (options.vars()->optind!=argc-1) {
  error("Invalid arguments: rerun with --help\n");
  return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  return SUCCESS_RETURN_CODE;
}

vector_offset* v_input_offsets=NULL;
vector_offset* v_output_offsets=NULL;
U_FILE* f_output_offsets=NULL;

if (output_offsets[0]!='\0') {
  /* We deal with offsets only if we have to produce output offsets */
  if (input_offsets[0]!='\0') {
    v_input_offsets=load_offsets(&vec,input_offsets);
  }
  f_output_offsets=u_fopen(&vec, output_offsets, U_WRITE);
  if (f_output_offsets==NULL) {
    error("Cannot create offset file %s\n",output_offsets);
    return DEFAULT_ERROR_CODE;
  }
  v_output_offsets=new_vector_offset();
}
char tmp_file[FILENAME_MAX];
get_extension(argv[options.vars()->optind],tmp_file);
if (!strcmp(tmp_file, ".snt")) {
   /* If the file to process has already the .snt extension, we temporary rename it to
   * .snt.normalizing */
  strcpy(tmp_file,argv[options.vars()->optind]);
  strcat(tmp_file,".normalizing");
  af_rename(argv[options.vars()->optind],tmp_file);
} else {
   strcpy(tmp_file,argv[options.vars()->optind]);
}
/* We set the destination file */
char dest_file[FILENAME_MAX];
remove_extension(argv[options.vars()->optind],dest_file);
strcat(dest_file,".snt");
u_printf("Normalizing %s...\n",argv[options.vars()->optind]);

int return_value = normalize(tmp_file,
                             dest_file,
                             &vec,
                             mode,
                             convLFtoCRLF,
                             rules,
                             v_output_offsets,
                             separator_normalization);
u_printf("\n");
/* If we have used a temporary file, we delete it */
if (strcmp(tmp_file,argv[options.vars()->optind])) {
   af_remove(tmp_file);
}
process_offsets(v_input_offsets,v_output_offsets,f_output_offsets);
u_fclose(f_output_offsets);
free_vector_offset(v_input_offsets);
free_vector_offset(v_output_offsets);
u_printf((return_value==SUCCESS_RETURN_CODE) ? "Done.\n" : "Unsuccessfull.\n");

return return_value;
}
Beispiel #14
0
/**
 * The main function of the cascade
 *
 *
 */
int cascade(const char* text, int in_place, int must_create_directory, fifo* transducer_list, const char *alphabet,
    const char*negation_operator,
    Encoding encoding_output,int bom_output,int mask_encoding_compatibility_input) {

	launch_tokenize_in_Cassys(text,alphabet,NULL,encoding_output,bom_output,mask_encoding_compatibility_input);

    //if (in_place == 0)
        initialize_working_directory(text, must_create_directory);

	struct snt_files *snt_text_files = new_snt_files(text);

	struct text_tokens *tokens = NULL;
	cassys_tokens_list *tokens_list = cassys_load_text(snt_text_files->tokens_txt, snt_text_files->text_cod,&tokens);

	fprintf(stdout,"Cascade begins\n");

	int transducer_number = 1;
    char *labeled_text_name = NULL;

    if ((in_place != 0))
	   labeled_text_name = create_labeled_files_and_directory(text,
		    transducer_number*0, must_create_directory,0);


	while(!is_empty(transducer_list)){

        if ((in_place == 0))
		    labeled_text_name = create_labeled_files_and_directory(text,
				    transducer_number, must_create_directory,1);
        /*
        else
        {
            labeled_text_name = strdup(text);
        }*/

		launch_tokenize_in_Cassys(labeled_text_name,alphabet,snt_text_files->tokens_txt,encoding_output,bom_output,mask_encoding_compatibility_input);
		free_snt_files(snt_text_files);

		// apply transducer
		transducer *current_transducer = (transducer*)take_ptr(transducer_list);
		launch_locate_in_Cassys(labeled_text_name, current_transducer, alphabet, negation_operator,encoding_output,bom_output,mask_encoding_compatibility_input);

		// generate concordance for this transducer
		snt_text_files = new_snt_files(labeled_text_name);
		launch_concord_in_Cassys(labeled_text_name,
				snt_text_files -> concord_ind, alphabet,encoding_output,bom_output,mask_encoding_compatibility_input);

		//
		add_replaced_text(labeled_text_name,tokens_list,transducer_number,alphabet,mask_encoding_compatibility_input);

		// add protection character in braces when needed
		protect_special_characters(labeled_text_name,encoding_output,bom_output,mask_encoding_compatibility_input);

		transducer_number++;

		free(current_transducer -> transducer_file_name);
		free(current_transducer);

        if ((in_place == 0))
		       free(labeled_text_name);

	}
    if ((in_place != 0))
		    free(labeled_text_name);

	free_snt_files(snt_text_files);

	construct_cascade_concord(tokens_list,text,transducer_number,encoding_output,bom_output,mask_encoding_compatibility_input);


	struct snt_files *snt_files = new_snt_files(text);

	char result_file_name[FILENAME_MAX];
	char text_name_without_extension[FILENAME_MAX];
	remove_extension(text,text_name_without_extension);
	sprintf(result_file_name,"%s.csc",text_name_without_extension);

	copy_file(result_file_name,text);
	launch_concord_in_Cassys(result_file_name,snt_files->concord_ind,alphabet,encoding_output,bom_output,mask_encoding_compatibility_input);

    free_cassys_tokens_list(tokens_list);
	free_snt_files(snt_files);
	free_text_tokens(tokens);
	return 0;
}
Beispiel #15
0
void
write_rule_recipe (FILE* stream, const char* target, list* dependencies)
{
    fprintf(stream, "\tgcc -o %s ", remove_extension(target));
    write_rule_prerequisites(stream, target, dependencies);
}
Beispiel #16
0
int main_Fst2Txt(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

struct fst2txt_parameters* p=new_fst2txt_parameters();
char in_offsets[FILENAME_MAX]="";
char out_offsets[FILENAME_MAX]="";
int val,index=-1;
bool only_verify_arguments = false;
UnitexGetOpt options;

while (EOF!=(val=options.parse_long(argc,argv,optstring_Fst2Txt,lopts_Fst2Txt,&index))) {
   switch(val) {
   case 't': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty text file name\n");
                free_fst2txt_parameters(p);
                return USAGE_ERROR_CODE;
             }
             p->input_text_file=strdup(options.vars()->optarg);
             if (p->input_text_file==NULL) {
                alloc_error("main_Fst2Txt");
                free_fst2txt_parameters(p);
                return ALLOC_ERROR_CODE;
             }
             break;
   case 'o': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty text output file name\n");
                free_fst2txt_parameters(p);
                return USAGE_ERROR_CODE;
             }
             p->output_text_file=strdup(options.vars()->optarg);
			 p->output_text_file_is_temp=0;
             if (p->output_text_file==NULL) {
                alloc_error("main_Fst2Txt");
                free_fst2txt_parameters(p);
                return ALLOC_ERROR_CODE;
             }
             break;
   case 'a': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty alphabet file name\n");
                free_fst2txt_parameters(p);
                return USAGE_ERROR_CODE;                
             }
             p->alphabet_file=strdup(options.vars()->optarg);
             if (p->alphabet_file==NULL) {
               alloc_error("main_Fst2Txt");
               free_fst2txt_parameters(p);
               return ALLOC_ERROR_CODE;               
             }
             break;
   case 'M': p->output_policy=MERGE_OUTPUTS; break;
   case 'R': p->output_policy=REPLACE_OUTPUTS; break;
   case 'c': p->tokenization_policy=CHAR_BY_CHAR_TOKENIZATION; break;
   case 'w': p->tokenization_policy=WORD_BY_WORD_TOKENIZATION; break;
   case 's': p->space_policy=START_WITH_SPACE; break;
   case 'x': p->space_policy=DONT_START_WITH_SPACE; break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage();
             free_fst2txt_parameters(p);
             return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_Fst2Txt[index].name);
             free_fst2txt_parameters(p);
             return USAGE_ERROR_CODE; 
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                free_fst2txt_parameters(p);
                return USAGE_ERROR_CODE;                 
             }
             decode_reading_encoding_parameter(&(p->vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                free_fst2txt_parameters(p);
                return USAGE_ERROR_CODE; 
             }
             decode_writing_encoding_parameter(&(p->vec.encoding_output),&(p->vec.bom_output),options.vars()->optarg);
             break;
   case '$': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_offsets argument\n");
                free_fst2txt_parameters(p);
                return USAGE_ERROR_CODE; 
             }
             strcpy(in_offsets,options.vars()->optarg);
             break;
   case '@': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_offsets argument\n");
                free_fst2txt_parameters(p);
                return USAGE_ERROR_CODE; 
             }
             strcpy(out_offsets,options.vars()->optarg);
             break;
   case 'l': p->convLFtoCRLF=0; break;
   case 'r': p->keepCR = 1; break;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             free_fst2txt_parameters(p);
             return USAGE_ERROR_CODE;
   }
   index=-1;
}

if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   free_fst2txt_parameters(p);
   return USAGE_ERROR_CODE;
}

if (p->input_text_file==NULL) {
   error("You must specify the text file\n");
   free_fst2txt_parameters(p);
   return USAGE_ERROR_CODE;   
}

if (only_verify_arguments) {
  // freeing all allocated memory
  free_fst2txt_parameters(p);
  return SUCCESS_RETURN_CODE;
}

if (out_offsets[0]!='\0') {
	/* We deal with offsets only if the program is expected to produce some */
	if (in_offsets[0]!='\0') {
		p->v_in_offsets=load_offsets(&(p->vec),in_offsets);
		if (p->v_in_offsets==NULL) {
			error("Cannot load offset file %s\n",in_offsets);
      free_fst2txt_parameters(p);
      return DEFAULT_ERROR_CODE;      
		}
	} else {
		/* If there is no input offset file, we create an empty offset vector
		 * in order to avoid testing whether the vector is NULL or not */
		p->v_in_offsets=new_vector_offset(1);
	}
	p->f_out_offsets=u_fopen(&(p->vec),out_offsets,U_WRITE);
	if (p->f_out_offsets==NULL) {
		error("Cannot create file %s\n",out_offsets);
    free_fst2txt_parameters(p);
    return DEFAULT_ERROR_CODE;     
	}
}

if (p->output_text_file == NULL) {
	char tmp[FILENAME_MAX];
	remove_extension(p->input_text_file, tmp);
	strcat(tmp, ".tmp");
	p->output_text_file_is_temp=1;
	p->output_text_file = strdup(tmp);
	if (p->output_text_file == NULL) {
		alloc_error("main_Fst2Txt");
		free_fst2txt_parameters(p);
		return ALLOC_ERROR_CODE;
	}
}
p->fst_file=strdup(argv[options.vars()->optind]);
if (p->fst_file==NULL) {
   alloc_error("main_Fst2Txt");
   free_fst2txt_parameters(p);
   return ALLOC_ERROR_CODE;   
}

int result=main_fst2txt(p);

free_fst2txt_parameters(p);
return result;
}
/**
 * The same than main, but no call to setBufferMode.
 */
int main_BuildKrMwuDic(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

int val,index=-1;
char output[FILENAME_MAX]="";
char inflection_dir[FILENAME_MAX]="";
char alphabet[FILENAME_MAX]="";
char dic_bin[FILENAME_MAX]="";
char dic_inf[FILENAME_MAX]="";

// default policy is to compile only out of date graphs
GraphRecompilationPolicy graph_recompilation_policy = ONLY_OUT_OF_DATE;

VersatileEncodingConfig vec=VEC_DEFAULT;

bool only_verify_arguments = false;

UnitexGetOpt options;

while (EOF!=(val=options.parse_long(argc,argv,optstring_BuildKrMwuDic,lopts_BuildKrMwuDic,&index))) {
   switch(val) {
   case 'o': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty output file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(output,options.vars()->optarg);
             break;
   case 'd': if (options.vars()->optarg[0]=='\0') {
                error("Empty inflection directory\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(inflection_dir,options.vars()->optarg);
             break;
   case 'a': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty alphabet file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(alphabet,options.vars()->optarg);
             break;
   case 'b': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty binary dictionary name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(dic_bin,options.vars()->optarg);
             remove_extension(dic_bin,dic_inf);
             strcat(dic_inf,".inf");
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage(); 
             return SUCCESS_RETURN_CODE;
   case 'f': graph_recompilation_policy = ALWAYS_RECOMPILE; break;
   case 'n': graph_recompilation_policy = NEVER_RECOMPILE;  break;
   case 't': graph_recompilation_policy = ONLY_OUT_OF_DATE; break;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_BuildKrMwuDic[index].name);
             return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   }
   index=-1;
}
if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}
if (output[0]=='\0') {
   error("Output file must be specified\n");
   return USAGE_ERROR_CODE;
}
if (inflection_dir[0]=='\0') {
   error("Inflection directory must be specified\n");
   return USAGE_ERROR_CODE;
}
if (alphabet[0]=='\0') {
   error("Alphabet file must be specified\n");
   return USAGE_ERROR_CODE;
}
if (dic_bin[0]=='\0') {
   error("Binary dictionary must be specified\n");
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory 
  return SUCCESS_RETURN_CODE;
}

U_FILE* delas=u_fopen(&vec,argv[options.vars()->optind],U_READ);
if (delas==NULL) {
   error("Cannot open %s\n",argv[options.vars()->optind]);
   return DEFAULT_ERROR_CODE;
}

U_FILE* grf=u_fopen(&vec,output,U_WRITE);
if (grf==NULL) {
   error("Cannot open %s\n",output);
   u_fclose(delas);  
   return DEFAULT_ERROR_CODE;
}

Alphabet* alph=load_alphabet(&vec,alphabet,1);
if (alph==NULL) {
   u_fclose(grf);
   u_fclose(delas);
   error("Cannot open alphabet file %s\n",alphabet);
   return DEFAULT_ERROR_CODE;
}
Korean* korean=new Korean(alph);

MultiFlex_ctx* multiFlex_ctx=new_MultiFlex_ctx(inflection_dir,
                                               NULL,
                                               NULL,
                                               &vec,
                                               korean,
                                               NULL,
                                               NULL,
                                               graph_recompilation_policy);

Dictionary* d=new_Dictionary(&vec,dic_bin,dic_inf);

create_mwu_dictionary(delas,grf,multiFlex_ctx,d);

free_Dictionary(d);
u_fclose(delas);
u_fclose(grf);
free_alphabet(alph);
delete korean;
for (int count_free_fst2=0;count_free_fst2<multiFlex_ctx->n_fst2;count_free_fst2++) {
    free_abstract_Fst2(multiFlex_ctx->fst2[count_free_fst2],&(multiFlex_ctx->fst2_free[count_free_fst2]));
    multiFlex_ctx->fst2[count_free_fst2]=NULL;
}
free_MultiFlex_ctx(multiFlex_ctx);
u_printf("Done.\n");
return SUCCESS_RETURN_CODE;
}
Beispiel #18
0
/*
 * Process the compiler options into options suitable for passing to the
 * preprocessor and the real compiler. The preprocessor options don't include
 * -E; this is added later. Returns true on success, otherwise false.
 */
bool
c166_process_args(struct args *orig_args, struct args **preprocessor_args,
                struct args **compiler_args)
{
	int i;
	bool found_c_opt = false;
	bool found_S_opt = false;
	bool found_H_opt = false;
	/* 0: Choose preprocessor type by the file extension.
	 * 1: Use c preprocessor.
	 * 2: Use c++ preprocessor.*/
	unsigned force_preprocessor_type = 0;
	const char *actual_language;          /* Language to actually use. */
	struct stat st;
	/* is the dependency makefile name overridden with -MF? */
	bool dependency_filename_specified = false;
	/* is the dependency makefile target name specified with -MT or -MQ? */
	bool dependency_target_specified = false;
	struct args *stripped_args = NULL, *dep_args = NULL, *h_args;
	int argc = orig_args->argc;
	char **argv = orig_args->argv;
	bool result = true;

	stripped_args = args_init(0, NULL);
	dep_args = args_init(0, NULL);
	h_args = args_init(0, NULL);

	args_add(stripped_args, argv[0]);

	for (i = 1; i < argc; i++) {
		/* The user knows best: just swallow the next arg */
		if (str_eq(argv[i], "--ccache-skip")) {
			i++;
			if (i == argc) {
				cc_log("--ccache-skip lacks an argument");
				result = false;
				goto out;
			}
			args_add(stripped_args, argv[i]);
			continue;
		}

		/* Special case for -E. */
		if (str_eq(argv[i], "-E")) {
			stats_update(STATS_PREPROCESSING);
			result = false;
			goto out;
		}

		/* These are always too hard. */
		if (compopt_too_hard(argv[i])) {
			cc_log("Compiler option %s is unsupported", argv[i]);
			stats_update(STATS_UNSUPPORTED);
			result = false;
			goto out;
		}

		/* These are too hard in direct mode. */
		if (enable_direct) {
			if (compopt_too_hard_for_direct_mode(argv[i])) {
				cc_log("Unsupported compiler option for direct mode: %s", argv[i]);
				enable_direct = false;
			}
		}

		/* we must have -c */
		if (str_eq(argv[i], "-c")) {
			args_add(stripped_args, argv[i]);
			found_c_opt = true;
			continue;
		}

		/* -S changes the default extension */
		/* TODO: Check this -S out!
		if (str_eq(argv[i], "-S")) {
			args_add(stripped_args, argv[i]);
			found_S_opt = true;
			continue;
		}
		*/

		/* we need to work out where the output was meant to go */
		if (str_eq(argv[i], "-o")) {
			if (i == argc-1) {
				cc_log("Missing argument to %s", argv[i]);
				stats_update(STATS_ARGS);
				result = false;
				goto out;
			}
			output_obj = argv[i+1];
			i++;
			continue;
		}

		/* alternate form of -o, with no space */
		if (str_startswith(argv[i], "-o")) {
			output_obj = &argv[i][2];
			continue;
		}

		/* debugging is handled specially, so that we know if we
		   can strip line number info
		*/
		if (str_startswith(argv[i], "-g")) {
			args_add(stripped_args, argv[i]);
			if (enable_unify) {
				cc_log("%s used; disabling unify mode", argv[i]);
				enable_unify = false;
			}
			continue;
		}

		if (str_startswith(argv[i], "-H")) {
			cc_log("Detected -H %s", argv[i]);
			args_add(h_args, argv[i]);
			found_H_opt = true;
			continue;
		}

		/*
		 * Options taking an argument that that we may want to rewrite
		 * to relative paths to get better hit rate. A secondary effect
		 * is that paths in the standard error output produced by the
		 * compiler will be normalized.
		 */
		if (compopt_takes_path(argv[i])) {
			char *relpath;
			if (i == argc-1) {
				cc_log("Missing argument to %s", argv[i]);
				stats_update(STATS_ARGS);
				result = false;
				goto out;
			}

			args_add(stripped_args, argv[i]);
			relpath = make_relative_path(x_strdup(argv[i+1]));
			args_add(stripped_args, relpath);

			free(relpath);
			i++;
			continue;
		}

		/* Same as above but options with concatenated argument. */
		if (compopt_short(compopt_takes_path, argv[i])) {
			char *relpath;
			char *option;
			relpath = make_relative_path(x_strdup(argv[i] + 2));
			option = format("-%c%s", argv[i][1], relpath);
			args_add(stripped_args, option);
			free(relpath);
			free(option);
			continue;
		}

		/* options that take an argument */
		if (compopt_takes_arg(argv[i])) {
			if (i == argc-1) {
				cc_log("Missing argument to %s", argv[i]);
				stats_update(STATS_ARGS);
				result = false;
				goto out;
			}
			args_add(stripped_args, argv[i]);
			args_add(stripped_args, argv[i+1]);
			i++;
			continue;
		}

		if (str_eq(argv[i], "-c++")) {
			force_preprocessor_type = 2;
			args_add(stripped_args, argv[i]);
			continue;
		}
		if (str_eq(argv[i], "-noc++")) {
			force_preprocessor_type = 1;
			args_add(stripped_args, argv[i]);
			continue;
		}

		/* other options */
		if (argv[i][0] == '-') {
			args_add(stripped_args, argv[i]);
			continue;
		}

		/* if an argument isn't a plain file then assume its
		   an option, not an input file. This allows us to
		   cope better with unusual compiler options */
		if (stat(argv[i], &st) != 0 || !S_ISREG(st.st_mode)) {
			cc_log("%s is not a regular file, not considering as input file",
			       argv[i]);
			args_add(stripped_args, argv[i]);
			continue;
		}

		if (input_file) {
			if (language_for_file(argv[i])) {
				cc_log("Multiple input files: %s and %s", input_file, argv[i]);
				stats_update(STATS_MULTIPLE);
			} else if (!found_c_opt) {
				cc_log("Called for link with %s", argv[i]);
				if (strstr(argv[i], "conftest.")) {
					stats_update(STATS_CONFTEST);
				} else {
					stats_update(STATS_LINK);
				}
			} else {
				cc_log("Unsupported source extension: %s", argv[i]);
				stats_update(STATS_SOURCELANG);
			}
			result = false;
			goto out;
		}

		/* Rewrite to relative to increase hit rate. */
		input_file = make_relative_path(x_strdup(argv[i]));
	}

	if (!input_file) {
		cc_log("No input file found");
		stats_update(STATS_NOINPUT);
		result = false;
		goto out;
	}

	if(force_preprocessor_type == 0) { 
		actual_language = language_for_file(input_file);
	} 
	else if(force_preprocessor_type == 2) { 
		actual_language = "c++";
	}
	else {
		actual_language = "c";
	}
	
	output_is_precompiled_header =
		actual_language && strstr(actual_language, "-header") != NULL;

	if (!found_c_opt && !output_is_precompiled_header) {
		cc_log("No -c option found");
		/* I find that having a separate statistic for autoconf tests is useful,
		   as they are the dominant form of "called for link" in many cases */
		if (strstr(input_file, "conftest.")) {
			stats_update(STATS_CONFTEST);
		} else {
			stats_update(STATS_LINK);
		}
		result = false;
		goto out;
	}

	if (!actual_language) {
		cc_log("Unsupported source extension: %s", input_file);
		stats_update(STATS_SOURCELANG);
		result = false;
		goto out;
	}

	direct_i_file = language_is_preprocessed(actual_language);

	if (output_is_precompiled_header) {
		/* It doesn't work to create the .gch from preprocessed source. */
		cc_log("Creating precompiled header; not compiling preprocessed code");
		compile_preprocessed_source_code = false;
	}

	i_extension = getenv("CCACHE_EXTENSION");
	if (!i_extension) {
		const char *p_language = p_language_for_language(actual_language);
		if(str_eq(p_language, "c++-cpp-output")) { 
			/* Dirty fix for preprocessed file extension for cc166.
			 * The cp166 cannot handle cpp files with extension ii.*/
			i_extension = "ii.cpp";
		}
		else { 
			i_extension = extension_for_language(p_language) + 1;
		}
	}

	/* don't try to second guess the compilers heuristics for stdout handling */
	if (output_obj && str_eq(output_obj, "-")) {
		stats_update(STATS_OUTSTDOUT);
		cc_log("Output file is -");
		result = false;
		goto out;
	}

	if (!output_obj) {
		if (output_is_precompiled_header) {
			output_obj = format("%s.gch", input_file);
		} else {
			char *p;
			output_obj = x_strdup(input_file);
			if ((p = strrchr(output_obj, '/'))) {
				output_obj = p+1;
			}
			p = strrchr(output_obj, '.');
			if (!p || !p[1]) {
				cc_log("Badly formed object filename");
				stats_update(STATS_ARGS);
				result = false;
				goto out;
			}
			*p = 0;
			p = output_obj;
			if(found_S_opt)
			{ 
				output_obj = format("%s.s", p);
			}
			else
			{ /*The default extension of object file is obj for c166.*/
				output_obj = format("%s.obj", p);
			}
			free(p);
		}
	}

	/* cope with -o /dev/null */
	if (!str_eq(output_obj,"/dev/null")
	    && stat(output_obj, &st) == 0
	    && !S_ISREG(st.st_mode)) {
		cc_log("Not a regular file: %s", output_obj);
		stats_update(STATS_DEVICE);
		result = false;
		goto out;
	}

	/*
	 * Some options shouldn't be passed to the real compiler when it compiles
	 * preprocessed code:
	 */
	*preprocessor_args = args_copy(stripped_args);
	/* Args with -H has been already preprocessed.
	 * If it passed to the compiler again, some type redefined error will pop up.*/
	if (found_H_opt) {
		args_extend(*preprocessor_args, h_args);
	}

	/*
	 * Add flags for dependency generation only to the preprocessor command line.
	 */
	if (generating_dependencies) {
		if (!dependency_filename_specified) {
			char *default_depfile_name;
			char *base_name;

			base_name = remove_extension(output_obj);
			default_depfile_name = format("%s.d", base_name);
			free(base_name);
			args_add(dep_args, "-MF");
			args_add(dep_args, default_depfile_name);
			output_dep = make_relative_path(x_strdup(default_depfile_name));
		}

		if (!dependency_target_specified) {
			args_add(dep_args, "-MQ");
			args_add(dep_args, output_obj);
		}
	}

	if (compile_preprocessed_source_code) {
		*compiler_args = args_copy(stripped_args);
	} else {
		*compiler_args = args_copy(*preprocessor_args);
	}
	
	/* Due to bugs or cc166 v8.6r3, the behaviours of c/c++ preprocessor 
	 * are quite different.
	 * When using cpp preprocessor, the output will be directly send to stdout like gcc.
	 * When using c preprocessor, the output will be written to filename.i even without "-o".*/
	if(str_eq(actual_language, "c"))
	{ 
#ifdef _WIN32
#error Never test this in Windows.
#else
		args_add(*preprocessor_args, "-o/dev/stdout");
#endif
	}

	/*
	 * Only pass dependency arguments to the preprocesor since Intel's C++
	 * compiler doesn't produce a correct .d file when compiling preprocessed
	 * source.
	 */
	args_extend(*preprocessor_args, dep_args);

out:
	args_free(stripped_args);
	args_free(dep_args);
	args_free(h_args);

	return result;
}
Beispiel #19
0
/**
 * The same than main, but no call to setBufferMode.
 */
int main_BuildKrMwuDic(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return 0;
}


int val,index=-1;
char output[FILENAME_MAX]="";
char inflection_dir[FILENAME_MAX]="";
char alphabet[FILENAME_MAX]="";
char dic_bin[FILENAME_MAX]="";
char dic_inf[FILENAME_MAX]="";
Encoding encoding_output = DEFAULT_ENCODING_OUTPUT;
int bom_output = DEFAULT_BOM_OUTPUT;
int mask_encoding_compatibility_input = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT;
struct OptVars* vars=new_OptVars();
while (EOF!=(val=getopt_long_TS(argc,argv,optstring_BuildKrMwuDic,lopts_BuildKrMwuDic,&index,vars))) {
   switch(val) {
   case 'o': if (vars->optarg[0]=='\0') {
                fatal_error("You must specify a non empty output file name\n");
             }
             strcpy(output,vars->optarg);
             break;
   case 'd': if (vars->optarg[0]=='\0') {
                fatal_error("Empty inflection directory\n");
             }
             strcpy(inflection_dir,vars->optarg);
             break;
   case 'a': if (vars->optarg[0]=='\0') {
                fatal_error("You must specify a non empty alphabet file name\n");
             }
             strcpy(alphabet,vars->optarg);
             break;
   case 'b': if (vars->optarg[0]=='\0') {
                fatal_error("You must specify a non empty binary dictionary name\n");
             }
             strcpy(dic_bin,vars->optarg);
             remove_extension(dic_bin,dic_inf);
             strcat(dic_inf,".inf");
             break;
   case 'h': usage(); return 0;
   case ':': if (index==-1) fatal_error("Missing argument for option -%c\n",vars->optopt);
             else fatal_error("Missing argument for option --%s\n",lopts_BuildKrMwuDic[index].name);
   case '?': if (index==-1) fatal_error("Invalid option -%c\n",vars->optopt);
             else fatal_error("Invalid option --%s\n",vars->optarg);
             break;
   case 'k': if (vars->optarg[0]=='\0') {
                fatal_error("Empty input_encoding argument\n");
             }
             decode_reading_encoding_parameter(&mask_encoding_compatibility_input,vars->optarg);
             break;
   case 'q': if (vars->optarg[0]=='\0') {
                fatal_error("Empty output_encoding argument\n");
             }
             decode_writing_encoding_parameter(&encoding_output,&bom_output,vars->optarg);
             break;
   }
   index=-1;
}
if (vars->optind!=argc-1) {
   fatal_error("Invalid arguments: rerun with --help\n");
}
if (output[0]=='\0') {
   fatal_error("Output file must be specified\n");
}
if (inflection_dir[0]=='\0') {
   fatal_error("Inflection directory must be specified\n");
}
if (alphabet[0]=='\0') {
   fatal_error("Alphabet file must be specified\n");
}
if (dic_bin[0]=='\0') {
   fatal_error("Binary dictionary must be specified\n");
}

U_FILE* delas=u_fopen_existing_versatile_encoding(mask_encoding_compatibility_input,argv[vars->optind],U_READ);
if (delas==NULL) {
   fatal_error("Cannot open %s\n",argv[vars->optind]);
}
U_FILE* grf=u_fopen_existing_versatile_encoding(mask_encoding_compatibility_input,output,U_WRITE);
if (grf==NULL) {
   fatal_error("Cannot open %s\n",output);
}
Alphabet* alph=load_alphabet(alphabet,1);
if (alph==NULL) {
   fatal_error("Cannot open alphabet file %s\n",alphabet);
}
Korean* korean=new Korean(alph);
MultiFlex_ctx* multiFlex_ctx = (MultiFlex_ctx*)malloc(sizeof(MultiFlex_ctx));
if (multiFlex_ctx==NULL) {
   fatal_alloc_error("main_BuildKrMwuDic");
}
strcpy(multiFlex_ctx->inflection_directory,inflection_dir);
if (init_transducer_tree(multiFlex_ctx)) {
   fatal_error("init_transducer_tree error\n");
}
struct l_morpho_t* pL_MORPHO=init_langage_morph();
if (pL_MORPHO == NULL) {
   fatal_error("init_langage_morph error\n");
}

unsigned char* bin=load_BIN_file(dic_bin);
struct INF_codes* inf=load_INF_file(dic_inf);

create_mwu_dictionary(delas,grf,multiFlex_ctx,korean,pL_MORPHO,encoding_output,
       bom_output,mask_encoding_compatibility_input,bin,inf);

free(bin);
free_INF_codes(inf);
u_fclose(delas);
u_fclose(grf);
free_alphabet(alph);
delete korean;
free_transducer_tree(multiFlex_ctx);
for (int count_free_fst2=0;count_free_fst2<multiFlex_ctx->n_fst2;count_free_fst2++) {
    free_abstract_Fst2(multiFlex_ctx->fst2[count_free_fst2],&(multiFlex_ctx->fst2_free[count_free_fst2]));
    multiFlex_ctx->fst2[count_free_fst2]=NULL;
}
free_language_morpho(pL_MORPHO);
free(multiFlex_ctx);
free_OptVars(vars);
u_printf("Done.\n");
return 0;
}
Beispiel #20
0
/*
 * Process the compiler options into options suitable for passing to the
 * preprocessor and the real compiler. The preprocessor options don't include
 * -E; this is added later. Returns true on success, otherwise false.
 */
bool
armcc_process_args(struct args *orig_args, struct args **preprocessor_args,
                struct args **compiler_args)
{
	int i;
	bool found_c_opt = false;
	bool found_S_opt = false;
	bool found_pch = false;
	bool found_fpch_preprocess = false;
	const char *actual_language;          /* Language to actually use. */
	const char *input_charset = NULL;
	struct stat st;
	/* is the dependency makefile name overridden with --depend? */
	bool dependency_filename_specified = false;
	/* is the dependency makefile target name specified ? */
	bool dependency_target_specified = false;
	char *dep_file = NULL, *dep_dir = NULL;
	struct args *stripped_args = NULL, *dep_args = NULL;
	int argc = orig_args->argc;
	char **argv = orig_args->argv;
	bool result = true;
	/* 0: Choose preprocessor type by the file extension.
	 * 1: Use c preprocessor.
	 * 2: Use c++ preprocessor.*/
	unsigned force_preprocessor_type = 0;

	stripped_args = args_init(0, NULL);
	dep_args = args_init(0, NULL);

	args_add(stripped_args, argv[0]);

	for (i = 1; i < argc; i++) {
		/* The user knows best: just swallow the next arg */
		if (str_eq(argv[i], "--ccache-skip")) {
			i++;
			if (i == argc) {
				cc_log("--ccache-skip lacks an argument");
				result = false;
				goto out;
			}
			args_add(stripped_args, argv[i]);
			continue;
		}

		/* Special case for -E. */
		if (str_eq(argv[i], "-E")) {
			stats_update(STATS_PREPROCESSING);
			result = false;
			goto out;
		}

		/* These are always too hard. */
		if (compopt_too_hard(argv[i])
		    || str_startswith(argv[i], "@")
		    || str_startswith(argv[i], "-fdump-")) {
			cc_log("Compiler option %s is unsupported", argv[i]);
			stats_update(STATS_UNSUPPORTED);
			result = false;
			goto out;
		}

		/* These are too hard in direct mode. */
		if (enable_direct) {
			if (compopt_too_hard_for_direct_mode(argv[i])) {
				cc_log("Unsupported compiler option for direct mode: %s", argv[i]);
				enable_direct = false;
			}
		}

		/* we must have -c */
		if (str_eq(argv[i], "-c")) {
			args_add(stripped_args, argv[i]);
			found_c_opt = true;
			continue;
		}

		/* -S changes the default extension */
		if (str_eq(argv[i], "-S")) {
			args_add(stripped_args, argv[i]);
			found_S_opt = true;
			continue;
		}

		/* we need to work out where the output was meant to go */
		if (str_eq(argv[i], "-o")) {
			if (i == argc-1) {
				cc_log("Missing argument to %s", argv[i]);
				stats_update(STATS_ARGS);
				result = false;
				goto out;
			}
			output_obj = argv[i+1];
			i++;
			continue;
		}

		/* alternate form of -o, with no space */
		if (str_startswith(argv[i], "-o")) {
			output_obj = &argv[i][2];
			continue;
		}


		/* If multiple source type options are there, the armcc will use the last one. */
		if (str_eq(argv[i], "--cpp")) { 
			force_preprocessor_type = 2;
			continue;
		}
		else if (str_eq(argv[i], "--c90") || str_eq(argv[i], "--c99")) { 
			force_preprocessor_type = 1;
			continue;
		}

		if (str_eq(argv[i], "--md")) { 
			generating_dependencies = true;
			continue;
		}

		/* The rvct started supporting --depend_target from 4.0.
		 * And there is a bug when using -E and --depend together with rvct which version is earlier than 4.0_697.
		 * That is too hard to support "--depend" for the earlier version of rvct.*/
		if (str_startswith(argv[i], "--depend_dir")) {
			/* We just concat the dir and the filename and pass the result 
			 * to --depend. */
			if (i >= argc - 1) {
				cc_log("Missing argument to %s", argv[i]);
				stats_update(STATS_ARGS);
				result = false;
				goto out;
			}
			dep_dir= x_strdup(argv[i+1]);
			i++;
			continue;
		}
		else if (str_startswith(argv[i], "--depend_target")) {
			if (i >= argc - 1) {
				cc_log("Missing argument to %s", argv[i]);
				stats_update(STATS_ARGS);
				result = false;
				goto out;
			}
			dependency_target_specified = true;
			args_add(dep_args, argv[i]);
			args_add(dep_args, argv[i+1]);
			i++;
			continue;
		}
		else if (str_startswith(argv[i], "--depend")) {
			dependency_filename_specified = true;
			generating_dependencies = true;

			if (i >= argc - 1) {
				cc_log("Missing argument to %s", argv[i]);
				stats_update(STATS_ARGS);
				result = false;
				goto out;
			}
			dep_file = x_strdup(argv[i + 1]);
			i++;
			continue;
		}


		/*
		 * Options taking an argument that that we may want to rewrite
		 * to relative paths to get better hit rate. A secondary effect
		 * is that paths in the standard error output produced by the
		 * compiler will be normalized.
		 */
		if (compopt_takes_path(argv[i])) {
			char *relpath;
			char *pchpath;
			if (i == argc-1) {
				cc_log("Missing argument to %s", argv[i]);
				stats_update(STATS_ARGS);
				result = false;
				goto out;
			}

			args_add(stripped_args, argv[i]);
			relpath = make_relative_path(x_strdup(argv[i+1]));
			args_add(stripped_args, relpath);

			/* Try to be smart about detecting precompiled headers */
			pchpath = format("%s.gch", argv[i+1]);
			if (stat(pchpath, &st) == 0) {
				cc_log("Detected use of precompiled header: %s", pchpath);
				found_pch = true;
			}

			free(pchpath);
			free(relpath);
			i++;
			continue;
		}

		/* Same as above but options with concatenated argument. */
		if (compopt_short(compopt_takes_path, argv[i])) {
			char *relpath;
			char *option;
			relpath = make_relative_path(x_strdup(argv[i] + 2));
			option = format("-%c%s", argv[i][1], relpath);
			args_add(stripped_args, option);
			free(relpath);
			free(option);
			continue;
		}

		/* options that take an argument */
		if (compopt_takes_arg(argv[i])) {
			if (i == argc-1) {
				cc_log("Missing argument to %s", argv[i]);
				stats_update(STATS_ARGS);
				result = false;
				goto out;
			}
			args_add(stripped_args, argv[i]);
			args_add(stripped_args, argv[i+1]);
			i++;
			continue;
		}

		/* other options */
		if (argv[i][0] == '-') {
			args_add(stripped_args, argv[i]);
			continue;
		}

		/* if an argument isn't a plain file then assume its
		   an option, not an input file. This allows us to
		   cope better with unusual compiler options */
		if (stat(argv[i], &st) != 0 || !S_ISREG(st.st_mode)) {
			cc_log("%s is not a regular file, not considering as input file",
			       argv[i]);
			args_add(stripped_args, argv[i]);
			continue;
		}

		if (input_file) {
			if (language_for_file(argv[i])) {
				cc_log("Multiple input files: %s and %s", input_file, argv[i]);
				stats_update(STATS_MULTIPLE);
			} else if (!found_c_opt) {
				cc_log("Called for link with %s", argv[i]);
				if (strstr(argv[i], "conftest.")) {
					stats_update(STATS_CONFTEST);
				} else {
					stats_update(STATS_LINK);
				}
			} else {
				cc_log("Unsupported source extension: %s", argv[i]);
				stats_update(STATS_SOURCELANG);
			}
			result = false;
			goto out;
		}

		/* Rewrite to relative to increase hit rate. */
		input_file = make_relative_path(x_strdup(argv[i]));
	}

	if (!input_file) {
		cc_log("No input file found");
		stats_update(STATS_NOINPUT);
		result = false;
		goto out;
	}

	if (found_pch || found_fpch_preprocess) {
		using_precompiled_header = true;
		if (!(sloppiness & SLOPPY_TIME_MACROS)) {
			cc_log("You have to specify \"time_macros\" sloppiness when using"
			       " precompiled headers to get direct hits");
			cc_log("Disabling direct mode");
			stats_update(STATS_CANTUSEPCH);
			result = false;
			goto out;
		}
	}

	if(force_preprocessor_type == 0) { 
		actual_language = language_for_file(input_file);
	} 
	else if(force_preprocessor_type == 2) { 
		actual_language = "c++";
	}
	else {
		actual_language = "c";
	}

	output_is_precompiled_header =
		actual_language && strstr(actual_language, "-header") != NULL;

	if (!found_c_opt && !output_is_precompiled_header) {
		cc_log("No -c option found");
		/* I find that having a separate statistic for autoconf tests is useful,
		   as they are the dominant form of "called for link" in many cases */
		if (strstr(input_file, "conftest.")) {
			stats_update(STATS_CONFTEST);
		} else {
			stats_update(STATS_LINK);
		}
		result = false;
		goto out;
	}

	if (!actual_language) {
		cc_log("Unsupported source extension: %s", input_file);
		stats_update(STATS_SOURCELANG);
		result = false;
		goto out;
	}

	direct_i_file = language_is_preprocessed(actual_language);

	if (output_is_precompiled_header) {
		/* It doesn't work to create the .gch from preprocessed source. */
		cc_log("Creating precompiled header; not compiling preprocessed code");
		compile_preprocessed_source_code = false;
	}

	/* don't try to second guess the compilers heuristics for stdout handling */
	if (output_obj && str_eq(output_obj, "-")) {
		stats_update(STATS_OUTSTDOUT);
		cc_log("Output file is -");
		result = false;
		goto out;
	}

	if (!output_obj) {
		if (output_is_precompiled_header) {
			output_obj = format("%s.gch", input_file);
		} else {
			char *p;
			output_obj = x_strdup(input_file);
			if ((p = strrchr(output_obj, '/'))) {
				output_obj = p+1;
			}
			p = strrchr(output_obj, '.');
			if (!p || !p[1]) {
				cc_log("Badly formed object filename");
				stats_update(STATS_ARGS);
				result = false;
				goto out;
			}
			p[1] = found_S_opt ? 's' : 'o';
			p[2] = 0;
		}
	}

	/* cope with -o /dev/null */
	if (!str_eq(output_obj,"/dev/null")
	    && stat(output_obj, &st) == 0
	    && !S_ISREG(st.st_mode)) {
		cc_log("Not a regular file: %s", output_obj);
		stats_update(STATS_DEVICE);
		result = false;
		goto out;
	}

	/*
	 * Some options shouldn't be passed to the real compiler when it compiles
	 * preprocessed code:
	 *
	 * -finput-charset=XXX (otherwise conversion happens twice)
	 * -x XXX (otherwise the wrong language is selected)
	 */
	*preprocessor_args = args_copy(stripped_args);
	if (input_charset) {
		args_add(*preprocessor_args, input_charset);
	}
	if (found_pch) {
		args_add(*preprocessor_args, "-fpch-preprocess");
	}

	/*
	 * Add flags for dependency generation only to the preprocessor command line.
	 */
	if (generating_dependencies) {
		char *dep_path;

		if(!dependency_filename_specified)
		{ 
			char *base_name;

			base_name = remove_extension(output_obj);
			dep_file = format("%s.d", base_name);
			free(base_name);
		}

		if (!dependency_target_specified) {
			args_add(dep_args, "--depend_target");
			args_add(dep_args, output_obj);
		}

		free(output_dep);
		
		if(dep_dir)
		{ 
#ifdef _WIN32
			dep_path = make_relative_path(format("%s\\%s", dep_dir, dep_file));
#else
			dep_path = make_relative_path(format("%s/%s", dep_dir, dep_file));
#endif
		}
		else
		{ 
			dep_path = x_strdup(dep_file);
		}

		args_add(dep_args, "--depend");
		args_add(dep_args, dep_path);
		/* dep_path will be free in make_relative_path */
		output_dep = make_relative_path(x_strdup(dep_path));
	}

	if (compile_preprocessed_source_code) {
		*compiler_args = args_copy(stripped_args);
	} else {
		*compiler_args = args_copy(*preprocessor_args);
	}

	i_extension = getenv("CCACHE_EXTENSION");
	if (!i_extension) {
		const char *p_language = p_language_for_language(actual_language);
		i_extension = extension_for_language(p_language) + 1;

	}
	/* Patch for preprocessed file extension for armcc 3.1.
	 * armcc 3.1 cannot recognize "i" or "ii" as the preprocessed source file 
	 * without --compile_all_input. */
	args_add(*compiler_args, "--compile_all_input");
	if (str_eq(i_extension, "ii")) { 
		args_add(*compiler_args, "--cpp");
	}

	/*
	 * Only pass dependency arguments to the preprocesor since Intel's C++
	 * compiler doesn't produce a correct .d file when compiling preprocessed
	 * source.
	 */
	args_extend(*preprocessor_args, dep_args);

out:
	free(dep_file);
	free(dep_dir);
	args_free(stripped_args);
	args_free(dep_args);
	return result;
}
Beispiel #21
0
int main_TEI2Txt(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return SUCCESS_RETURN_CODE;
}

char output[FILENAME_MAX]="";
VersatileEncodingConfig vec=VEC_DEFAULT;
int val,index=-1;
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_TEI2Txt,lopts_TEI2Txt,&index))) {
   switch(val) {
   case 'o': if (options.vars()->optarg[0]=='\0') {
                error("You must specify a non empty output file name\n");
                return USAGE_ERROR_CODE;
             }
             strcpy(output,options.vars()->optarg);
             break;
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage();
             return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_TEI2Txt[index].name);
             return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
   }
   index=-1;
}

if (options.vars()->optind!=argc-1) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  return SUCCESS_RETURN_CODE;
}

if(output[0]=='\0') {
  remove_extension(argv[options.vars()->optind],output);
    strcat(output,".txt");
}

int return_value = tei2txt(argv[options.vars()->optind],output,&vec);

return return_value;
}
Beispiel #22
0
int do_init(int argc, char** argv)
{
	FILE *list;
	char line[1024];
	struct map_data map;
	char name[MAP_NAME_LENGTH_EXT];

	grf_list_file = aStrdup("conf/grf-files.txt");
	map_list_file = aStrdup("db/map_index.txt");
	/* setup pre-defined, #define-dependant */
	map_cache_file = aStrdup("db/map_cache.dat");

	cmdline->exec(argc, argv, CMDLINE_OPT_PREINIT);
	cmdline->exec(argc, argv, CMDLINE_OPT_NORMAL);

	ShowStatus("Initializing grfio with %s\n", grf_list_file);
	grfio_init(grf_list_file);

	// Attempt to open the map cache file and force rebuild if not found
	ShowStatus("Opening map cache: %s\n", map_cache_file);
	if(!rebuild) {
		map_cache_fp = fopen(map_cache_file, "rb");
		if(map_cache_fp == NULL) {
			ShowNotice("Existing map cache not found, forcing rebuild mode\n");
			rebuild = 1;
		} else
			fclose(map_cache_fp);
	}
	if(rebuild)
		map_cache_fp = fopen(map_cache_file, "w+b");
	else
		map_cache_fp = fopen(map_cache_file, "r+b");
	if(map_cache_fp == NULL) {
		ShowError("Failure when opening map cache file %s\n", map_cache_file);
		exit(EXIT_FAILURE);
	}

	// Open the map list
	ShowStatus("Opening map list: %s\n", map_list_file);
	list = fopen(map_list_file, "r");
	if(list == NULL) {
		ShowError("Failure when opening maps list file %s\n", map_list_file);
		exit(EXIT_FAILURE);
	}

	// Initialize the main header
	if(rebuild) {
		header.file_size = sizeof(struct main_header);
		header.map_count = 0;
	} else {
		if(fread(&header, sizeof(struct main_header), 1, map_cache_fp) != 1){ printf("An error as occured while reading map_cache_fp \n"); }
		header.file_size = GetULong((unsigned char *)&(header.file_size));
		header.map_count = GetUShort((unsigned char *)&(header.map_count));
	}

	// Read and process the map list
	while(fgets(line, sizeof(line), list))
	{
		if(line[0] == '/' && line[1] == '/')
			continue;

		if(sscanf(line, "%15s", name) < 1)
			continue;

		if(strcmp("map:", name) == 0 && sscanf(line, "%*s %15s", name) < 1)
			continue;

		name[MAP_NAME_LENGTH_EXT-1] = '\0';
		remove_extension(name);
		if (find_map(name)) {
			ShowInfo("Map '"CL_WHITE"%s"CL_RESET"' already in cache.\n", name);
		} else if(!read_map(name, &map)) {
			ShowError("Map '"CL_WHITE"%s"CL_RESET"' not found!\n", name);
		} else if (!cache_map(name, &map)) {
			ShowError("Map '"CL_WHITE"%s"CL_RESET"' failed to cache (write error).\n", name);
		} else {
			ShowInfo("Map '"CL_WHITE"%s"CL_RESET"' successfully cached.\n", name);
		}
	}

	ShowStatus("Closing map list: %s\n", map_list_file);
	fclose(list);

	// Write the main header and close the map cache
	ShowStatus("Closing map cache: %s\n", map_cache_file);
	fseek(map_cache_fp, 0, SEEK_SET);
	fwrite(&header, sizeof(struct main_header), 1, map_cache_fp);
	fclose(map_cache_fp);

	ShowStatus("Finalizing grfio\n");
	grfio_final();

	ShowInfo("%d maps now in cache\n", header.map_count);

	aFree(grf_list_file);
	aFree(map_list_file);
	aFree(map_cache_file);

	return 0;
}
void options_screen()
{

	/* options are */
	/*		17 Lines			-7 -1025

		0 New Progs			 ON    OFF   SAME
		1 New Accs			 ON	 OFF   SAME

		2 Auto Window		PAGING	SCROLLING
		3 Accs Window		PAGING	SCROLLING
		4 Sets Window		PAGING	SCROLLING		
		5 Other Window		PAGING	SCROLLING		

		6 Auto Path   
		 --------------------------------------

		7 Accs Path 
		 --------------------------------------					 
	
		8 <Set Keys>				9 <Resolution Info>

		10	[   OK   ]   11[ CANCEL ]   12[ SAVE ]

	*/

#define MAX_OPTIONS	12

int options_y, options_x, curr_option, old_option, exit_options;
long key; int upk,lowk;
/*char temp[60];
*/
/* local defaults */
	int lnew_progs_flag;	/* 1 = ON, 2 = OFF, 3 = SAME */
	int lnew_accs_flag;
	int lauto_paging;		/* 1 = paging, 2 = scrolling */
	int laccs_paging;
	int lsets_paging;
	int lother_paging;
	char lauto_path[FILENAME_MAX];
	char laccs_path[FILENAME_MAX];

	curr_option=0;
	old_option=11;

	lnew_progs_flag=prog_defaults.new_progs_flag;
	lnew_accs_flag=prog_defaults.new_accs_flag;
	lauto_paging=prog_defaults.auto_paging;		
	laccs_paging=prog_defaults.accs_paging;
	lsets_paging=prog_defaults.sets_paging;
	lother_paging=prog_defaults.other_paging;
	strcpy(lauto_path,prog_defaults.auto_path);
	strcpy(laccs_path,prog_defaults.accs_path);

	options_y=prog_defaults.rows-3-17+1;
	if(options_y>0)
		options_y=options_y/2;
	else
		options_y=1;

	options_x=prog_defaults.columns-40;
	if(options_x>0)
		options_x=options_x/2;
	else
		options_x=0;

	CLEAR_SCREEN

	centre_text("OPTIONS",line_buffer,prog_defaults.columns,' ');
	
	DISCARD_EOL
	AT(title_row,0)
	REVERSE_VIDEO
	printf("%s",line_buffer);
	NORMAL_VIDEO

	options_bottom_title();

	AT(options_y,options_x+1)
	printf("New Progs");
	print_on_off_same(options_y,options_x+1,lnew_progs_flag);
	AT(options_y+1,options_x+1)
	printf("New Accs");
	print_on_off_same(options_y+1,options_x+1,lnew_accs_flag);

	AT(options_y+3,options_x+1)
	printf("Auto Window");
	print_paging_scrolling(options_y+3,options_x+1,lauto_paging);
	AT(options_y+4,options_x+1)
	printf("Accs Window");
	print_paging_scrolling(options_y+4,options_x+1,laccs_paging);
	AT(options_y+5,options_x+1)
	printf("Sets Window");
	print_paging_scrolling(options_y+5,options_x+1,lsets_paging);
	AT(options_y+6,options_x+1)
	printf("Other Window");
	print_paging_scrolling(options_y+6,options_x+1,lother_paging);

	remove_extension(lauto_path);
	AT(options_y+8,options_x+1)
	printf("Auto Path");
/*	AT(options_y+9,options_x+1)
	printf("--------------------------------------");
*/	AT(options_y+9,options_x+1)
	printf("%s",lauto_path);

	remove_extension(laccs_path);
	AT(options_y+11,options_x+1)
	printf("Accs Path");
/*	AT(options_y+12,options_x+1)
	printf("--------------------------------------");
*/	AT(options_y+12,options_x+1)
	printf("%s",laccs_path);

	AT(options_y+14,options_x+1)
	printf("<Set Keys>");

	AT(options_y+14,options_x+22)
	printf("<Resolution Info>");
	
	AT(options_y+16,options_x+3)
	printf("[   OK   ]");

	AT(options_y+16,options_x+15)
	printf("[ CANCEL ]");

	AT(options_y+16,options_x+27)
	printf("[  SAVE  ]");

	upk=0;lowk=0;exit_options=0;
	print_option(options_y,options_x,old_option, curr_option);

	while(exit_options==0)
	{
			while(!Bconstat(2))
			;

			key=Bconin(2);
			upk=(int)(key>>16);
			lowk=(int)(key%256);

			if(upk==0x48 )	/* up arrow */
			{
				old_option=curr_option;
				switch(curr_option)
				{
				case 11:
				case 10:
						curr_option=8;
						break;
				case 12:
						curr_option=9;
						break;
				case 9:
						curr_option=7;
						break;
				default:
						curr_option=curr_option-1;
						break;
				}

				if(curr_option<0)
					curr_option=MAX_OPTIONS;
				print_option(options_y,options_x,old_option, curr_option);
			}
			else
			{
			if(upk==0x50)	/* down arrow */
				{
					old_option=curr_option;

					switch(curr_option)
					{
					case 8: 
							curr_option=10;
							break;
					case 9:
							curr_option =12;
							break;
					case 10:
					case 11:
					case 12:
							curr_option=0;
							break;		
					default:
							curr_option=curr_option+1;
							break;
					}
					if(curr_option>MAX_OPTIONS)
						curr_option=0;
					print_option(options_y,options_x,old_option, curr_option);
				}	
				else
				{	
					if(upk==0x4D)	/* right arrow */
					{
					old_option=curr_option;

						switch(curr_option)
						{
						case 0:
								lnew_progs_flag++;
								if(lnew_progs_flag>3)
									lnew_progs_flag=1;
								print_on_off_same(options_y,options_x+1,lnew_progs_flag);
								break;
						case 1:
								lnew_accs_flag++;
								if(lnew_accs_flag>3)
									lnew_accs_flag=1;
								print_on_off_same(options_y+1,options_x+1,lnew_accs_flag);
								break;
						case 2:
								lauto_paging++;
								if(lauto_paging>SCROLLING)
									lauto_paging=PAGING;
								print_paging_scrolling(options_y+3,options_x+1,lauto_paging);
								break;
						case 3:
								laccs_paging++;
								if(laccs_paging>SCROLLING)
									laccs_paging=PAGING;
								print_paging_scrolling(options_y+4,options_x+1,laccs_paging);
								break;
						case 4:
								lsets_paging++;
								if(lsets_paging>SCROLLING)
									lsets_paging=PAGING;
								print_paging_scrolling(options_y+5,options_x+1,lsets_paging);
								break;
						case 5:
								lother_paging++;
								if(lother_paging>SCROLLING)
									lother_paging=PAGING;
								print_paging_scrolling(options_y+6,options_x+1,lother_paging);
								break;

						case 8:
						case 10:
						case 11:
								curr_option++;
								break;
						}
						if(curr_option>MAX_OPTIONS)
							curr_option=0;
						print_option(options_y,options_x,old_option, curr_option);
					}	
					else
						{
							if(upk==0x4B)	/* left arrow */
							{
								old_option=curr_option;

								switch(curr_option)
								{
								case 0:
										lnew_progs_flag--;
										if(lnew_progs_flag<1)
											lnew_progs_flag=3;
										print_on_off_same(options_y,options_x+1,lnew_progs_flag);
										break;
								case 1:
										lnew_accs_flag--;
										if(lnew_accs_flag<1)
											lnew_accs_flag=3;
										print_on_off_same(options_y+1,options_x+1,lnew_accs_flag);
										break;
								case 2:
										lauto_paging--;
										if(lauto_paging<PAGING)
											lauto_paging=SCROLLING;
										print_paging_scrolling(options_y+3,options_x+1,lauto_paging);
										break;
								case 3:
										laccs_paging--;
										if(laccs_paging<PAGING)
											laccs_paging=SCROLLING;
										print_paging_scrolling(options_y+4,options_x+1,laccs_paging);
										break;
								case 4:
										lsets_paging--;
										if(lsets_paging<PAGING)
											lsets_paging=SCROLLING;
										print_paging_scrolling(options_y+5,options_x+1,lsets_paging);
										break;
								case 5:
										lother_paging--;
										if(lother_paging<PAGING)
											lother_paging=SCROLLING;
										print_paging_scrolling(options_y+6,options_x+1,lother_paging);
										break;
								case 9:
								case 11:
								case 12:
								curr_option--;
								break;
								}
								if(curr_option<0)
									curr_option=MAX_OPTIONS;
								print_option(options_y,options_x,old_option, curr_option);
							}
							else
							{
							if(upk==0x1C) /* return */
							{
								switch(curr_option)
								{
								case 6:
										edit_at(options_y+9,options_x+1,FILENAME_MAX,37,lauto_path,'-',0,valid_filename_char,1);
										options_bottom_title();
										break;
								case 7:
										edit_at(options_y+12,options_x+1,FILENAME_MAX,37,laccs_path,'-',0,valid_filename_char,1);
										options_bottom_title();
										break;
								case 8:
										/* set keys */
										break;
								case 9:
										/* set res stuff */
										break;
								case 10:
										/* set defaults */
										prog_defaults.new_progs_flag=lnew_progs_flag;
										prog_defaults.new_accs_flag=lnew_accs_flag;
										prog_defaults.auto_paging=lauto_paging;		
										prog_defaults.accs_paging=laccs_paging;
										prog_defaults.sets_paging=lsets_paging;
										prog_defaults.other_paging=lother_paging;
										strcpy(prog_defaults.auto_path,lauto_path);
										strcpy(prog_defaults.accs_path,laccs_path);
										exit_options=1;	
										break;
								case 11:
										exit_options=1;	
										break;
								case 12:
										/* save defaults */
										break;
								}
							}
							else
							{
								if(upk==prog_defaults.toggle_key_high)
								{
									switch(curr_option)
									{
									case 0:
										lnew_progs_flag++;
										if(lnew_progs_flag>3)
											lnew_progs_flag=1;
										print_on_off_same(options_y,options_x+1,lnew_progs_flag);
										break;
									case 1:
											lnew_accs_flag++;
											if(lnew_accs_flag>3)
											lnew_accs_flag=1;
											print_on_off_same(options_y+1,options_x+1,lnew_accs_flag);
											break;
									case 2:
											lauto_paging++;
											if(lauto_paging>SCROLLING)
												lauto_paging=PAGING;
											print_paging_scrolling(options_y+3,options_x+1,lauto_paging);
											break;
									case 3:
											laccs_paging++;
											if(laccs_paging>SCROLLING)
												laccs_paging=PAGING;
											print_paging_scrolling(options_y+4,options_x+1,laccs_paging);
											break;
									case 4:
											lsets_paging++;
											if(lsets_paging>SCROLLING)
												lsets_paging=PAGING;
											print_paging_scrolling(options_y+5,options_x+1,lsets_paging);
											break;
									case 5:
											lother_paging++;
											if(lother_paging>SCROLLING)
												lother_paging=PAGING;
											print_paging_scrolling(options_y+6,options_x+1,lother_paging);
											break;
									}
								}
							}

							}	
						}				
				}
			}

	}

}
/**
 * This function reads a file that contains a list of Elag grammar names,
 * and it compiles them into the file 'outname'. However, if the result
 * automaton is too big, it will be saved in several automata inside
 * the output file.
 */
int compile_elag_rules(char* rulesname,char* outname, const VersatileEncodingConfig* vec,language_t* language) {
u_printf("Compilation of %s\n",rulesname);
U_FILE* f=NULL;
U_FILE* frules=u_fopen(ASCII,rulesname,U_READ);
if (frules==NULL) {
   fatal_error("Cannot open file '%s'\n",rulesname);
}
U_FILE* out=u_fopen(ASCII,outname,U_WRITE);
if (out==NULL) {
   fatal_error("cannot open file '%s'\n",outname);
}
/* Name of the file that contains the result automaton */
char fstoutname[FILENAME_MAX];
int nbRules=0;
char buf[FILENAME_MAX];
time_t start_time=time(0);
Fst2Automaton* res=NULL;
Fst2Automaton* A;
int fst_number=0;
Ustring* ustr=new_Ustring();

char buf2[FILENAME_MAX];
char directory[FILENAME_MAX];
get_path(rulesname,directory);

while (af_fgets(buf,FILENAME_MAX,frules->f)) {
   /* We read one by one the Elag grammar names in the .lst file */
   chomp(buf);
   if (*buf=='\0') {
      /* If we have an empty line */
      continue;
   }
   if (!is_absolute_path(buf)) {
      strcpy(buf2,buf);
      sprintf(buf,"%s%s",directory,buf2);
   }

   u_printf("\n%s...\n",buf);
   remove_extension(buf);
   strcat(buf,".elg");
   if ((f=u_fopen(ASCII,buf,U_READ))==NULL) {
      /* If the .elg file doesn't exist, we create one */
      remove_extension(buf);
      u_printf("Precompiling %s.fst2\n",buf);
      strcat(buf,".fst2");
      elRule* rule=new_elRule(buf,vec,language);
      if (rule==NULL) {
         fatal_error("Unable to read grammar '%s'\n",buf);
      }
      if ((A=compile_elag_rule(rule,language))==NULL) {
         fatal_error("Unable to compile rule '%s'\n",buf);
      }
      free_elRule(rule);
   } else {
      /* If there is already .elg, we use it */
      u_fclose(f);
      A=load_elag_grammar_automaton(vec,buf,language);
      if (A==NULL) {
         fatal_error("Unable to load '%s'\n",buf);
      }
   }
   if (A->automaton->number_of_states==0) {
      error("Grammar %s forbids everything!\n",buf);
   }
   if (res!=NULL) {
      /* If there is already an automaton, we intersect it with the new one */
      SingleGraph tmp=res->automaton;
      res->automaton=elag_intersection(language,tmp,A->automaton,GRAMMAR_GRAMMAR);
      free_SingleGraph(tmp,NULL);
      free_Fst2Automaton(A,NULL);
      trim(res->automaton,NULL);
   } else {
      res=A;
   }
   nbRules++;
   if (res->automaton->number_of_states>MAX_GRAM_SIZE) {
      /* If the automaton is too large, we will split the grammar
       * into several automata */
      elag_minimize(res->automaton,1);
      sprintf(fstoutname,"%s-%d.elg",outname,fst_number++);
      u_fprintf(out,"<%s>\n",fstoutname);
      u_printf("Splitting big grammar in '%s' (%d states)\n",fstoutname,res->automaton->number_of_states);
      u_sprintf(ustr,"%s: compiled elag grammar",fstoutname);
      free(res->name);
      res->name=u_strdup(ustr->str);
      save_automaton(res,fstoutname,vec,FST_GRAMMAR);
      free_Fst2Automaton(res,NULL);
      res=NULL;
   }
}
if (res!=NULL) {
   /* We save the last automaton, if any */
   sprintf(fstoutname,"%s-%d.elg",outname,fst_number++);
   u_fprintf(out,"<%s>\n",fstoutname);
   u_printf("Saving grammar in '%s'(%d states)\n",fstoutname,res->automaton->number_of_states);
   elag_minimize(res->automaton,1);
   u_sprintf(ustr,"%s: compiled elag grammar",fstoutname);
   free(res->name);
   res->name=u_strdup(ustr->str);
   save_automaton(res,fstoutname,vec,FST_GRAMMAR);
   free_Fst2Automaton(res,free_symbol);
}
time_t end_time=time(0);
u_fclose(frules);
u_fclose(out);
free_Ustring(ustr);
u_printf("\nDone.\nElapsed time: %.0f s\n",difftime(end_time,start_time));
u_printf("\n%d rule%s from %s compiled in %s (%d automat%s)\n",
         nbRules,(nbRules>1)?"s":"",rulesname,outname,fst_number,
         (fst_number>1)?"a":"on");
return 0;
}
Beispiel #25
0
/**
 * The same than main, but no call to setBufferMode.
 */
int main_Fst2Check(int argc,char* const argv[]) {
if (argc==1) {
   usage();
   return 0;
}

int check_recursion=0,tfst_check=0;
int append_output=0;
int display_statistics=0;
char no_empty_graph_warning=0;
char output[FILENAME_MAX]="";

Encoding encoding_output = DEFAULT_ENCODING_OUTPUT;
int bom_output = DEFAULT_BOM_OUTPUT;
int mask_encoding_compatibility_input = DEFAULT_MASK_ENCODING_COMPATIBILITY_INPUT;

int val,index=-1;
struct OptVars* vars=new_OptVars();
while (EOF!=(val=getopt_long_TS(argc,argv,optstring_Fst2Check,lopts_Fst2Check,&index,vars))) {
   switch(val) {
   case 'a': append_output=1; break;
   case 'y': check_recursion=1; break;
   case 'n': check_recursion=0; break;
   case 's': display_statistics=1; break;
   case 't': tfst_check=1;
             /* If we have a tfst sentence graph, we must not report
              * compilation failure in the case of an empty graph. It
              * may be because of a sentence graph previously emptied by ELAG */
             no_empty_graph_warning=1;
             break;
   case 'e': no_empty_graph_warning=1; break;
   case 'h': usage(); free_OptVars(vars); return 0;
   case ':': if (index==-1) fatal_error("Missing argument for option -%c\n",vars->optopt);
             else fatal_error("Missing argument for option --%s\n",lopts_Fst2Check[index].name);
   case 'k': if (vars->optarg[0]=='\0') {
                fatal_error("Empty input_encoding argument\n");
             }
             decode_reading_encoding_parameter(&mask_encoding_compatibility_input,vars->optarg);
             break;
   case 'q': if (vars->optarg[0]=='\0') {
                fatal_error("Empty output_encoding argument\n");
             }
             decode_writing_encoding_parameter(&encoding_output,&bom_output,vars->optarg);
             break;
   case 'o': if (vars->optarg[0]=='\0') {
                   fatal_error("You must specify a non empty output file name\n");
                }
                strcpy(output,vars->optarg);
                break;
   case '?': if (index==-1) fatal_error("Invalid option -%c\n",vars->optopt);
             else fatal_error("Invalid option --%s\n",vars->optarg);
             break;
   }
   index=-1;
}

if (vars->optind!=argc-1) {
   free_OptVars(vars);
   error("Invalid arguments: rerun with --help\n");
   return 1;
}

char fst2_file_name[FILENAME_MAX];
remove_extension(argv[vars->optind],fst2_file_name);
strcpy(fst2_file_name,argv[vars->optind]);
U_FILE* ferr=NULL;
free_OptVars(vars);

if (output[0]!=0)
{
  if (append_output == 0) {
      ferr=u_fopen_creating_versatile_encoding(encoding_output,bom_output, output, U_WRITE);
  }
  else {
      ferr = u_fopen_versatile_encoding(encoding_output,bom_output,mask_encoding_compatibility_input | ALL_ENCODING_BOM_POSSIBLE,output,U_APPEND);
  }  
}

if (display_statistics) {
    display_fst2_file_stat(fst2_file_name,ferr);
}


if (check_recursion) {
   if (!OK_for_Locate_write_error(fst2_file_name,no_empty_graph_warning,ferr)) {
      if (ferr != NULL) {
          u_fclose(ferr);
      }
      return 1;
   }
}
if (tfst_check) {
   if (!valid_sentence_automaton_write_error(fst2_file_name,ferr)) {
      if (ferr != NULL) {
          u_fclose(ferr);
      }
      return 1;
   }
}

if (ferr != NULL) {
    u_fclose(ferr);
}
if ((check_recursion) || (tfst_check)) {
  u_printf("%s fst2 check has succeeded\n",fst2_file_name);
}
return 0;
}