/**
 * Loads a match list. Match lists are supposed to have been
 * generated by the Locate program.
 */
struct match_list* load_match_list(U_FILE* f,OutputPolicy *output_policy,unichar *header,Abstract_allocator prv_alloc) {
struct match_list* l=NULL;
struct match_list* end_of_list=NULL;
int start,end,start_char,end_char,start_letter,end_letter;
Ustring* line=new_Ustring();
char is_an_output;
/* We read the header */
unichar foo=0;
if (header==NULL) {
  header=&foo;
}
u_fscanf(f,"#%C\n",header);
OutputPolicy policy;
switch(*header) {
   case 'D': {
     policy=DEBUG_OUTPUTS;
     /* In debug mode, we have to skip the debug header */
     int n_graphs;
     u_fscanf(f,"%d\n",&n_graphs);
     while ((n_graphs--)>-1) {
       /* -1, because we also have to skip the #[IMR] line */
       readline(line,f);
     }
     break;
   }
   case 'M': policy=MERGE_OUTPUTS; break;
   case 'R':
   case 'T':
   case 'X': policy=REPLACE_OUTPUTS; break;
   case 'I':
   default: policy=IGNORE_OUTPUTS; break;
}
if (output_policy!=NULL) {
   (*output_policy)=policy;
}
while (6==u_fscanf(f,"%d.%d.%d %d.%d.%d",&start,&start_char,&start_letter,&end,&end_char,&end_letter)) {
   /* We look if there is an output or not, i.e. a space or a new line */
   int c=u_fgetc(f);
   if (c==' ') {
      /* If we have an output to read */
    readline(line,f);
    /* In debug mode, we have to stop at the char #1 */
      int i=-1;
      while (line->str[++i]!=1 && line->str[i]!='\0') {
    }
      line->str[i]='\0';
   }
   is_an_output=(policy!=IGNORE_OUTPUTS);
   if (l==NULL) {
      l=new_match(start,end,start_char,end_char,start_letter,end_letter,is_an_output?line->str:NULL,-1,NULL,prv_alloc);
      end_of_list=l;
   } else {
      end_of_list->next=new_match(start,end,start_char,end_char,start_letter,end_letter,is_an_output?line->str:NULL,-1,NULL,prv_alloc);
      end_of_list=end_of_list->next;
   }
}
free_Ustring(line);
return l;
}
Beispiel #2
0
static int read_number(lua_State *L, UFILE *ufile) {
	lua_Number d;
	if (u_fscanf(ufile, LUA_NUMBER_SCAN, &d) == 1) {
		lua_pushnumber(L, d);
		return 1;
	}
	else return 0;  /* read fails */
}
Beispiel #3
0
/**
 * Loads the given offset file. Returns NULL in case of error.
 */
vector_offset* load_offsets(const VersatileEncodingConfig* vec,const char* name) {
U_FILE* f=u_fopen(vec,name,U_READ);
if (f==NULL) return NULL;
int a,b,c,d,n;
vector_offset* res=new_vector_offset();
while ((n=u_fscanf(f,"%d%d%d%d",&a,&b,&c,&d))!=EOF) {
	if (n!=4) {
		fatal_error("Corrupted offset file %s\n",name);
	}
	vector_offset_add(res,a,b,c,d);
}
u_fclose(f);
return res;
}
int main_SpellCheck(int argc,char* const argv[]) {
if (argc==1) {
    usage();
    return SUCCESS_RETURN_CODE;
}

VersatileEncodingConfig vec=VEC_DEFAULT;
int val,index=-1;
char mode=0;
char snt[FILENAME_MAX]="";
char txt[FILENAME_MAX]="";
char output[FILENAME_MAX]="";
char output_set=0;
char output_op='A';
SpellCheckConfig config;
config.max_errors=1;
config.max_SP_INSERT=1;
config.max_SP_SUPPR=1;
config.max_SP_SWAP=1;
config.max_SP_CHANGE=1;
for (int i=0;i<N_SPSubOp;i++) {
    config.score[i]=default_scores[i];
}
config.min_length1=4;
config.min_length2=6;
config.min_length3=12;
config.input_op='D';
config.keyboard=NULL;
config.allow_uppercase_initial=0;
char foo;
bool only_verify_arguments = false;
UnitexGetOpt options;
while (EOF!=(val=options.parse_long(argc,argv,optstring_SpellCheck,lopts_SpellCheck,&index))) {
   switch(val) {
   case 's': {
       strcpy(snt,options.vars()->optarg);
       mode='s';
       break;
   }
   case 'f': {
       strcpy(txt,options.vars()->optarg);
       mode='f';
       break;
   }
   case 'o': {
       if (options.vars()->optarg!=NULL) {
           strcpy(output,options.vars()->optarg);
       }
       output_set=1;
       break;
   }
   case 'I': {
       if (!strcmp(options.vars()->optarg,"D") || !strcmp(options.vars()->optarg,"M") || !strcmp(options.vars()->optarg,"U")) {
           config.input_op=options.vars()->optarg[0];
       } else {
       error("Invalid argument %s for option --input-op: should in [DMU]\n",options.vars()->optarg);
       return USAGE_ERROR_CODE;
       }
       break;
   }
   case 'O': {
       if (!strcmp(options.vars()->optarg,"O") || !strcmp(options.vars()->optarg,"A")) {
           output_op=options.vars()->optarg[0];
       } else {
           error("Invalid argument %s for option --output-op: should in [OA]\n",options.vars()->optarg);
       return USAGE_ERROR_CODE;
       }
       break;
   }
   case 1: {
       config.keyboard=get_Keyboard(options.vars()->optarg);
       if (config.keyboard==NULL) {
           error("Invalid argument %s for option --keyboard:\nUse --show-keyboards to see possible values\n",options.vars()->optarg);
       return USAGE_ERROR_CODE;
       }
       break;
   }
   case 2: {
       print_available_keyboards(U_STDOUT);
       return SUCCESS_RETURN_CODE;
   }
   case 10: {
       if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_errors,&foo)) {
           error("Invalid argument %s for --max-errors: should be an integer >=0\n",options.vars()->optarg);
       return USAGE_ERROR_CODE;
       }
       break;
   }
   case 11: {
       if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_INSERT,&foo)) {
           error("Invalid argument %s for --max-insert: should be an integer >=0\n",options.vars()->optarg);
       return USAGE_ERROR_CODE;
       }
       break;
   }
   case 12: {
       if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_SUPPR,&foo)) {
           error("Invalid argument %s for --max-suppr: should be an integer >=0\n",options.vars()->optarg);
       return USAGE_ERROR_CODE;
       }
       break;
   }
   case 13: {
       if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_CHANGE,&foo)) {
           error("Invalid argument %s for --max-change: should be an integer >=0\n",options.vars()->optarg);
       return USAGE_ERROR_CODE;
       }
       break;
   }
   case 14: {
       if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_SWAP,&foo)) {
           error("Invalid argument %s for --max-swap: should be an integer >=0\n",options.vars()->optarg);
       return USAGE_ERROR_CODE;
       }
       break;
   }
   case 20: {
       int* scores=config.score;
       if (N_SPSubOp!=sscanf(options.vars()->optarg,"%d,%d,%d,%d,%d,%d,%d,%d,%d%c",
            scores,scores+1,scores+2,scores+3,scores+4,scores+5,
            scores+6,scores+7,scores+8,&foo)) {
            error("Invalid argument %s for option --scores. See --help-scores\n",options.vars()->optarg);
        return USAGE_ERROR_CODE;
       }
       break;
   }
   case 21: {
       usage_scores();
       return SUCCESS_RETURN_CODE;
   }
   case 22: {
       if (3!=sscanf(options.vars()->optarg,"%u,%u,%u%c",
            &config.min_length1,&config.min_length2,&config.min_length3,&foo)) {
            error("Invalid argument %s for option --min-lengths\n",options.vars()->optarg);
        return USAGE_ERROR_CODE;
       }
       break;
   }
   case 23: {
       if (!strcmp(options.vars()->optarg,"yes")) {
           config.allow_uppercase_initial=1;
       } else if (!strcmp(options.vars()->optarg,"no")) {
           config.allow_uppercase_initial=0;
       } else {
           error("Invalid argument %s for option --upper-initial\n",options.vars()->optarg);
       return USAGE_ERROR_CODE;
       }
       break;
   }
   case 'k': if (options.vars()->optarg[0]=='\0') {
                error("Empty input_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg);
             break;
   case 'q': if (options.vars()->optarg[0]=='\0') {
                error("Empty output_encoding argument\n");
                return USAGE_ERROR_CODE;
             }
             decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg);
             break;
   case 'V': only_verify_arguments = true;
             break;
   case 'h': usage();
             return SUCCESS_RETURN_CODE;
   case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) :
                         error("Missing argument for option --%s\n",lopts_SpellCheck[index].name);
             return USAGE_ERROR_CODE;
   case '?': index==-1 ? error("Invalid option -%c\n",options.vars()->optopt) :
                         error("Invalid option --%s\n",options.vars()->optarg);
             return USAGE_ERROR_CODE;
   }
   index=-1;
}

if (options.vars()->optind==argc) {
   error("Invalid arguments: rerun with --help\n");
   return USAGE_ERROR_CODE;
}

if (mode==0) {
  error("You must use either --snt or --file\n");
  return USAGE_ERROR_CODE;
}

if (only_verify_arguments) {
  // freeing all allocated memory
  return SUCCESS_RETURN_CODE;
}

config.n_dics=argc-options.vars()->optind;
config.dics=(Dictionary**)malloc(config.n_dics*sizeof(Dictionary*));
if (config.dics==NULL) {
    alloc_error("main_SpellCheck");
  return ALLOC_ERROR_CODE;
}

for (int i=0;i<config.n_dics;i++) {
    config.dics[i]=new_Dictionary(&vec,argv[i+options.vars()->optind]);
    if (config.dics[i]==NULL) {
        error("Cannot load dictionary %s\n",argv[i+options.vars()->optind]);
    }
}

config.out=U_STDOUT;
config.n_input_lines=0;
config.n_output_lines=0;

if (mode=='s') {
    /* When working with a .snt, we actually want to work on its err file */
    get_snt_path(snt,txt);
    strcat(txt,"err");
    /* the output must be dlf, and we note the number of lines in the existing
     * dlf file, if any */
    get_snt_path(snt,output);
    strcat(output,"dlf.n");
    U_FILE* f=u_fopen(&vec,output,U_READ);
    if (f!=NULL) {
        u_fscanf(f,"%d",&(config.n_output_lines));
        u_fclose(f);
    }
    get_snt_path(snt,output);
    strcat(output,"dlf");
    output_set=1;
    /* and we force the values for -I and -O */
    config.input_op='U';
    output_op='A';
} else {
    /* If mode=='f', we don't have anything to do since we already
     * defined the default output to stdout */
}

if (output_set) {
    if (output_op=='O') {
        config.out=u_fopen(&vec,output,U_WRITE);
    } else {
        config.out=u_fopen(&vec,output,U_APPEND);
    }
    if (config.out==NULL) {
        error("Cannot open output file %s\n",output);
    for (int i=0;i<config.n_dics;i++) {
      free_Dictionary(config.dics[i]);
    }
    free(config.dics);
    return DEFAULT_ERROR_CODE;
    }
}

config.modified_input=NULL;
char modified_input[FILENAME_MAX]="";
if (config.input_op!='D') {
    strcpy(modified_input,txt);
    strcat(modified_input,".tmp");
    config.modified_input=u_fopen(&vec,modified_input,U_WRITE);
    if (config.modified_input==NULL) {
        error("Cannot open tmp file %s\n",modified_input);
    if (config.out!=U_STDOUT) {
      u_fclose(config.out);
    }
    for (int i=0;i<config.n_dics;i++) {
      free_Dictionary(config.dics[i]);
    }
    free(config.dics);
    return DEFAULT_ERROR_CODE;
    }
}

config.in=u_fopen(&vec,txt,U_READ);
if (config.in==NULL) {
    error("Cannot open file %s\n",txt);
  u_fclose(config.modified_input);
  if (config.out!=U_STDOUT) {
    u_fclose(config.out);
  }
  for (int i=0;i<config.n_dics;i++) {
    free_Dictionary(config.dics[i]);
  }
  free(config.dics);
  return DEFAULT_ERROR_CODE;
}

/* We perform spellchecking */
spellcheck(&config);

/* And we clean */

u_fclose(config.in);

if (config.modified_input!=NULL) {
  /* If we used a tmp file because the input file has to be modified,
   * it's now time to actually modify it */
  u_fclose(config.modified_input);
  af_remove(txt);
  af_rename(modified_input,txt);
}

if (config.out!=U_STDOUT) {
  u_fclose(config.out);
}

for (int i=0;i<config.n_dics;i++) {
  free_Dictionary(config.dics[i]);
}
free(config.dics);

/* Finally, we update the dlf.n and err.n files if mode=='s' */
if (mode=='s') {
    get_snt_path(snt,output);
    strcat(output,"err.n");
    U_FILE* f=u_fopen(&vec,output,U_WRITE);
    if (f!=NULL) {
        u_fprintf(f,"%d",config.n_input_lines);
        u_fclose(f);
    }
    if (config.input_op!='D') {
        get_snt_path(snt,output);
        strcat(output,"dlf.n");
        U_FILE* fw=u_fopen(&vec,output,U_WRITE);
        if (fw!=NULL) {
            u_fprintf(fw,"%d",config.n_output_lines);
            u_fclose(fw);
        }
    }
}

return SUCCESS_RETURN_CODE;
}