/**
 * Loads a match list. Match lists are supposed to have been
 * generated by the Locate program.
 *
 * The file starts with a "#X" line where X is a one-character header:
 *   'D'            -> DEBUG_OUTPUTS (a debug header follows and is skipped)
 *   'M'            -> MERGE_OUTPUTS
 *   'R','T','X'    -> REPLACE_OUTPUTS
 *   'I' or other   -> IGNORE_OUTPUTS
 * Each following line is "start.start_char.start_letter end.end_char.end_letter",
 * optionally followed by a space and the output text.
 *
 * @param f             file to read from
 * @param output_policy if not NULL, receives the policy deduced from the header
 * @param header        if not NULL, receives the header character that was read
 * @param prv_alloc     allocator forwarded to new_match
 * @return the head of the loaded list, or NULL if no match line was read
 */
struct match_list* load_match_list(U_FILE* f,OutputPolicy *output_policy,unichar *header,Abstract_allocator prv_alloc) {
    struct match_list* l=NULL;
    struct match_list* end_of_list=NULL;
    int start,end,start_char,end_char,start_letter,end_letter;
    Ustring* line=new_Ustring();
    /* We read the header */
    unichar foo=0;
    if (header==NULL) {
        header=&foo;
    }
    u_fscanf(f,"#%C\n",header);
    OutputPolicy policy;
    switch(*header) {
        case 'D': {
            policy=DEBUG_OUTPUTS;
            /* In debug mode, we have to skip the debug header.
             * n_graphs is initialized so that a failed read does not leave
             * the loop below running on an indeterminate counter: in that
             * case only one line is skipped. */
            int n_graphs=0;
            u_fscanf(f,"%d\n",&n_graphs);
            while ((n_graphs--)>-1) {
                /* -1, because we also have to skip the #[IMR] line */
                readline(line,f);
            }
            break;
        }
        case 'M':
            policy=MERGE_OUTPUTS;
            break;
        case 'R':
        case 'T':
        case 'X':
            policy=REPLACE_OUTPUTS;
            break;
        case 'I':
        default:
            policy=IGNORE_OUTPUTS;
            break;
    }
    if (output_policy!=NULL) {
        (*output_policy)=policy;
    }
    const char is_an_output=(policy!=IGNORE_OUTPUTS);
    while (6==u_fscanf(f,"%d.%d.%d %d.%d.%d",&start,&start_char,&start_letter,&end,&end_char,&end_letter)) {
        /* We look if there is an output or not, i.e. a space or a new line */
        int c=u_fgetc(f);
        if (c==' ') {
            /* If we have an output to read */
            readline(line,f);
            /* In debug mode, we have to stop at the char #1 */
            int i=0;
            while (line->str[i]!=1 && line->str[i]!='\0') {
                i++;
            }
            line->str[i]='\0';
        } else {
            /* No output on this line: drop whatever the previous readline
             * left in the buffer, so that this match does not inherit the
             * output of the previous one (or a debug header line) */
            line->str[0]='\0';
        }
        struct match_list* m=new_match(start,end,start_char,end_char,start_letter,end_letter,
                                       is_an_output?line->str:NULL,-1,NULL,prv_alloc);
        if (l==NULL) {
            l=m;
        } else {
            end_of_list->next=m;
        }
        end_of_list=m;
    }
    free_Ustring(line);
    return l;
}
/*
 * Tries to read one number from ufile using the LUA_NUMBER_SCAN format.
 * On success the number is pushed on the Lua stack and 1 is returned.
 * On failure nothing is pushed and 0 is returned.
 */
static int read_number(lua_State *L, UFILE *ufile) {
    lua_Number value;
    if (u_fscanf(ufile, LUA_NUMBER_SCAN, &value) != 1) {
        return 0; /* read fails */
    }
    lua_pushnumber(L, value);
    return 1;
}
/**
 * Loads the given offset file: a sequence of integers read four at a
 * time, each group being appended to the returned vector.
 *
 * Returns NULL if the file cannot be opened. A group that is not made of
 * exactly 4 integers is reported through fatal_error.
 */
vector_offset* load_offsets(const VersatileEncodingConfig* vec,const char* name) {
    U_FILE* input=u_fopen(vec,name,U_READ);
    if (input==NULL) {
        return NULL;
    }
    vector_offset* offsets=new_vector_offset();
    int v0,v1,v2,v3;
    for (;;) {
        const int n_fields=u_fscanf(input,"%d%d%d%d",&v0,&v1,&v2,&v3);
        if (n_fields==EOF) {
            break;
        }
        if (n_fields!=4) {
            fatal_error("Corrupted offset file %s\n",name);
        }
        vector_offset_add(offsets,v0,v1,v2,v3);
    }
    u_fclose(input);
    return offsets;
}
int main_SpellCheck(int argc,char* const argv[]) { if (argc==1) { usage(); return SUCCESS_RETURN_CODE; } VersatileEncodingConfig vec=VEC_DEFAULT; int val,index=-1; char mode=0; char snt[FILENAME_MAX]=""; char txt[FILENAME_MAX]=""; char output[FILENAME_MAX]=""; char output_set=0; char output_op='A'; SpellCheckConfig config; config.max_errors=1; config.max_SP_INSERT=1; config.max_SP_SUPPR=1; config.max_SP_SWAP=1; config.max_SP_CHANGE=1; for (int i=0;i<N_SPSubOp;i++) { config.score[i]=default_scores[i]; } config.min_length1=4; config.min_length2=6; config.min_length3=12; config.input_op='D'; config.keyboard=NULL; config.allow_uppercase_initial=0; char foo; bool only_verify_arguments = false; UnitexGetOpt options; while (EOF!=(val=options.parse_long(argc,argv,optstring_SpellCheck,lopts_SpellCheck,&index))) { switch(val) { case 's': { strcpy(snt,options.vars()->optarg); mode='s'; break; } case 'f': { strcpy(txt,options.vars()->optarg); mode='f'; break; } case 'o': { if (options.vars()->optarg!=NULL) { strcpy(output,options.vars()->optarg); } output_set=1; break; } case 'I': { if (!strcmp(options.vars()->optarg,"D") || !strcmp(options.vars()->optarg,"M") || !strcmp(options.vars()->optarg,"U")) { config.input_op=options.vars()->optarg[0]; } else { error("Invalid argument %s for option --input-op: should in [DMU]\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 'O': { if (!strcmp(options.vars()->optarg,"O") || !strcmp(options.vars()->optarg,"A")) { output_op=options.vars()->optarg[0]; } else { error("Invalid argument %s for option --output-op: should in [OA]\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 1: { config.keyboard=get_Keyboard(options.vars()->optarg); if (config.keyboard==NULL) { error("Invalid argument %s for option --keyboard:\nUse --show-keyboards to see possible values\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 2: { print_available_keyboards(U_STDOUT); return SUCCESS_RETURN_CODE; } case 10: 
{ if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_errors,&foo)) { error("Invalid argument %s for --max-errors: should be an integer >=0\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 11: { if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_INSERT,&foo)) { error("Invalid argument %s for --max-insert: should be an integer >=0\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 12: { if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_SUPPR,&foo)) { error("Invalid argument %s for --max-suppr: should be an integer >=0\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 13: { if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_CHANGE,&foo)) { error("Invalid argument %s for --max-change: should be an integer >=0\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 14: { if (1!=sscanf(options.vars()->optarg,"%u%c",&config.max_SP_SWAP,&foo)) { error("Invalid argument %s for --max-swap: should be an integer >=0\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 20: { int* scores=config.score; if (N_SPSubOp!=sscanf(options.vars()->optarg,"%d,%d,%d,%d,%d,%d,%d,%d,%d%c", scores,scores+1,scores+2,scores+3,scores+4,scores+5, scores+6,scores+7,scores+8,&foo)) { error("Invalid argument %s for option --scores. 
See --help-scores\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 21: { usage_scores(); return SUCCESS_RETURN_CODE; } case 22: { if (3!=sscanf(options.vars()->optarg,"%u,%u,%u%c", &config.min_length1,&config.min_length2,&config.min_length3,&foo)) { error("Invalid argument %s for option --min-lengths\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 23: { if (!strcmp(options.vars()->optarg,"yes")) { config.allow_uppercase_initial=1; } else if (!strcmp(options.vars()->optarg,"no")) { config.allow_uppercase_initial=0; } else { error("Invalid argument %s for option --upper-initial\n",options.vars()->optarg); return USAGE_ERROR_CODE; } break; } case 'k': if (options.vars()->optarg[0]=='\0') { error("Empty input_encoding argument\n"); return USAGE_ERROR_CODE; } decode_reading_encoding_parameter(&(vec.mask_encoding_compatibility_input),options.vars()->optarg); break; case 'q': if (options.vars()->optarg[0]=='\0') { error("Empty output_encoding argument\n"); return USAGE_ERROR_CODE; } decode_writing_encoding_parameter(&(vec.encoding_output),&(vec.bom_output),options.vars()->optarg); break; case 'V': only_verify_arguments = true; break; case 'h': usage(); return SUCCESS_RETURN_CODE; case ':': index==-1 ? error("Missing argument for option -%c\n",options.vars()->optopt) : error("Missing argument for option --%s\n",lopts_SpellCheck[index].name); return USAGE_ERROR_CODE; case '?': index==-1 ? 
error("Invalid option -%c\n",options.vars()->optopt) : error("Invalid option --%s\n",options.vars()->optarg); return USAGE_ERROR_CODE; } index=-1; } if (options.vars()->optind==argc) { error("Invalid arguments: rerun with --help\n"); return USAGE_ERROR_CODE; } if (mode==0) { error("You must use either --snt or --file\n"); return USAGE_ERROR_CODE; } if (only_verify_arguments) { // freeing all allocated memory return SUCCESS_RETURN_CODE; } config.n_dics=argc-options.vars()->optind; config.dics=(Dictionary**)malloc(config.n_dics*sizeof(Dictionary*)); if (config.dics==NULL) { alloc_error("main_SpellCheck"); return ALLOC_ERROR_CODE; } for (int i=0;i<config.n_dics;i++) { config.dics[i]=new_Dictionary(&vec,argv[i+options.vars()->optind]); if (config.dics[i]==NULL) { error("Cannot load dictionary %s\n",argv[i+options.vars()->optind]); } } config.out=U_STDOUT; config.n_input_lines=0; config.n_output_lines=0; if (mode=='s') { /* When working with a .snt, we actually want to work on its err file */ get_snt_path(snt,txt); strcat(txt,"err"); /* the output must be dlf, and we note the number of lines in the existing * dlf file, if any */ get_snt_path(snt,output); strcat(output,"dlf.n"); U_FILE* f=u_fopen(&vec,output,U_READ); if (f!=NULL) { u_fscanf(f,"%d",&(config.n_output_lines)); u_fclose(f); } get_snt_path(snt,output); strcat(output,"dlf"); output_set=1; /* and we force the values for -I and -O */ config.input_op='U'; output_op='A'; } else { /* If mode=='f', we don't have anything to do since we already * defined the default output to stdout */ } if (output_set) { if (output_op=='O') { config.out=u_fopen(&vec,output,U_WRITE); } else { config.out=u_fopen(&vec,output,U_APPEND); } if (config.out==NULL) { error("Cannot open output file %s\n",output); for (int i=0;i<config.n_dics;i++) { free_Dictionary(config.dics[i]); } free(config.dics); return DEFAULT_ERROR_CODE; } } config.modified_input=NULL; char modified_input[FILENAME_MAX]=""; if (config.input_op!='D') { 
strcpy(modified_input,txt); strcat(modified_input,".tmp"); config.modified_input=u_fopen(&vec,modified_input,U_WRITE); if (config.modified_input==NULL) { error("Cannot open tmp file %s\n",modified_input); if (config.out!=U_STDOUT) { u_fclose(config.out); } for (int i=0;i<config.n_dics;i++) { free_Dictionary(config.dics[i]); } free(config.dics); return DEFAULT_ERROR_CODE; } } config.in=u_fopen(&vec,txt,U_READ); if (config.in==NULL) { error("Cannot open file %s\n",txt); u_fclose(config.modified_input); if (config.out!=U_STDOUT) { u_fclose(config.out); } for (int i=0;i<config.n_dics;i++) { free_Dictionary(config.dics[i]); } free(config.dics); return DEFAULT_ERROR_CODE; } /* We perform spellchecking */ spellcheck(&config); /* And we clean */ u_fclose(config.in); if (config.modified_input!=NULL) { /* If we used a tmp file because the input file has to be modified, * it's now time to actually modify it */ u_fclose(config.modified_input); af_remove(txt); af_rename(modified_input,txt); } if (config.out!=U_STDOUT) { u_fclose(config.out); } for (int i=0;i<config.n_dics;i++) { free_Dictionary(config.dics[i]); } free(config.dics); /* Finally, we update the dlf.n and err.n files if mode=='s' */ if (mode=='s') { get_snt_path(snt,output); strcat(output,"err.n"); U_FILE* f=u_fopen(&vec,output,U_WRITE); if (f!=NULL) { u_fprintf(f,"%d",config.n_input_lines); u_fclose(f); } if (config.input_op!='D') { get_snt_path(snt,output); strcat(output,"dlf.n"); U_FILE* fw=u_fopen(&vec,output,U_WRITE); if (fw!=NULL) { u_fprintf(fw,"%d",config.n_output_lines); u_fclose(fw); } } } return SUCCESS_RETURN_CODE; }