int main(int argc,char **argv) { int i,index; FILE *fp, *fp_restore = NULL; char *save_filename = NULL; char *restore_filename = NULL; for(i=1; i<argc; i++) { if(argv[i][0] != '-') break; ++i; switch(argv[i-1][1]) { case 'l': lower = atof(argv[i]); break; case 'u': upper = atof(argv[i]); break; case 'y': y_lower = atof(argv[i]); ++i; y_upper = atof(argv[i]); y_scaling = 1; break; case 's': save_filename = argv[i]; break; case 'r': restore_filename = argv[i]; break; default: fprintf(stderr,"unknown option\n"); exit_with_help(); } } if(!(upper > lower) || (y_scaling && !(y_upper > y_lower))) { fprintf(stderr,"inconsistent lower/upper specification\n"); exit(1); } if(restore_filename && save_filename) { fprintf(stderr,"cannot use -r and -s simultaneously\n"); exit(1); } if(argc != i+1) exit_with_help(); fp=fopen(argv[i],"r"); if(fp==NULL) { fprintf(stderr,"can't open file %s\n", argv[i]); exit(1); } line = (char *) malloc(max_line_len*sizeof(char)); #define SKIP_TARGET\ while(isspace(*p)) ++p;\ while(!isspace(*p)) ++p; #define SKIP_ELEMENT\ while(*p!=':') ++p;\ ++p;\ while(isspace(*p)) ++p;\ while(*p && !isspace(*p)) ++p; /* assumption: min index of attributes is 1 */ /* pass 1: find out max index of attributes */ max_index = 0; min_index = 1; if(restore_filename) { int idx, c; fp_restore = fopen(restore_filename,"r"); if(fp_restore==NULL) { fprintf(stderr,"can't open file %s\n", restore_filename); exit(1); } c = fgetc(fp_restore); if(c == 'y') { readline(fp_restore); readline(fp_restore); readline(fp_restore); } readline(fp_restore); readline(fp_restore); while(fscanf(fp_restore,"%d %*f %*f\n",&idx) == 1) max_index = max(idx,max_index); rewind(fp_restore); } while(readline(fp)!=NULL) { char *p=line; SKIP_TARGET while(sscanf(p,"%d:%*f",&index)==1) { max_index = max(max_index, index); min_index = min(min_index, index); SKIP_ELEMENT num_nonzeros++; } } if(min_index < 1) fprintf(stderr, "WARNING: minimal feature index is %d, but indices should start from 1\n", min_index); rewind(fp); feature_max = (double *)malloc((max_index+1)* sizeof(double)); feature_min = (double *)malloc((max_index+1)* sizeof(double)); if(feature_max == NULL || feature_min == NULL) { fprintf(stderr,"can't allocate enough memory\n"); exit(1); } for(i=0; i<=max_index; i++) { feature_max[i]=-DBL_MAX; feature_min[i]=DBL_MAX; } /* pass 2: find out min/max value */ while(readline(fp)!=NULL) { char *p=line; int next_index=1; double target; double value; if (sscanf(p,"%lf",&target) != 1) return clean_up(fp_restore, fp, "ERROR: failed to read labels\n"); y_max = max(y_max,target); y_min = min(y_min,target); SKIP_TARGET while(sscanf(p,"%d:%lf",&index,&value)==2) { for(i=next_index; i<index; i++) { feature_max[i]=max(feature_max[i],0); feature_min[i]=min(feature_min[i],0); } feature_max[index]=max(feature_max[index],value); feature_min[index]=min(feature_min[index],value); SKIP_ELEMENT next_index=index+1; } for(i=next_index; i<=max_index; i++) { feature_max[i]=max(feature_max[i],0); feature_min[i]=min(feature_min[i],0); } } rewind(fp); /* pass 2.5: save/restore feature_min/feature_max */ if(restore_filename) { /* fp_restore rewinded in finding max_index */ int idx, c; double fmin, fmax; int next_index = 1; if((c = fgetc(fp_restore)) == 'y') { if(fscanf(fp_restore, "%lf %lf\n", &y_lower, &y_upper) != 2 || fscanf(fp_restore, "%lf %lf\n", &y_min, &y_max) != 2) return clean_up(fp_restore, fp, "ERROR: failed to read scaling parameters\n"); y_scaling = 1; } else ungetc(c, fp_restore); if (fgetc(fp_restore) == 'x') { if(fscanf(fp_restore, "%lf %lf\n", &lower, &upper) != 2) return clean_up(fp_restore, fp, "ERROR: failed to read scaling parameters\n"); while(fscanf(fp_restore,"%d %lf %lf\n",&idx,&fmin,&fmax)==3) { for(i = next_index; i<idx; i++) if(feature_min[i] != feature_max[i]) fprintf(stderr, "WARNING: feature index %d appeared in file %s was not seen in the scaling factor file %s.\n", i, argv[argc-1], restore_filename); feature_min[idx] = fmin; feature_max[idx] = fmax; next_index = idx + 1; } for(i=next_index; i<=max_index; i++) if(feature_min[i] != feature_max[i]) fprintf(stderr, "WARNING: feature index %d appeared in file %s was not seen in the scaling factor file %s.\n", i, argv[argc-1], restore_filename); } fclose(fp_restore); } if(save_filename) { FILE *fp_save = fopen(save_filename,"w"); if(fp_save==NULL) { fprintf(stderr,"can't open file %s\n", save_filename); exit(1); } if(y_scaling) { fprintf(fp_save, "y\n"); fprintf(fp_save, "%.16g %.16g\n", y_lower, y_upper); fprintf(fp_save, "%.16g %.16g\n", y_min, y_max); } fprintf(fp_save, "x\n"); fprintf(fp_save, "%.16g %.16g\n", lower, upper); for(i=1; i<=max_index; i++) { if(feature_min[i]!=feature_max[i]) fprintf(fp_save,"%d %.16g %.16g\n",i,feature_min[i],feature_max[i]); } if(min_index < 1) fprintf(stderr, "WARNING: scaling factors with indices smaller than 1 are not stored to the file %s.\n", save_filename); fclose(fp_save); } /* pass 3: scale */ while(readline(fp)!=NULL) { char *p=line; int next_index=1; double target; double value; if (sscanf(p,"%lf",&target) != 1) return clean_up(NULL, fp, "ERROR: failed to read labels\n"); output_target(target); SKIP_TARGET while(sscanf(p,"%d:%lf",&index,&value)==2) { for(i=next_index; i<index; i++) output(i,0); output(index,value); SKIP_ELEMENT next_index=index+1; } for(i=next_index; i<=max_index; i++) output(i,0); printf("\n"); } if (new_num_nonzeros > num_nonzeros) fprintf(stderr, "WARNING: original #nonzeros %ld\n" " new #nonzeros %ld\n" "Use -l 0 if many original feature values are zeros\n", num_nonzeros, new_num_nonzeros); free(line); free(feature_max); free(feature_min); fclose(fp); return 0; }
int main(int argc,char **argv) { int i,index; FILE *fp; char *save_filename = NULL; char *restore_filename = NULL; for(i=1;i<argc;i++) { if(argv[i][0] != '-') break; ++i; switch(argv[i-1][1]) { case 'l': lower = atof(argv[i]); break; case 'u': upper = atof(argv[i]); break; case 'y': y_lower = atof(argv[i]); ++i; y_upper = atof(argv[i]); y_scaling = 1; break; case 's': save_filename = argv[i]; break; case 'r': restore_filename = argv[i]; break; default: fprintf(stderr,"unknown option\n"); exit_with_help(); } } if(!(upper > lower) || (y_scaling && !(y_upper > y_lower))) { fprintf(stderr,"inconsistent lower/upper specification\n"); exit(1); } if(argc != i+1) exit_with_help(); fp=fopen(argv[i],"r"); if(fp==NULL) { fprintf(stderr,"can't open file %s\n", argv[i]); exit(1); } line = (char *) malloc(max_line_len*sizeof(char)); #define SKIP_TARGET\ while(isspace(*p)) ++p;\ while(!isspace(*p)) ++p; #define SKIP_ELEMENT\ while(*p!=':') ++p;\ ++p;\ while(isspace(*p)) ++p;\ while(*p && !isspace(*p)) ++p; /* assumption: min index of attributes is 1 */ /* pass 1: find out max index of attributes */ max_index = 0; while(readline(fp)!=NULL) { char *p=line; SKIP_TARGET while(sscanf(p,"%d:%*f",&index)==1) { max_index = max(max_index, index); SKIP_ELEMENT } } feature_max = (double *)malloc((max_index+1)* sizeof(double)); feature_min = (double *)malloc((max_index+1)* sizeof(double)); if(feature_max == NULL || feature_min == NULL) { fprintf(stderr,"can't allocate enough memory\n"); exit(1); } for(i=0;i<=max_index;i++) { feature_max[i]=-DBL_MAX; feature_min[i]=DBL_MAX; } rewind(fp); /* pass 2: find out min/max value */ while(readline(fp)!=NULL) { char *p=line; int next_index=1; double target; double value; sscanf(p,"%lf",&target); y_max = max(y_max,target); y_min = min(y_min,target); SKIP_TARGET while(sscanf(p,"%d:%lf",&index,&value)==2) { for(i=next_index;i<index;i++) { feature_max[i]=max(feature_max[i],0); feature_min[i]=min(feature_min[i],0); } feature_max[index]=max(feature_max[index],value); feature_min[index]=min(feature_min[index],value); SKIP_ELEMENT next_index=index+1; } for(i=next_index;i<=max_index;i++) { feature_max[i]=max(feature_max[i],0); feature_min[i]=min(feature_min[i],0); } } rewind(fp); /* pass 2.5: save/restore feature_min/feature_max */ if(restore_filename) { FILE *fp_restore = fopen(restore_filename,"r"); int idx, c; double fmin, fmax; if(fp_restore==NULL) { fprintf(stderr,"can't open file %s\n", restore_filename); exit(1); } if((c = fgetc(fp_restore)) == 'y') { fscanf(fp_restore, "%lf %lf\n", &y_lower, &y_upper); fscanf(fp_restore, "%lf %lf\n", &y_min, &y_max); y_scaling = 1; } else ungetc(c, fp_restore); if (fgetc(fp_restore) == 'x') { fscanf(fp_restore, "%lf %lf\n", &lower, &upper); while(fscanf(fp_restore,"%d %lf %lf\n",&idx,&fmin,&fmax)==3) { if(idx<=max_index) { feature_min[idx] = fmin; feature_max[idx] = fmax; } } } fclose(fp_restore); } if(save_filename) { FILE *fp_save = fopen(save_filename,"w"); if(fp_save==NULL) { fprintf(stderr,"can't open file %s\n", save_filename); exit(1); } if(y_scaling) { fprintf(fp_save, "y\n"); fprintf(fp_save, "%.16g %.16g\n", y_lower, y_upper); fprintf(fp_save, "%.16g %.16g\n", y_min, y_max); } fprintf(fp_save, "x\n"); fprintf(fp_save, "%.16g %.16g\n", lower, upper); for(i=1;i<=max_index;i++) { if(feature_min[i]!=feature_max[i]) fprintf(fp_save,"%d %.16g %.16g\n",i,feature_min[i],feature_max[i]); } fclose(fp_save); } /* pass 3: scale */ while(readline(fp)!=NULL) { char *p=line; int next_index=1; int index; double target; double value; sscanf(p,"%lf",&target); output_target(target); SKIP_TARGET while(sscanf(p,"%d:%lf",&index,&value)==2) { for(i=next_index;i<index;i++) output(i,0); output(index,value); SKIP_ELEMENT next_index=index+1; } for(i=next_index;i<=max_index;i++) output(i,0); printf("\n"); } free(line); free(feature_max); free(feature_min); fclose(fp); return 0; }
int main(int argc,char **argv) { int i,index; FILE *fp; for(i=1;i<argc;i++) { if(argv[i][0] != '-') break; ++i; switch(argv[i-1][1]) { case 'l': lower = atof(argv[i]); break; case 'u': upper = atof(argv[i]); break; case 'y': y_lower = atof(argv[i]); ++i; y_upper = atof(argv[i]); y_scaling = 1; break; default: fprintf(stderr,"unknown option\n"); exit(1); } } if(!(upper > lower) || (y_scaling && !(y_upper > y_lower))) { fprintf(stderr,"inconsistent lower/upper specification\n"); exit(1); } if(argc != i+1) { fprintf(stderr,"usage: %s [-l lower] [-u upper] [-y y_lower y_upper] filename\n",argv[0]); fprintf(stderr,"(default: lower = -1, upper = 1, no y scaling)\n"); exit(1); } fp=fopen(argv[i],"r"); if(fp==NULL) { fprintf(stderr,"can't open file\n"); exit(1); } #define SKIP_TARGET\ while(isspace(*p)) ++p;\ while(!isspace(*p)) ++p; #define SKIP_ELEMENT\ while(*p!=':') ++p;\ ++p;\ while(isspace(*p)) ++p;\ while(*p && !isspace(*p)) ++p; /* assumption: min index of attributes is 1 */ /* pass 1: find out max index of attributes */ max_index = 0; while(fgets(buf,MAX_LINE_LEN,fp)!=NULL) { char *p=buf; SKIP_TARGET while(sscanf(p,"%d:%*f",&index)==1) { max_index = max(max_index, index); SKIP_ELEMENT } } feature_max = (double *)malloc((max_index+1)* sizeof(double)); feature_min = (double *)malloc((max_index+1)* sizeof(double)); if(feature_max == NULL || feature_min == NULL) { fprintf(stderr,"can't allocate enough memory\n"); exit(1); } for(i=0;i<=max_index;i++) { feature_max[i]=-DBL_MAX; feature_min[i]=DBL_MAX; } rewind(fp); /* pass 2: find out min/max value */ while(fgets(buf,MAX_LINE_LEN,fp)!=NULL) { char *p=buf; int next_index=1; double target; double value; sscanf(p,"%lf",&target); y_max = max(y_max,target); y_min = min(y_min,target); SKIP_TARGET while(sscanf(p,"%d:%lf",&index,&value)==2) { for(i=next_index;i<index;i++) { feature_max[i]=max(feature_max[i],0); feature_min[i]=min(feature_min[i],0); } feature_max[index]=max(feature_max[index],value); feature_min[index]=min(feature_min[index],value); SKIP_ELEMENT next_index=index+1; } for(i=next_index;i<=max_index;i++) { feature_max[i]=max(feature_max[i],0); feature_min[i]=min(feature_min[i],0); } } rewind(fp); /* pass 3: scale */ while(fgets(buf,MAX_LINE_LEN,fp)!=NULL) { char *p=buf; int next_index=1; int index; double target; double value; sscanf(p,"%lf",&target); output_target(target); SKIP_TARGET while(sscanf(p,"%d:%lf",&index,&value)==2) { for(i=next_index;i<index;i++) output(i,0); output(index,value); SKIP_ELEMENT next_index=index+1; } for(i=next_index;i<=max_index;i++) output(i,0); printf("\n"); } fclose(fp); return 0; }