/* Read the next vector of b->c->dim scalars from the text stream into a[].
 *
 * A new input line is started when v_sofar has reached n, or when the
 * current line has no more values.  On each new line the first `start`
 * values are skipped; the value read just before the final skip read is
 * remembered in sequence_base.
 *
 * Before returning, sequence_base is subtracted from every element; when
 * the book has q_sequencep set, the unadjusted last element becomes the
 * next sequence_base.
 *
 * Returns 0 with a[] filled, or -1 once get_next_value() reports that no
 * further data is available.
 * (presumably reset_next_value() also clears v_sofar -- not visible here) */
int get_vector(codebook *b,FILE *in,int start, int n,float *a){
  const static_codebook *book=b->c;
  int k;

  for(;;){

    /* time to move to a fresh line? */
    if(v_sofar==n || get_line_value(in,a)){
      reset_next_value();
      if(get_next_value(in,a))
        return(-1);

      /* skip the leading columns the caller is not interested in */
      for(k=0;k<start;k++){
        sequence_base=*a;
        get_line_value(in,a);
      }
    }

    /* a[0] is already loaded; pull in the remaining dim-1 scalars */
    k=1;
    while(k<book->dim && !get_line_value(in,a+k))
      k++;

    if(k!=book->dim){
      /* the line ran short: drop the partial vector and try the next line */
      sequence_base=0.f;
      continue;
    }

    {
      float last=a[book->dim-1];

      for(k=0;k<book->dim;k++)
        a[k]-=sequence_base;
      if(book->q_sequencep)
        sequence_base=last;
      v_sofar++;
      return(0);
    }
  }
}
/* Load n scalars from the next input line into vec[0..n-1], starting at
 * column `begin`.
 *
 * Returns 1 when a line was loaded and 0 when get_next_value() reports
 * that no more data is available.  Terminates the program if the line
 * holds fewer columns than requested. */
static int getline(FILE *in,float *vec,int begin,int n){
  int col;
  int first;

  reset_next_value();
  if(get_next_value(in,vec))
    return(0);

  if(!begin){
    /* column 0 is wanted and is already sitting in vec[0] */
    first=1;
  }else{
    /* one value was consumed above; discard begin-1 more so that the
       next read lands on column `begin` */
    for(col=1;col<begin;col++)
      get_line_value(in,vec);
    first=0;
  }

  for(col=first;col<n;col++){
    if(get_line_value(in,vec+col)){
      fprintf(stderr,"ran out of columns in input data\n");
      exit(1);
    }
  }

  return(1);
}
/* Read the next group of `group` scalars and pack them into one integer,
 * treating each scalar (truncated to int) as a digit in base `max`.
 *
 * nsofar counts how many scalars have been taken from the current line;
 * once it reaches n, or the line is exhausted, a new line is started and
 * its first `begin` values are skipped.  If a group is cut short by the
 * end of a line, the partial group is abandoned and reading restarts on
 * the following line.
 *
 * Returns the packed value, or -1 when get_next_value() reports that no
 * more data is available. */
static int getval(FILE *in,int begin,int n,int group,int max){
  float sample;
  long packed=0;
  int k;

  if(nsofar>=n || get_line_value(in,&sample)){
    /* start over on the next line */
    reset_next_value();
    nsofar=0;
    if(get_next_value(in,&sample))
      return(-1);
    k=1;
    while(k<=begin){
      get_line_value(in,&sample);
      k++;
    }
  }

  /* first digit of the group */
  packed=(int)sample;
  nsofar++;

  /* remaining digits */
  k=1;
  while(k<group){
    if(nsofar>=n || get_line_value(in,&sample))
      return(getval(in,begin,n,group,max));
    packed=packed*max+(int)sample;
    k++;
    nsofar++;
  }

  return(packed);
}
int main(int argc,char *argv[]){ char *basename; codebook **b=_ogg_calloc(1,sizeof(codebook *)); int *addmul=_ogg_calloc(1,sizeof(int)); int books=0; int input=0; int interleave=0; int j; int start=0; int num=-1; argv++; if(*argv==NULL){ process_usage(); exit(1); } /* yes, this is evil. However, it's very convenient to parse file extentions */ while(*argv){ if(*argv[0]=='-'){ /* option */ if(argv[0][1]=='s'){ /* subvector */ if(sscanf(argv[1],"%d,%d",&start,&num)!=2){ num= -1; if(sscanf(argv[1],"%d",&start)!=1){ fprintf(stderr,"Syntax error using -s\n"); exit(1); } } argv+=2; } if(argv[0][1]=='i'){ /* interleave */ interleave=1; argv+=1; } }else{ /* input file. What kind? */ char *dot; char *ext=NULL; char *name=strdup(*argv++); dot=strrchr(name,'.'); if(dot) ext=dot+1; else ext=""; /* codebook */ if(!strcmp(ext,"vqh")){ int multp=0; if(input){ fprintf(stderr,"specify all input data (.vqd) files following\n" "codebook header (.vqh) files\n"); exit(1); } /* is it additive or multiplicative? 
*/ if(name[0]=='*'){ multp=1; name++; } if(name[0]=='+')name++; basename=strrchr(name,'/'); if(basename) basename=strdup(basename)+1; else basename=strdup(name); dot=strrchr(basename,'.'); if(dot)*dot='\0'; b=_ogg_realloc(b,sizeof(codebook *)*(books+2)); b[books]=codebook_load(name); addmul=_ogg_realloc(addmul,sizeof(int)*(books+1)); addmul[books++]=multp; b[books]=NULL; } /* data file */ if(!strcmp(ext,"vqd")){ int cols; long lines=0; char *line; float *vec; FILE *in=fopen(name,"r"); if(!in){ fprintf(stderr,"Could not open input file %s\n",name); exit(1); } if(!input){ process_preprocess(b,basename); input++; } reset_next_value(); line=setup_line(in); /* count cols before we start reading */ { char *temp=line; while(*temp==' ')temp++; for(cols=0;*temp;cols++){ while(*temp>32)temp++; while(*temp==' ')temp++; } } vec=alloca(cols*sizeof(float)); while(line){ lines++; for(j=0;j<cols;j++) if(get_line_value(in,vec+j)){ fprintf(stderr,"Too few columns on line %ld in data file\n",lines); exit(1); } /* ignores -s for now */ process_vector(b,addmul,interleave,vec,cols); line=setup_line(in); } fclose(in); } } } /* take any data from stdin */ { struct stat st; if(fstat(STDIN_FILENO,&st)==-1){ fprintf(stderr,"Could not stat STDIN\n"); exit(1); } if((S_IFIFO|S_IFREG|S_IFSOCK)&st.st_mode){ int cols; char *line; long lines=0; float *vec; if(!input){ process_preprocess(b,basename); input++; } line=setup_line(stdin); /* count cols before we start reading */ { char *temp=line; while(*temp==' ')temp++; for(cols=0;*temp;cols++){ while(*temp>32)temp++; while(*temp==' ')temp++; } } vec=alloca(cols*sizeof(float)); while(line){ lines++; for(j=0;j<cols;j++) if(get_line_value(stdin,vec+j)){ fprintf(stderr,"Too few columns on line %ld in data file\n",lines); exit(1); } /* ignores -s for now */ process_vector(b,addmul,interleave,vec,cols); line=setup_line(stdin); } } } process_postprocess(b,basename); return 0; }
/* Build a Huffman length list from a data file and write it out as
 * <input-without-extension>.vqh.
 *
 *   argv[1]  input data file
 *   argv[2]  either "begin,n,group" or "lo-hi"
 *   argv[3]  (optional) when present, the guard count given to every
 *            histogram bin is 0 instead of 1
 *
 * In "begin,n,group" mode the largest value in the file is found first
 * and the alphabet size is (max+1)^group.  In "lo-hi" mode the alphabet
 * size is hi; bins below lo get no guard count, and a missing input file
 * is tolerated (an untrained book is produced). */
int main(int argc, char *argv[]){
  char *base;
  char *infile;
  int i=0,j,k,begin,n,subn,guard=1;   /* i is only a progress counter */
  FILE *file;
  int maxval=0;
  int loval=0;

  if(argc<3){
    usage();
    exit(1);   /* in case usage() returns */
  }
  if(argc==4)guard=0;

  infile=strdup(argv[1]);
  base=strdup(infile);
  if(strrchr(base,'.'))
    strrchr(base,'.')[0]='\0';

  {
    char *pos=strchr(argv[2],',');
    char *dpos=strchr(argv[2],'-');
    if(dpos){
      loval=atoi(argv[2]);
      maxval=atoi(dpos+1);
      subn=1;
      begin=0;
      /* n used to be left uninitialised on this path and was then handed
         to getval().  With group size 1 we take one scalar per line.
         NOTE(review): confirm that one-per-line is the intended layout
         for range-mode data files. */
      n=1;
      if(loval<0){
        /* a negative lower bound would index before hist[] */
        usage();
        exit(1);
      }
    }else{
      begin=atoi(argv[2]);
      if(!pos){
        usage();
        exit(1);
      }
      n=atoi(pos+1);
      pos=strchr(pos+1,',');
      if(!pos){
        usage();
        exit(1);
      }
      subn=atoi(pos+1);
      if(subn<=0){
        fprintf(stderr,"group must be a positive integer\n");
        exit(1);
      }
      if(n/subn*subn != n){
        fprintf(stderr,"n must be divisible by group\n");
        exit(1);
      }
    }
  }

  /* scan the file for maximum value */
  file=fopen(infile,"r");
  if(!file){
    fprintf(stderr,"Could not open file %s\n",infile);
    if(!maxval)
      exit(1);
    else
      fprintf(stderr," making untrained books.\n");
  }

  if(!maxval){
    /* file is non-NULL here: the !file && !maxval case exited above */
    i=0;
    while(1){
      long v;
      if(get_next_ivalue(file,&v))break;
      if(v>maxval)maxval=v;

      if(!(i++&0xff))spinnit("loading... ",i);
    }
    rewind(file);
    maxval++;
  }

  {
    long vals=pow(maxval,subn);
    long *hist=_ogg_calloc(vals,sizeof(long));
    long *lengths=_ogg_calloc(vals,sizeof(long));

    for(j=loval;j<vals;j++)hist[j]=guard;

    if(file){
      reset_next_value();
      i/=subn;
      while(!feof(file)){
        long val=getval(file,begin,n,subn,maxval);
        /* -1 is end of data; any other out-of-range value would index
           outside hist[], so stop on those as well */
        if(val<0 || val>=vals)break;
        hist[val]++;
        if(!(i--&0xff))spinnit("loading... ",i*subn);
      }
      fclose(file);
    }

    /* we have the probabilities, build the tree */
    fprintf(stderr,"Building tree for %ld entries\n",vals);
    build_tree_from_lengths0(vals,hist,lengths);

    /* save the book */
    {
      char *buffer=alloca(strlen(base)+5);
      strcpy(buffer,base);
      strcat(buffer,".vqh");
      file=fopen(buffer,"w");
      if(!file){
        fprintf(stderr,"Could not open file %s\n",buffer);
        exit(1);
      }
    }

    /* first, the static vectors, then the book structure to tie it
       together. */

    /* lengthlist */
    fprintf(file,"static const long _huff_lengthlist_%s[] = {\n",base);
    for(j=0;j<vals;){
      fprintf(file,"\t");
      for(k=0;k<16 && j<vals;k++,j++)
        fprintf(file,"%2ld,",lengths[j]);
      fprintf(file,"\n");
    }
    fprintf(file,"};\n\n");

    /* the toplevel book */
    fprintf(file,"static const static_codebook _huff_book_%s = {\n",base);
    fprintf(file,"\t%d, %ld,\n",subn,vals);
    fprintf(file,"\t(long *)_huff_lengthlist_%s,\n",base);
    fprintf(file,"\t0, 0, 0, 0, 0,\n");
    fprintf(file,"\tNULL,\n");
    fprintf(file,"\t0\n};\n\n");

    fclose(file);
    fprintf(stderr,"Done. \n\n");
  }
  exit(0);
}
int main(int argc,char *argv[]){ codebook b; static_codebook c; double *quantlist; long *hits; int entries=-1,dim=-1,quantvals=-1,addmul=-1,sequencep=0; FILE *in=NULL; char *line,*name; long i,j; memset(&b,0,sizeof(b)); memset(&c,0,sizeof(c)); if(argv[1]==NULL){ fprintf(stderr,"Need a lattice description file on the command line.\n"); exit(1); } { char *ptr; char *filename=_ogg_calloc(strlen(argv[1])+4,1); strcpy(filename,argv[1]); in=fopen(filename,"r"); if(!in){ fprintf(stderr,"Could not open input file %s\n",filename); exit(1); } ptr=strrchr(filename,'.'); if(ptr){ *ptr='\0'; name=strdup(filename); }else{ name=strdup(filename); } } /* read the description */ line=get_line(in); if(sscanf(line,"%d %d %d %d",&quantvals,&dim,&addmul,&sequencep)!=4){ if(sscanf(line,"%d %d %d",&quantvals,&dim,&addmul)!=3){ fprintf(stderr,"Syntax error reading description file (line 1)\n"); exit(1); } } entries=pow(quantvals,dim); c.dim=dim; c.entries=entries; c.lengthlist=_ogg_malloc(entries*sizeof(long)); c.maptype=1; c.q_sequencep=sequencep; c.quantlist=_ogg_calloc(quantvals,sizeof(long)); quantlist=_ogg_malloc(sizeof(double)*c.dim*c.entries); hits=_ogg_malloc(c.entries*sizeof(long)); for(j=0;j<entries;j++)hits[j]=1; for(j=0;j<entries;j++)c.lengthlist[j]=1; reset_next_value(); line=setup_line(in); for(j=0;j<quantvals;j++){ char *temp; if(!line || sscanf(line,"%lf",quantlist+j)!=1){ fprintf(stderr,"Ran out of data on line 2 of description file\n"); exit(1); } temp=strchr(line,','); if(!temp)temp=strchr(line,' '); if(temp)temp++; line=temp; } /* gen a real quant list from the more easily human-grokked input */ { double min=quantlist[0]; double mindel=-1; int fac=1; for(j=1;j<quantvals;j++)if(quantlist[j]<min)min=quantlist[j]; for(j=0;j<quantvals;j++) for(i=j+1;i<quantvals;i++) if(mindel==-1 || fabs(quantlist[j]-quantlist[i])<mindel) mindel=fabs(quantlist[j]-quantlist[i]); j=0; while(j<quantvals){ for(j=0;j<quantvals;j++){ double test=fac*(quantlist[j]-min)/mindel; if( 
fabs(rint(test)-test)>.00001f) break; } if(fac>100)break; if(j<quantvals)fac++; } mindel/=fac; fprintf(stderr,"min=%g mindel=%g\n",min,mindel); c.q_min=_float32_pack(min); c.q_delta=_float32_pack(mindel); c.q_quant=0; min=_float32_unpack(c.q_min); mindel=_float32_unpack(c.q_delta); for(j=0;j<quantvals;j++){ c.quantlist[j]=rint((quantlist[j]-min)/mindel); if(ilog(c.quantlist[j])>c.q_quant)c.q_quant=ilog(c.quantlist[j]); } } /* build the [default] codeword lengths */ memset(c.lengthlist,0,sizeof(long)*entries); for(i=0;i<entries;i++)hits[i]=1; build_tree_from_lengths(entries,hits,c.lengthlist); /* save the book in C header form */ write_codebook(stdout,name,&c); fprintf(stderr,"\r " "\nDone.\n"); exit(0); }
codebook *codebook_load(char *filename){ codebook *b=_ogg_calloc(1,sizeof(codebook)); static_codebook *c=(static_codebook *)(b->c=_ogg_calloc(1,sizeof(static_codebook))); encode_aux_nearestmatch *a=NULL; encode_aux_threshmatch *t=NULL; encode_aux_pigeonhole *p=NULL; int quant_to_read=0; FILE *in=fopen(filename,"r"); char *line; long i; if(in==NULL){ fprintf(stderr,"Couldn't open codebook %s\n",filename); exit(1); } /* find the codebook struct */ find_seek_to(in,"static static_codebook "); /* get the major important values */ line=get_line(in); if(sscanf(line,"%ld, %ld,", &(c->dim),&(c->entries))!=2){ fprintf(stderr,"1: syntax in %s in line:\t %s",filename,line); exit(1); } line=get_line(in); line=get_line(in); if(sscanf(line,"%d, %ld, %ld, %d, %d,", &(c->maptype),&(c->q_min),&(c->q_delta),&(c->q_quant), &(c->q_sequencep))!=5){ fprintf(stderr,"1: syntax in %s in line:\t %s",filename,line); exit(1); } /* find the auxiliary encode struct[s] (if any) */ if(find_seek_to(in,"static encode_aux_nearestmatch _vq_aux")){ /* how big? 
*/ c->nearest_tree=a=_ogg_calloc(1,sizeof(encode_aux_nearestmatch)); line=get_line(in); line=get_line(in); line=get_line(in); line=get_line(in); line=get_line(in); if(sscanf(line,"%ld, %ld",&(a->aux),&(a->alloc))!=2){ fprintf(stderr,"2: syntax in %s in line:\t %s",filename,line); exit(1); } /* load ptr0 */ find_seek_to(in,"static long _vq_ptr0"); reset_next_value(); a->ptr0=_ogg_malloc(sizeof(long)*a->aux); for(i=0;i<a->aux;i++) if(get_next_ivalue(in,a->ptr0+i)){ fprintf(stderr,"out of data while reading codebook %s\n",filename); exit(1); } /* load ptr1 */ find_seek_to(in,"static long _vq_ptr1"); reset_next_value(); a->ptr1=_ogg_malloc(sizeof(long)*a->aux); for(i=0;i<a->aux;i++) if(get_next_ivalue(in,a->ptr1+i)){ fprintf(stderr,"out of data while reading codebook %s\n",filename); exit(1); } /* load p */ find_seek_to(in,"static long _vq_p_"); reset_next_value(); a->p=_ogg_malloc(sizeof(long)*a->aux); for(i=0;i<a->aux;i++) if(get_next_ivalue(in,a->p+i)){ fprintf(stderr,"out of data while reading codebook %s\n",filename); exit(1); } /* load q */ find_seek_to(in,"static long _vq_q_"); reset_next_value(); a->q=_ogg_malloc(sizeof(long)*a->aux); for(i=0;i<a->aux;i++) if(get_next_ivalue(in,a->q+i)){ fprintf(stderr,"out of data while reading codebook %s\n",filename); exit(1); } } if(find_seek_to(in,"static encode_aux_threshmatch _vq_aux")){ /* how big? 
*/ c->thresh_tree=t=_ogg_calloc(1,sizeof(encode_aux_threshmatch)); line=get_line(in); line=get_line(in); line=get_line(in); if(sscanf(line,"%d",&(t->quantvals))!=1){ fprintf(stderr,"3: syntax in %s in line:\t %s",filename,line); exit(1); } line=get_line(in); if(sscanf(line,"%d",&(t->threshvals))!=1){ fprintf(stderr,"4: syntax in %s in line:\t %s",filename,line); exit(1); } /* load quantthresh */ find_seek_to(in,"static float _vq_quantthresh_"); reset_next_value(); t->quantthresh=_ogg_malloc(sizeof(float)*t->threshvals); for(i=0;i<t->threshvals-1;i++) if(get_next_value(in,t->quantthresh+i)){ fprintf(stderr,"out of data 1 while reading codebook %s\n",filename); exit(1); } /* load quantmap */ find_seek_to(in,"static long _vq_quantmap_"); reset_next_value(); t->quantmap=_ogg_malloc(sizeof(long)*t->threshvals); for(i=0;i<t->threshvals;i++) if(get_next_ivalue(in,t->quantmap+i)){ fprintf(stderr,"out of data 2 while reading codebook %s\n",filename); exit(1); } } if(find_seek_to(in,"static encode_aux_pigeonhole _vq_aux")){ int pigeons=1,i; /* how big? 
*/ c->pigeon_tree=p=_ogg_calloc(1,sizeof(encode_aux_pigeonhole)); line=get_line(in); if(sscanf(line,"%f, %f, %d, %d",&(p->min),&(p->del), &(p->mapentries),&(p->quantvals))!=4){ fprintf(stderr,"5: syntax in %s in line:\t %s",filename,line); exit(1); } line=get_line(in); line=get_line(in); if(sscanf(line,"%ld",&(p->fittotal))!=1){ fprintf(stderr,"6: syntax in %s in line:\t %s",filename,line); exit(1); } /* load pigeonmap */ find_seek_to(in,"static long _vq_pigeonmap_"); reset_next_value(); p->pigeonmap=_ogg_malloc(sizeof(long)*p->mapentries); for(i=0;i<p->mapentries;i++) if(get_next_ivalue(in,p->pigeonmap+i)){ fprintf(stderr,"out of data (pigeonmap) while reading codebook %s\n",filename); exit(1); } /* load fitlist */ find_seek_to(in,"static long _vq_fitlist_"); reset_next_value(); p->fitlist=_ogg_malloc(sizeof(long)*p->fittotal); for(i=0;i<p->fittotal;i++) if(get_next_ivalue(in,p->fitlist+i)){ fprintf(stderr,"out of data (fitlist) while reading codebook %s\n",filename); exit(1); } /* load fitmap */ find_seek_to(in,"static long _vq_fitmap_"); reset_next_value(); for(i=0;i<c->dim;i++)pigeons*=p->quantvals; p->fitmap=_ogg_malloc(sizeof(long)*pigeons); for(i=0;i<pigeons;i++) if(get_next_ivalue(in,p->fitmap+i)){ fprintf(stderr,"out of data (fitmap) while reading codebook %s\n",filename); exit(1); } /* load fitlength */ find_seek_to(in,"static long _vq_fitlength_"); reset_next_value(); p->fitlength=_ogg_malloc(sizeof(long)*pigeons); for(i=0;i<pigeons;i++) if(get_next_ivalue(in,p->fitlength+i)){ fprintf(stderr,"out of data (fitlength) while reading codebook %s\n",filename); exit(1); } } switch(c->maptype){ case 0: quant_to_read=0; break; case 1: quant_to_read=_book_maptype1_quantvals(c); break; case 2: quant_to_read=c->entries*c->dim; break; } /* load the quantized entries */ find_seek_to(in,"static long _vq_quantlist_"); reset_next_value(); c->quantlist=_ogg_malloc(sizeof(long)*quant_to_read); for(i=0;i<quant_to_read;i++) if(get_next_ivalue(in,c->quantlist+i)){ 
fprintf(stderr,"out of data while reading codebook %s\n",filename); exit(1); } /* load the lengthlist */ find_seek_to(in,"_lengthlist"); reset_next_value(); c->lengthlist=_ogg_malloc(sizeof(long)*c->entries); for(i=0;i<c->entries;i++) if(get_next_ivalue(in,c->lengthlist+i)){ fprintf(stderr,"out of data while reading codebook %s\n",filename); exit(1); } /* got it all */ fclose(in); vorbis_book_init_encode(b,c); return(b); }
/* Reset the value-reader state, fetch the next line from `in` with
 * get_line(), remember it in value_line_buff and hand the same pointer
 * back to the caller (whatever get_line() returned, unchanged). */
char *setup_line(FILE *in){
  char *fresh;

  reset_next_value();
  fresh=get_line(in);
  value_line_buff=fresh;
  return fresh;
}
int main(int argc,char *argv[]){ char *basename; codebook *b=NULL; int entries=0; int dim=0; long i,j,target=-1,protect=-1; FILE *out=NULL; int argnum=0; argv++; if(*argv==NULL){ usage(); exit(1); } while(*argv){ if(*argv[0]=='-'){ argv++; }else{ switch (argnum++){ case 0:case 1: { /* yes, this is evil. However, it's very convenient to parse file extentions */ /* input file. What kind? */ char *dot; char *ext=NULL; char *name=strdup(*argv++); dot=strrchr(name,'.'); if(dot) ext=dot+1; else{ ext=""; } /* codebook */ if(!strcmp(ext,"vqh")){ basename=strrchr(name,'/'); if(basename) basename=strdup(basename)+1; else basename=strdup(name); dot=strrchr(basename,'.'); if(dot)*dot='\0'; b=codebook_load(name); dim=b->dim; entries=b->entries; } /* data file; we do actually need to suck it into memory */ /* we're dealing with just one book, so we can de-interleave */ if(!strcmp(ext,"vqd") && !points){ int cols; long lines=0; char *line; float *vec; FILE *in=fopen(name,"r"); if(!in){ fprintf(stderr,"Could not open input file %s\n",name); exit(1); } reset_next_value(); line=setup_line(in); /* count cols before we start reading */ { char *temp=line; while(*temp==' ')temp++; for(cols=0;*temp;cols++){ while(*temp>32)temp++; while(*temp==' ')temp++; } } vec=alloca(cols*sizeof(float)); /* count, then load, to avoid fragmenting the hell out of memory */ while(line){ lines++; for(j=0;j<cols;j++) if(get_line_value(in,vec+j)){ fprintf(stderr,"Too few columns on line %ld in data file\n",lines); exit(1); } if((lines&0xff)==0)spinnit("counting samples...",lines*cols); line=setup_line(in); } pointlist=_ogg_malloc((cols*lines+entries*dim)*sizeof(float)); rewind(in); line=setup_line(in); while(line){ lines--; for(j=0;j<cols;j++) if(get_line_value(in,vec+j)){ fprintf(stderr,"Too few columns on line %ld in data file\n",lines); exit(1); } /* deinterleave, add to heap */ add_vector(b,vec,cols); if((lines&0xff)==0)spinnit("loading samples...",lines*cols); line=setup_line(in); } fclose(in); } } 
break; case 2: target=atol(*argv++); if(target==0)target=entries; break; case 3: protect=atol(*argv++); break; case 4: { char *buff=alloca(strlen(*argv)+5); sprintf(buff,"%s.vqh",*argv); basename=*argv++; out=fopen(buff,"w"); if(!out){ fprintf(stderr,"unable ot open %s for output",buff); exit(1); } } break; default: usage(); } } } if(!entries || !points || !out)usage(); if(target==-1)usage(); /* add guard points */ for(i=0;i<entries;i++) for(j=0;j<dim;j++) pointlist[points++]=b->valuelist[i*dim+j]; points/=dim; /* set up auxiliary vectors for error tracking */ { encode_aux_nearestmatch *nt=NULL; long pointssofar=0; long *pointindex; long indexedpoints=0; long *entryindex; long *reventry; long *membership=_ogg_malloc(points*sizeof(long)); long *firsthead=_ogg_malloc(entries*sizeof(long)); long *secondary=_ogg_malloc(points*sizeof(long)); long *secondhead=_ogg_malloc(entries*sizeof(long)); long *cellcount=_ogg_calloc(entries,sizeof(long)); long *cellcount2=_ogg_calloc(entries,sizeof(long)); float *cellerror=_ogg_calloc(entries,sizeof(float)); float *cellerrormax=_ogg_calloc(entries,sizeof(float)); long cellsleft=entries; for(i=0;i<points;i++)membership[i]=-1; for(i=0;i<entries;i++)firsthead[i]=-1; for(i=0;i<points;i++)secondary[i]=-1; for(i=0;i<entries;i++)secondhead[i]=-1; for(i=0;i<points;i++){ /* assign vectors to the nearest cell. Also keep track of second nearest for error statistics */ float *ppt=pointlist+i*dim; int firstentry=closest(b,ppt,-1); int secondentry=closest(b,ppt,firstentry); float firstmetric=_dist(dim,b->valuelist+dim*firstentry,ppt); float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt); if(!(i&0xff))spinnit("initializing... 
",points-i); membership[i]=firsthead[firstentry]; firsthead[firstentry]=i; secondary[i]=secondhead[secondentry]; secondhead[secondentry]=i; if(i<points-entries){ cellerror[firstentry]+=secondmetric-firstmetric; cellerrormax[firstentry]=max(cellerrormax[firstentry], _heuristic(b,ppt,secondentry)); cellcount[firstentry]++; cellcount2[secondentry]++; } } /* which cells are most heavily populated? Protect as many from dispersal as the user has requested */ { long **countindex=_ogg_calloc(entries,sizeof(long *)); for(i=0;i<entries;i++)countindex[i]=cellcount+i; qsort(countindex,entries,sizeof(long *),longsort); for(i=0;i<protect;i++){ int ptr=countindex[i]-cellcount; cellerrormax[ptr]=9e50f; } } { fprintf(stderr,"\r"); for(i=0;i<entries;i++){ /* decompose index */ int entry=i; for(j=0;j<dim;j++){ fprintf(stderr,"%d:",entry%b->c->thresh_tree->quantvals); entry/=b->c->thresh_tree->quantvals; } fprintf(stderr,":%ld/%ld, ",cellcount[i],cellcount2[i]); } fprintf(stderr,"\n"); } /* do the automatic cull request */ while(cellsleft>target){ int bestcell=-1; float besterror=0; float besterror2=0; long head=-1; char spinbuf[80]; sprintf(spinbuf,"cells left to eliminate: %ld : ",cellsleft-target); /* find the cell with lowest removal impact */ for(i=0;i<entries;i++){ if(b->c->lengthlist[i]>0){ if(bestcell==-1 || cellerrormax[i]<=besterror2){ if(bestcell==-1 || cellerrormax[i]<besterror2 || besterror>cellerror[i]){ besterror=cellerror[i]; besterror2=cellerrormax[i]; bestcell=i; } } } } fprintf(stderr,"\reliminating cell %d \n" " dispersal error of %g max/%g total (%ld hits)\n", bestcell,besterror2,besterror,cellcount[bestcell]); /* disperse it. 
move each point out, adding it (properly) to the second best */ b->c->lengthlist[bestcell]=0; head=firsthead[bestcell]; firsthead[bestcell]=-1; while(head!=-1){ /* head is a point number */ float *ppt=pointlist+head*dim; int firstentry=closest(b,ppt,-1); int secondentry=closest(b,ppt,firstentry); float firstmetric=_dist(dim,b->valuelist+dim*firstentry,ppt); float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt); long next=membership[head]; if(head<points-entries){ cellcount[firstentry]++; cellcount[bestcell]--; cellerror[firstentry]+=secondmetric-firstmetric; cellerrormax[firstentry]=max(cellerrormax[firstentry], _heuristic(b,ppt,secondentry)); } membership[head]=firsthead[firstentry]; firsthead[firstentry]=head; head=next; if(cellcount[bestcell]%128==0) spinnit(spinbuf,cellcount[bestcell]+cellcount2[bestcell]); } /* now see that all points that had the dispersed cell as second choice have second choice reassigned */ head=secondhead[bestcell]; secondhead[bestcell]=-1; while(head!=-1){ float *ppt=pointlist+head*dim; /* who are we assigned to now? */ int firstentry=closest(b,ppt,-1); /* what is the new second closest match? */ int secondentry=closest(b,ppt,firstentry); /* old second closest is the cell being disbanded */ float oldsecondmetric=_dist(dim,b->valuelist+dim*bestcell,ppt); /* new second closest error */ float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt); long next=secondary[head]; if(head<points-entries){ cellcount2[secondentry]++; cellcount2[bestcell]--; cellerror[firstentry]+=secondmetric-oldsecondmetric; cellerrormax[firstentry]=max(cellerrormax[firstentry], _heuristic(b,ppt,secondentry)); } secondary[head]=secondhead[secondentry]; secondhead[secondentry]=head; head=next; if(cellcount2[bestcell]%128==0) spinnit(spinbuf,cellcount2[bestcell]); } cellsleft--; } /* paring is over. Build decision trees using points that now fall through the thresh matcher. 
*/ /* we don't free membership; we flatten it in order to use in lp_split */ for(i=0;i<entries;i++){ long head=firsthead[i]; spinnit("rearranging membership cache... ",entries-i); while(head!=-1){ long next=membership[head]; membership[head]=i; head=next; } } free(secondhead); free(firsthead); free(cellerror); free(cellerrormax); free(secondary); pointindex=_ogg_malloc(points*sizeof(long)); /* make a point index of fall-through points */ for(i=0;i<points;i++){ int best=_best(b,pointlist+i*dim,1); if(best==-1) pointindex[indexedpoints++]=i; spinnit("finding orphaned points... ",points-i); } /* make an entry index */ entryindex=_ogg_malloc(entries*sizeof(long)); target=0; for(i=0;i<entries;i++){ if(b->c->lengthlist[i]>0) entryindex[target++]=i; } /* make working space for a reverse entry index */ reventry=_ogg_malloc(entries*sizeof(long)); /* do the split */ nt=b->c->nearest_tree= _ogg_calloc(1,sizeof(encode_aux_nearestmatch)); nt->alloc=4096; nt->ptr0=_ogg_malloc(sizeof(long)*nt->alloc); nt->ptr1=_ogg_malloc(sizeof(long)*nt->alloc); nt->p=_ogg_malloc(sizeof(long)*nt->alloc); nt->q=_ogg_malloc(sizeof(long)*nt->alloc); nt->aux=0; fprintf(stderr,"Leaves added: %d \n", lp_split(pointlist,points, b,entryindex,target, pointindex,indexedpoints, membership,reventry, 0,&pointssofar)); free(membership); free(reventry); free(pointindex); /* hack alert. I should just change the damned splitter and codebook writer */ for(i=0;i<nt->aux;i++)nt->p[i]*=dim; for(i=0;i<nt->aux;i++)nt->q[i]*=dim; /* recount hits. Build new lengthlist. reuse entryindex storage */ for(i=0;i<entries;i++)entryindex[i]=1; for(i=0;i<points-entries;i++){ int best=_best(b,pointlist+i*dim,1); float *a=pointlist+i*dim; if(!(i&0xff))spinnit("counting hits...",i); if(best==-1){ fprintf(stderr,"\nINTERNAL ERROR; a point count not be matched to a\n" "codebook entry. 
The new decision tree is broken.\n"); exit(1); } entryindex[best]++; } for(i=0;i<nt->aux;i++)nt->p[i]/=dim; for(i=0;i<nt->aux;i++)nt->q[i]/=dim; /* the lengthlist builder doesn't actually deal with 0 hit entries. So, we pack the 'sparse' hit list into a dense list, then unpack the lengths after the build */ { int upper=0; long *lengthlist=_ogg_calloc(entries,sizeof(long)); for(i=0;i<entries;i++){ if(b->c->lengthlist[i]>0) entryindex[upper++]=entryindex[i]; else{ if(entryindex[i]>1){ fprintf(stderr,"\nINTERNAL ERROR; _best matched to unused entry\n"); exit(1); } } } /* sanity check */ if(upper != target){ fprintf(stderr,"\nINTERNAL ERROR; packed the wrong number of entries\n"); exit(1); } build_tree_from_lengths(upper,entryindex,lengthlist); upper=0; for(i=0;i<entries;i++){ if(b->c->lengthlist[i]>0) b->c->lengthlist[i]=lengthlist[upper++]; } } } /* we're done. write it out. */ write_codebook(out,basename,b->c); fprintf(stderr,"\r \nDone.\n"); return(0); }