/* Read every remaining line of IN as one vector and pass it to
 * process_vector().
 *
 * The column count is taken from the first line returned by setup_line();
 * each later line must supply at least that many values or the program
 * exits with an error.  An empty stream (setup_line() returns NULL on the
 * first call) is accepted and simply produces no vectors.
 *
 * in         - stream to read; not closed here
 * b          - NULL-terminated array of loaded codebooks
 * addmul     - per-book flag: nonzero when the book name began with '*'
 * interleave - nonzero when -i was given
 */
static void cascade_process_stream(FILE *in,codebook **b,int *addmul,
                                   int interleave){
  int cols=0;
  int j;
  long lines=0;
  float *vec;
  char *line=setup_line(in);

  /* nothing to read; the original code dereferenced NULL here */
  if(!line)return;

  /* count cols before we start reading */
  {
    char *temp=line;
    while(*temp==' ')temp++;
    for(cols=0;*temp;cols++){
      while(*temp>32)temp++;
      while(*temp==' ')temp++;
    }
  }
  vec=alloca(cols*sizeof(float));

  while(line){
    lines++;
    for(j=0;j<cols;j++)
      if(get_line_value(in,vec+j)){
        fprintf(stderr,"Too few columns on line %ld in data file\n",lines);
        exit(1);
      }
    /* ignores -s for now */
    process_vector(b,addmul,interleave,vec,cols);

    line=setup_line(in);
  }
}

/* Cascade driver.
 *
 * Command line: any mix of options and files, processed left to right.
 *   -s start[,num]  subvector selection (parsed, currently unused)
 *   -i              interleave
 *   [*|+]book.vqh   codebook header; a leading '*' marks it multiplicative,
 *                   a leading '+' (or nothing) additive.  All books must
 *                   precede the first data file.
 *   data.vqd        data file, one vector per line
 * After the arguments are consumed, data is also read from stdin when stdin
 * is a FIFO, regular file or socket.
 *
 * Returns 0 on success; exits with status 1 on any usage or I/O error.
 */
int main(int argc,char *argv[]){
  /* NULL until a .vqh names it, so the process_* calls never see garbage */
  char *basename=NULL;
  codebook **b=_ogg_calloc(1,sizeof(codebook *));
  int *addmul=_ogg_calloc(1,sizeof(int));
  int books=0;
  int input=0;
  int interleave=0;
  int start=0;
  int num=-1;
  argv++;

  if(*argv==NULL){
    process_usage();
    exit(1);
  }

  /* yes, this is evil.  However, it's very convenient to parse file
     extentions */

  while(*argv){
    if(*argv[0]=='-'){
      /* option */
      if(argv[0][1]=='s'){
        /* subvector; needs a following argument */
        if(argv[1]==NULL){
          fprintf(stderr,"Syntax error using -s\n");
          exit(1);
        }
        if(sscanf(argv[1],"%d,%d",&start,&num)!=2){
          num= -1;
          if(sscanf(argv[1],"%d",&start)!=1){
            fprintf(stderr,"Syntax error using -s\n");
            exit(1);
          }
        }
        argv+=2;
      }else if(argv[0][1]=='i'){
        /* interleave */
        interleave=1;
        argv+=1;
      }else{
        /* an unknown option used to leave argv untouched and spin forever */
        fprintf(stderr,"Unrecognized option %s\n",*argv);
        process_usage();
        exit(1);
      }
    }else{
      /* input file.  What kind? */
      char *dot;
      char *ext=NULL;
      char *name=strdup(*argv++);
      dot=strrchr(name,'.');
      if(dot)
        ext=dot+1;
      else
        ext="";

      /* codebook */
      if(!strcmp(ext,"vqh")){
        int multp=0;
        if(input){
          fprintf(stderr,"specify all input data (.vqd) files following\n"
                  "codebook header (.vqh) files\n");
          exit(1);
        }
        /* is it additive or multiplicative? */
        if(name[0]=='*'){
          multp=1;
          name++;
        }
        if(name[0]=='+')name++;

        /* basename = file name without directory or extension */
        basename=strrchr(name,'/');
        if(basename)
          basename=strdup(basename+1);
        else
          basename=strdup(name);
        dot=strrchr(basename,'.');
        if(dot)*dot='\0';

        /* keep b NULL-terminated: room for this book plus the sentinel */
        b=_ogg_realloc(b,sizeof(codebook *)*(books+2));
        b[books]=codebook_load(name);
        addmul=_ogg_realloc(addmul,sizeof(int)*(books+1));
        addmul[books++]=multp;
        b[books]=NULL;
      }

      /* data file */
      if(!strcmp(ext,"vqd")){
        FILE *in=fopen(name,"r");
        if(!in){
          fprintf(stderr,"Could not open input file %s\n",name);
          exit(1);
        }

        if(!input){
          process_preprocess(b,basename);
          input++;
        }

        reset_next_value();
        cascade_process_stream(in,b,addmul,interleave);
        fclose(in);
      }
    }
  }

  /* take any data from stdin */
  {
    struct stat st;
    if(fstat(STDIN_FILENO,&st)==-1){
      fprintf(stderr,"Could not stat STDIN\n");
      exit(1);
    }
    /* test the file type properly; a raw bitmask AND also matched
       directories and block devices */
    if(S_ISFIFO(st.st_mode) || S_ISREG(st.st_mode) || S_ISSOCK(st.st_mode)){
      if(!input){
        process_preprocess(b,basename);
        input++;
      }
      cascade_process_stream(stdin,b,addmul,interleave);
    }
  }

  process_postprocess(b,basename);

  return 0;
}
int main(int argc,char *argv[]){ FILE *in; long lines=0; float min; float max; long bins=-1; int flag=0; long *countarray; long total=0; char *line; if(argv[1]==NULL){ fprintf(stderr,"Usage: distribution {data.vqd [bins]| book.vqh} \n\n"); exit(1); } if(argv[2]!=NULL) bins=atoi(argv[2])-1; in=fopen(argv[1],"r"); if(!in){ fprintf(stderr,"Could not open input file %s\n",argv[1]); exit(1); } if(strrchr(argv[1],'.') && strcmp(strrchr(argv[1],'.'),".vqh")==0){ /* load/decode a book */ codebook *b=codebook_load(argv[1]); static_codebook *c=(static_codebook *)(b->c); float delta; int i; fclose(in); switch(c->maptype){ case 0: printf("entropy codebook only; no mappings\n"); exit(0); break; case 1: bins=_book_maptype1_quantvals(c); break; case 2: bins=c->entries*c->dim; break; } max=min=_float32_unpack(c->q_min); delta=_float32_unpack(c->q_delta); for(i=0;i<bins;i++){ float val=c->quantlist[i]*delta+min; if(val>max)max=val; } printf("Minimum scalar value: %f\n",min); printf("Maximum scalar value: %f\n",max); switch(c->maptype){ case 1: { /* lattice codebook. dump it. 
*/ int j,k; long maxcount=0; long **sort=calloc(bins,sizeof(long *)); long base=c->lengthlist[0]; countarray=calloc(bins,sizeof(long)); for(i=0;i<bins;i++)sort[i]=c->quantlist+i; qsort(sort,bins,sizeof(long *),ascend); for(i=0;i<b->entries;i++) if(c->lengthlist[i]>base)base=c->lengthlist[i]; /* dump a full, correlated count */ for(j=0;j<b->entries;j++){ if(c->lengthlist[j]){ int indexdiv=1; printf("%4d: ",j); for(k=0;k<b->dim;k++){ int index= (j/indexdiv)%bins; printf("%+3.1f,", c->quantlist[index]*_float32_unpack(c->q_delta)+ _float32_unpack(c->q_min)); indexdiv*=bins; } printf("\t|"); for(k=0;k<base-c->lengthlist[j];k++)printf("*"); printf("\n"); } } /* do a rough count */ for(j=0;j<b->entries;j++){ int indexdiv=1; for(k=0;k<b->dim;k++){ if(c->lengthlist[j]){ int index= (j/indexdiv)%bins; countarray[index]+=(1<<(base-c->lengthlist[j])); indexdiv*=bins; } } } /* dump the count */ { long maxcount=0,i,j; for(i=0;i<bins;i++) if(countarray[i]>maxcount)maxcount=countarray[i]; for(i=0;i<bins;i++){ int ptr=sort[i]-c->quantlist; int stars=rint(50./maxcount*countarray[ptr]); printf("%+08f (%8ld) |",c->quantlist[ptr]*delta+min,countarray[ptr]); for(j=0;j<stars;j++)printf("*"); printf("\n"); } } } break; case 2: { /* trained, full mapping codebook. */ printf("Can't do probability dump of a trained [type 2] codebook (yet)\n"); } break; } }else{ /* load/count a data file */ /* do it the simple way; two pass. */ line=setup_line(in); while(line){ float code; char buf[80]; lines++; sprintf(buf,"getting min/max (%.2f::%.2f). 
lines...",min,max); if(!(lines&0xff))spinnit(buf,lines); while(!flag && sscanf(line,"%f",&code)==1){ line=strchr(line,','); min=max=code; flag=1; } while(line && sscanf(line,"%f",&code)==1){ line=strchr(line,','); if(line)line++; if(code<min)min=code; if(code>max)max=code; } line=setup_line(in); } if(bins<1){ if((int)(max-min)==min-max){ bins=max-min; }else{ bins=25; } } printf("\r \r"); printf("Minimum scalar value: %f\n",min); printf("Maximum scalar value: %f\n",max); if(argv[2]){ printf("\n counting hits into %ld bins...\n",bins+1); countarray=calloc(bins+1,sizeof(long)); rewind(in); line=setup_line(in); while(line){ float code; lines--; if(!(lines&0xff))spinnit("counting distribution. lines so far...",lines); while(line && sscanf(line,"%f",&code)==1){ line=strchr(line,','); if(line)line++; code-=min; code/=(max-min); code*=bins; countarray[(int)rint(code)]++; total++; } line=setup_line(in); } /* make a pretty graph */ { long maxcount=0,i,j; for(i=0;i<bins+1;i++) if(countarray[i]>maxcount)maxcount=countarray[i]; printf("\r \r"); printf("Total scalars: %ld\n",total); for(i=0;i<bins+1;i++){ int stars=rint(50./maxcount*countarray[i]); printf("%08f (%8ld) |",(max-min)/bins*i+min,countarray[i]); for(j=0;j<stars;j++)printf("*"); printf("\n"); } } } fclose(in); } printf("\nDone.\n"); exit(0); }
/* Lattice hint generator (definition is TRUNCATED in this view: it ends
 * inside the "#if 0" pigeonhole section with braces still open, so only the
 * visible part is described here).
 *
 *   argv[1]  lattice codebook file, loaded with codebook_load(); the text
 *            before the last '.' becomes `name`.  The book must have
 *            maptype 1 or the program exits with status 1.
 *   argv[2]  optional comma-separated list of float threshold suggestions
 *            (at most quantvals of them are read).
 *
 * Threshold hint (built only when c->q_sequencep==0):
 *   - quantsort[] holds pointers into c->quantlist sorted with longsort();
 *   - t->quantmap[i] is the quantlist index of the i-th sorted value;
 *   - t->quantthresh[i] is the first suggestion lying strictly between the
 *     i-th and (i+1)-th sorted values (scaled by del, offset by min), or
 *     their midpoint when no suggestion fits.
 *
 * Pigeonhole hint: the whole section starts under "#if 0".
 */
int main(int argc,char *argv[]) { codebook *b; static_codebook *c; int entries=-1,dim=-1; float min,del; char *name; long i,j; float *suggestions; int suggcount=0; if(argv[1]==NULL) { fprintf(stderr,"Need a lattice book on the command line.\n"); exit(1); } { char *ptr; char *filename=strdup(argv[1]); b=codebook_load(filename); c=(static_codebook *)(b->c); ptr=strrchr(filename,'.'); if(ptr) { *ptr='\0'; name=strdup(filename); } else { name=strdup(filename); } } if(c->maptype!=1) { fprintf(stderr,"Provided book is not a latticebook.\n"); exit(1); } entries=b->entries; dim=b->dim; min=_float32_unpack(c->q_min); del=_float32_unpack(c->q_delta); /* Do we want to gen a threshold hint? */ if(c->q_sequencep==0) { /* yes. Discard any preexisting threshhold hint */ long quantvals=_book_maptype1_quantvals(c); long **quantsort=alloca(quantvals*sizeof(long *)); encode_aux_threshmatch *t=_ogg_calloc(1,sizeof(encode_aux_threshmatch)); c->thresh_tree=t; fprintf(stderr,"Adding threshold hint to %s...\n",name); /* partial/complete suggestions */ if(argv[2]) { char *ptr=strdup(argv[2]); suggestions=alloca(sizeof(float)*quantvals); for(suggcount=0; ptr && suggcount<quantvals; suggcount++) { char *ptr2=strchr(ptr,','); if(ptr2)*ptr2++='\0'; suggestions[suggcount]=atof(ptr); ptr=ptr2; } } /* simplest possible threshold hint only */ t->quantthresh=_ogg_calloc(quantvals-1,sizeof(float)); t->quantmap=_ogg_calloc(quantvals,sizeof(int)); t->threshvals=quantvals; t->quantvals=quantvals; /* the quantvals may not be in order; sort em first */ for(i=0; i<quantvals; i++)quantsort[i]=c->quantlist+i; qsort(quantsort,quantvals,sizeof(long *),longsort); /* ok, gen the map and thresholds */ for(i=0; i<quantvals; i++)t->quantmap[i]=quantsort[i]-c->quantlist; for(i=0; i<quantvals-1; i++) { float v1=*(quantsort[i])*del+min; float v2=*(quantsort[i+1])*del+min; for(j=0; j<suggcount; j++) if(v1<suggestions[j] && suggestions[j]<v2) { t->quantthresh[i]=suggestions[j]; break; } if(j==suggcount) { 
t->quantthresh[i]=(v1+v2)*.5; } } } /* Do we want to gen a pigeonhole hint? */ #if 0 for(i=0; i<entries; i++)if(c->lengthlist[i]==0)break; if(c->q_sequencep || i<entries) { long **tempstack; long *tempcount; long *temptrack; float *tempmin; float *tempmax; long totalstack=0; long pigeons; long subpigeons; long quantvals=_book_maptype1_quantvals(c); int changep=1,factor; encode_aux_pigeonhole *p=_ogg_calloc(1,sizeof(encode_aux_pigeonhole)); c->pigeon_tree=p; fprintf(stderr,"Adding pigeonhole hint to %s...\n",name); /* the idea is that we quantize uniformly, even in a nonuniform lattice, so that quantization of one scalar has a predictable result on the next sequential scalar in a greedy matching algorithm. We generate a lookup based on the quantization of the vector (pigeonmap groups quantized entries together) and list the entries that could possible be the best fit for any given member of that pigeonhole. The encode process then has a much smaller list to brute force */ /* find our pigeonhole-specific quantization values, fill in the quant value->pigeonhole map */ factor=3; p->del=del; p->min=min; p->quantvals=quantvals; { int max=0; for(i=0; i<quantvals; i++)if(max<c->quantlist[i])max=c->quantlist[i]; p->mapentries=max; } p->pigeonmap=_ogg_malloc(p->mapentries*sizeof(long)); p->quantvals=(quantvals+factor-1)/factor; /* pigeonhole roughly on the boundaries of the quantvals; the exact pigeonhole grouping is an optimization issue, not a correctness issue */ for(i=0; i<p->mapentries; i++) { float thisval=del*i+min; /* middle of the quant zone */ int quant=0; float err=fabs(c->quantlist[0]*del+min-thisval); for(j=1; j<quantvals; j++) { float thiserr=fabs(c->quantlist[j]*del+min-thisval); if(thiserr<err) { quant=j/factor; err=thiserr; } } p->pigeonmap[i]=quant; } /* pigeonmap complete. Now do the grungy business of finding the entries that could possibly be the best fit for a value appearing in the pigeonhole. 
The trick that allows the below to work is the uniform quantization; even though the scalars may be 'sequential' (each a delta from the last), the uniform quantization means that the error variance is *not* dependant. Given a pigeonhole and an entry, we can find the minimum and maximum possible errors (relative to the entry) for any point that could appear in the pigeonhole */ /* must iterate over both pigeonholes and entries */ /* temporarily (in order to avoid thinking hard), we grow each pigeonhole separately, the build a stack of 'em later */ pigeons=1; subpigeons=1; for(i=0; i<dim; i++)subpigeons*=p->mapentries; for(i=0; i<dim; i++)pigeons*=p->quantvals; temptrack=_ogg_calloc(dim,sizeof(long)); tempmin=_ogg_calloc(dim,sizeof(float)); tempmax=_ogg_calloc(dim,sizeof(float)); tempstack=_ogg_calloc(pigeons,sizeof(long *)); tempcount=_ogg_calloc(pigeons,sizeof(long)); while(1) { float errorpost=-1; char buffer[80]; /* map our current pigeonhole to a 'big pigeonhole' so we know what list we're after */ int entry=0; for(i=dim-1; i>=0; i--)entry=entry*p->quantvals+p->pigeonmap[temptrack[i]]; setvals(dim,p,temptrack,tempmin,tempmax,c->q_sequencep); sprintf(buffer,"Building pigeonhole search list [%ld]...",totalstack); /* Search all entries to find the one with the minimum possible maximum error. Record that error */ for(i=0; i<entries; i++) { if(c->lengthlist[i]>0) { float this=maxerror(dim,b->valuelist+i*dim,p, temptrack,tempmin,tempmax); if(errorpost==-1 || this<errorpost)errorpost=this; spinnit(buffer,subpigeons); } } /* Our search list will contain all entries with a minimum possible error <= our errorpost */ for(i=0; i<entries; i++) if(c->lengthlist[i]>0) { spinnit(buffer,subpigeons); if(minerror(dim,b->valuelist+i*dim,p, temptrack,tempmin,tempmax)<errorpost) totalstack+=addtosearch(entry,tempstack,tempcount,i); } for(i=0; i<dim; i++) { temptrack[i]++; if(temptrack[i]<p->mapentries)break; temptrack[i]=0; } if(i==dim)break; subpigeons--; }
int main(int argc,char *argv[]){ codebook *b; static_codebook *c; long *lengths; long *hits; int entries=-1,dim=-1,guard=1; FILE *in=NULL; char *line,*name; long j; if(argv[1]==NULL){ fprintf(stderr,"Need a lattice codebook on the command line.\n"); exit(1); } if(argv[2]==NULL){ fprintf(stderr,"Need a codeword data file on the command line.\n"); exit(1); } if(argv[3]!=NULL)guard=0; { char *ptr; char *filename=strdup(argv[1]); b=codebook_load(filename); c=(static_codebook *)(b->c); ptr=strrchr(filename,'.'); if(ptr){ *ptr='\0'; name=strdup(filename); }else{ name=strdup(filename); } } if(c->maptype!=1){ fprintf(stderr,"Provided book is not a latticebook.\n"); exit(1); } entries=b->entries; dim=b->dim; hits=_ogg_malloc(entries*sizeof(long)); lengths=_ogg_calloc(entries,sizeof(long)); for(j=0;j<entries;j++)hits[j]=guard; in=fopen(argv[2],"r"); if(!in){ fprintf(stderr,"Could not open input file %s\n",argv[2]); exit(1); } if(!strrcmp_i(argv[0],"latticetune")){ long lines=0; line=setup_line(in); while(line){ long code; lines++; if(!(lines&0xfff))spinnit("codewords so far...",lines); if(sscanf(line,"%ld",&code)==1) hits[code]++; line=setup_line(in); } } /* now we simply count already collated by-entry data */ if(!strrcmp_i(argv[0],"res0tune") || !strrcmp_i(argv[0],"res1tune")){ line=setup_line(in); while(line){ /* code:hits\n */ /* likely to have multiple listing for each code entry; must accumulate */ char *pos=strchr(line,':'); if(pos){ long code=atol(line); long val=atol(pos+1); hits[code]+=val; } line=setup_line(in); } } fclose(in); /* build the codeword lengths */ build_tree_from_lengths0(entries,hits,lengths); c->lengthlist=lengths; write_codebook(stdout,name,c); { long bins=_book_maptype1_quantvals(c); long i,k,base=c->lengthlist[0]; for(i=0;i<entries;i++) if(c->lengthlist[i]>base)base=c->lengthlist[i]; for(j=0;j<entries;j++){ if(c->lengthlist[j]){ int indexdiv=1; fprintf(stderr,"%4ld: ",j); for(k=0;k<c->dim;k++){ int index= (j/indexdiv)%bins; 
fprintf(stderr,"%+3.1f,", c->quantlist[index]*_float32_unpack(c->q_delta)+ _float32_unpack(c->q_min)); indexdiv*=bins; } fprintf(stderr,"\t|"); for(k=0;k<base-c->lengthlist[j];k++)fprintf(stderr,"*"); fprintf(stderr,"\n"); } } } fprintf(stderr,"\r " "\nDone.\n"); exit(0); }
int main(int argc,char *argv[]){ char *basename; codebook *b=NULL; int entries=0; int dim=0; long i,j,target=-1,protect=-1; FILE *out=NULL; int argnum=0; argv++; if(*argv==NULL){ usage(); exit(1); } while(*argv){ if(*argv[0]=='-'){ argv++; }else{ switch (argnum++){ case 0:case 1: { /* yes, this is evil. However, it's very convenient to parse file extentions */ /* input file. What kind? */ char *dot; char *ext=NULL; char *name=strdup(*argv++); dot=strrchr(name,'.'); if(dot) ext=dot+1; else{ ext=""; } /* codebook */ if(!strcmp(ext,"vqh")){ basename=strrchr(name,'/'); if(basename) basename=strdup(basename)+1; else basename=strdup(name); dot=strrchr(basename,'.'); if(dot)*dot='\0'; b=codebook_load(name); dim=b->dim; entries=b->entries; } /* data file; we do actually need to suck it into memory */ /* we're dealing with just one book, so we can de-interleave */ if(!strcmp(ext,"vqd") && !points){ int cols; long lines=0; char *line; float *vec; FILE *in=fopen(name,"r"); if(!in){ fprintf(stderr,"Could not open input file %s\n",name); exit(1); } reset_next_value(); line=setup_line(in); /* count cols before we start reading */ { char *temp=line; while(*temp==' ')temp++; for(cols=0;*temp;cols++){ while(*temp>32)temp++; while(*temp==' ')temp++; } } vec=alloca(cols*sizeof(float)); /* count, then load, to avoid fragmenting the hell out of memory */ while(line){ lines++; for(j=0;j<cols;j++) if(get_line_value(in,vec+j)){ fprintf(stderr,"Too few columns on line %ld in data file\n",lines); exit(1); } if((lines&0xff)==0)spinnit("counting samples...",lines*cols); line=setup_line(in); } pointlist=_ogg_malloc((cols*lines+entries*dim)*sizeof(float)); rewind(in); line=setup_line(in); while(line){ lines--; for(j=0;j<cols;j++) if(get_line_value(in,vec+j)){ fprintf(stderr,"Too few columns on line %ld in data file\n",lines); exit(1); } /* deinterleave, add to heap */ add_vector(b,vec,cols); if((lines&0xff)==0)spinnit("loading samples...",lines*cols); line=setup_line(in); } fclose(in); } } 
break; case 2: target=atol(*argv++); if(target==0)target=entries; break; case 3: protect=atol(*argv++); break; case 4: { char *buff=alloca(strlen(*argv)+5); sprintf(buff,"%s.vqh",*argv); basename=*argv++; out=fopen(buff,"w"); if(!out){ fprintf(stderr,"unable ot open %s for output",buff); exit(1); } } break; default: usage(); } } } if(!entries || !points || !out)usage(); if(target==-1)usage(); /* add guard points */ for(i=0;i<entries;i++) for(j=0;j<dim;j++) pointlist[points++]=b->valuelist[i*dim+j]; points/=dim; /* set up auxiliary vectors for error tracking */ { encode_aux_nearestmatch *nt=NULL; long pointssofar=0; long *pointindex; long indexedpoints=0; long *entryindex; long *reventry; long *membership=_ogg_malloc(points*sizeof(long)); long *firsthead=_ogg_malloc(entries*sizeof(long)); long *secondary=_ogg_malloc(points*sizeof(long)); long *secondhead=_ogg_malloc(entries*sizeof(long)); long *cellcount=_ogg_calloc(entries,sizeof(long)); long *cellcount2=_ogg_calloc(entries,sizeof(long)); float *cellerror=_ogg_calloc(entries,sizeof(float)); float *cellerrormax=_ogg_calloc(entries,sizeof(float)); long cellsleft=entries; for(i=0;i<points;i++)membership[i]=-1; for(i=0;i<entries;i++)firsthead[i]=-1; for(i=0;i<points;i++)secondary[i]=-1; for(i=0;i<entries;i++)secondhead[i]=-1; for(i=0;i<points;i++){ /* assign vectors to the nearest cell. Also keep track of second nearest for error statistics */ float *ppt=pointlist+i*dim; int firstentry=closest(b,ppt,-1); int secondentry=closest(b,ppt,firstentry); float firstmetric=_dist(dim,b->valuelist+dim*firstentry,ppt); float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt); if(!(i&0xff))spinnit("initializing... 
",points-i); membership[i]=firsthead[firstentry]; firsthead[firstentry]=i; secondary[i]=secondhead[secondentry]; secondhead[secondentry]=i; if(i<points-entries){ cellerror[firstentry]+=secondmetric-firstmetric; cellerrormax[firstentry]=max(cellerrormax[firstentry], _heuristic(b,ppt,secondentry)); cellcount[firstentry]++; cellcount2[secondentry]++; } } /* which cells are most heavily populated? Protect as many from dispersal as the user has requested */ { long **countindex=_ogg_calloc(entries,sizeof(long *)); for(i=0;i<entries;i++)countindex[i]=cellcount+i; qsort(countindex,entries,sizeof(long *),longsort); for(i=0;i<protect;i++){ int ptr=countindex[i]-cellcount; cellerrormax[ptr]=9e50f; } } { fprintf(stderr,"\r"); for(i=0;i<entries;i++){ /* decompose index */ int entry=i; for(j=0;j<dim;j++){ fprintf(stderr,"%d:",entry%b->c->thresh_tree->quantvals); entry/=b->c->thresh_tree->quantvals; } fprintf(stderr,":%ld/%ld, ",cellcount[i],cellcount2[i]); } fprintf(stderr,"\n"); } /* do the automatic cull request */ while(cellsleft>target){ int bestcell=-1; float besterror=0; float besterror2=0; long head=-1; char spinbuf[80]; sprintf(spinbuf,"cells left to eliminate: %ld : ",cellsleft-target); /* find the cell with lowest removal impact */ for(i=0;i<entries;i++){ if(b->c->lengthlist[i]>0){ if(bestcell==-1 || cellerrormax[i]<=besterror2){ if(bestcell==-1 || cellerrormax[i]<besterror2 || besterror>cellerror[i]){ besterror=cellerror[i]; besterror2=cellerrormax[i]; bestcell=i; } } } } fprintf(stderr,"\reliminating cell %d \n" " dispersal error of %g max/%g total (%ld hits)\n", bestcell,besterror2,besterror,cellcount[bestcell]); /* disperse it. 
move each point out, adding it (properly) to the second best */ b->c->lengthlist[bestcell]=0; head=firsthead[bestcell]; firsthead[bestcell]=-1; while(head!=-1){ /* head is a point number */ float *ppt=pointlist+head*dim; int firstentry=closest(b,ppt,-1); int secondentry=closest(b,ppt,firstentry); float firstmetric=_dist(dim,b->valuelist+dim*firstentry,ppt); float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt); long next=membership[head]; if(head<points-entries){ cellcount[firstentry]++; cellcount[bestcell]--; cellerror[firstentry]+=secondmetric-firstmetric; cellerrormax[firstentry]=max(cellerrormax[firstentry], _heuristic(b,ppt,secondentry)); } membership[head]=firsthead[firstentry]; firsthead[firstentry]=head; head=next; if(cellcount[bestcell]%128==0) spinnit(spinbuf,cellcount[bestcell]+cellcount2[bestcell]); } /* now see that all points that had the dispersed cell as second choice have second choice reassigned */ head=secondhead[bestcell]; secondhead[bestcell]=-1; while(head!=-1){ float *ppt=pointlist+head*dim; /* who are we assigned to now? */ int firstentry=closest(b,ppt,-1); /* what is the new second closest match? */ int secondentry=closest(b,ppt,firstentry); /* old second closest is the cell being disbanded */ float oldsecondmetric=_dist(dim,b->valuelist+dim*bestcell,ppt); /* new second closest error */ float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt); long next=secondary[head]; if(head<points-entries){ cellcount2[secondentry]++; cellcount2[bestcell]--; cellerror[firstentry]+=secondmetric-oldsecondmetric; cellerrormax[firstentry]=max(cellerrormax[firstentry], _heuristic(b,ppt,secondentry)); } secondary[head]=secondhead[secondentry]; secondhead[secondentry]=head; head=next; if(cellcount2[bestcell]%128==0) spinnit(spinbuf,cellcount2[bestcell]); } cellsleft--; } /* paring is over. Build decision trees using points that now fall through the thresh matcher. 
*/ /* we don't free membership; we flatten it in order to use in lp_split */ for(i=0;i<entries;i++){ long head=firsthead[i]; spinnit("rearranging membership cache... ",entries-i); while(head!=-1){ long next=membership[head]; membership[head]=i; head=next; } } free(secondhead); free(firsthead); free(cellerror); free(cellerrormax); free(secondary); pointindex=_ogg_malloc(points*sizeof(long)); /* make a point index of fall-through points */ for(i=0;i<points;i++){ int best=_best(b,pointlist+i*dim,1); if(best==-1) pointindex[indexedpoints++]=i; spinnit("finding orphaned points... ",points-i); } /* make an entry index */ entryindex=_ogg_malloc(entries*sizeof(long)); target=0; for(i=0;i<entries;i++){ if(b->c->lengthlist[i]>0) entryindex[target++]=i; } /* make working space for a reverse entry index */ reventry=_ogg_malloc(entries*sizeof(long)); /* do the split */ nt=b->c->nearest_tree= _ogg_calloc(1,sizeof(encode_aux_nearestmatch)); nt->alloc=4096; nt->ptr0=_ogg_malloc(sizeof(long)*nt->alloc); nt->ptr1=_ogg_malloc(sizeof(long)*nt->alloc); nt->p=_ogg_malloc(sizeof(long)*nt->alloc); nt->q=_ogg_malloc(sizeof(long)*nt->alloc); nt->aux=0; fprintf(stderr,"Leaves added: %d \n", lp_split(pointlist,points, b,entryindex,target, pointindex,indexedpoints, membership,reventry, 0,&pointssofar)); free(membership); free(reventry); free(pointindex); /* hack alert. I should just change the damned splitter and codebook writer */ for(i=0;i<nt->aux;i++)nt->p[i]*=dim; for(i=0;i<nt->aux;i++)nt->q[i]*=dim; /* recount hits. Build new lengthlist. reuse entryindex storage */ for(i=0;i<entries;i++)entryindex[i]=1; for(i=0;i<points-entries;i++){ int best=_best(b,pointlist+i*dim,1); float *a=pointlist+i*dim; if(!(i&0xff))spinnit("counting hits...",i); if(best==-1){ fprintf(stderr,"\nINTERNAL ERROR; a point count not be matched to a\n" "codebook entry. 
The new decision tree is broken.\n"); exit(1); } entryindex[best]++; } for(i=0;i<nt->aux;i++)nt->p[i]/=dim; for(i=0;i<nt->aux;i++)nt->q[i]/=dim; /* the lengthlist builder doesn't actually deal with 0 hit entries. So, we pack the 'sparse' hit list into a dense list, then unpack the lengths after the build */ { int upper=0; long *lengthlist=_ogg_calloc(entries,sizeof(long)); for(i=0;i<entries;i++){ if(b->c->lengthlist[i]>0) entryindex[upper++]=entryindex[i]; else{ if(entryindex[i]>1){ fprintf(stderr,"\nINTERNAL ERROR; _best matched to unused entry\n"); exit(1); } } } /* sanity check */ if(upper != target){ fprintf(stderr,"\nINTERNAL ERROR; packed the wrong number of entries\n"); exit(1); } build_tree_from_lengths(upper,entryindex,lengthlist); upper=0; for(i=0;i<entries;i++){ if(b->c->lengthlist[i]>0) b->c->lengthlist[i]=lengthlist[upper++]; } } } /* we're done. write it out. */ write_codebook(out,basename,b->c); fprintf(stderr,"\r \nDone.\n"); return(0); }