int main(int argc,char *argv[]){ vqgen v; static_codebook c; codebook b; quant_meta q; long *quantlist=NULL; int entries=-1,dim=-1,aux=-1; FILE *out=NULL; FILE *in=NULL; char *line,*name; long i,j,k; b.c=&c; if(argv[1]==NULL){ fprintf(stderr,"Need a trained data set on the command line.\n"); exit(1); } { char *ptr; char *filename=strdup(argv[1]); in=fopen(filename,"r"); if(!in){ fprintf(stderr,"Could not open input file %s\n",filename); exit(1); } ptr=strrchr(filename,'-'); if(ptr){ *ptr='\0'; name=strdup(filename); sprintf(ptr,".vqh"); }else{ name=strdup(filename); strcat(filename,".vqh"); } out=fopen(filename,"w"); if(out==NULL){ fprintf(stderr,"Unable to open %s for writing\n",filename); exit(1); } } /* suck in the trained book */ /* read book type, but it doesn't matter */ line=rline(in,out); line=rline(in,out); if(sscanf(line,"%d %d %d",&entries,&dim,&aux)!=3){ fprintf(stderr,"Syntax error reading book file\n"); exit(1); } /* just use it to allocate mem */ vqgen_init(&v,dim,0,entries,0.f,NULL,NULL,0); /* quant */ line=rline(in,out); if(sscanf(line,"%ld %ld %d %d",&q.min,&q.delta, &q.quant,&q.sequencep)!=4){ fprintf(stderr,"Syntax error reading book file\n"); exit(1); } /* quantized entries */ /* save quant data; we don't want to requantize later as our method is currently imperfect wrt repeated application */ i=0; quantlist=_ogg_malloc(sizeof(long)*v.elements*v.entries); for(j=0;j<entries;j++){ float a; for(k=0;k<dim;k++){ line=rline(in,out); sscanf(line,"%f",&a); v.entrylist[i]=a; quantlist[i++]=rint(a); } } /* ignore bias */ for(j=0;j<entries;j++)line=rline(in,out); free(v.bias); v.bias=NULL; /* training points */ { float *b=alloca(sizeof(float)*(dim+aux)); i=0; v.entries=0; /* hack to avoid reseeding */ while(1){ for(k=0;k<dim+aux;k++){ line=rline(in,out); if(!line)break; sscanf(line,"%f",b+k); } if(feof(in))break; vqgen_addpoint(&v,b,NULL); } v.entries=entries; } fclose(in); vqgen_unquantize(&v,&q); /* build the book */ vqsp_book(&v,&b,quantlist); c.q_min=q.min; c.q_delta=q.delta; c.q_quant=q.quant; c.q_sequencep=q.sequencep; /* save the book in C header form */ write_codebook(out,name,b.c); fclose(out); exit(0); }
int main(int argc,char *argv[]){ codebook b; static_codebook c; double *quantlist; long *hits; int entries=-1,dim=-1,quantvals=-1,addmul=-1,sequencep=0; FILE *in=NULL; char *line,*name; long i,j; memset(&b,0,sizeof(b)); memset(&c,0,sizeof(c)); if(argv[1]==NULL){ fprintf(stderr,"Need a lattice description file on the command line.\n"); exit(1); } { char *ptr; char *filename=_ogg_calloc(strlen(argv[1])+4,1); strcpy(filename,argv[1]); in=fopen(filename,"r"); if(!in){ fprintf(stderr,"Could not open input file %s\n",filename); exit(1); } ptr=strrchr(filename,'.'); if(ptr){ *ptr='\0'; name=strdup(filename); }else{ name=strdup(filename); } } /* read the description */ line=get_line(in); if(sscanf(line,"%d %d %d %d",&quantvals,&dim,&addmul,&sequencep)!=4){ if(sscanf(line,"%d %d %d",&quantvals,&dim,&addmul)!=3){ fprintf(stderr,"Syntax error reading description file (line 1)\n"); exit(1); } } entries=pow(quantvals,dim); c.dim=dim; c.entries=entries; c.lengthlist=_ogg_malloc(entries*sizeof(long)); c.maptype=1; c.q_sequencep=sequencep; c.quantlist=_ogg_calloc(quantvals,sizeof(long)); quantlist=_ogg_malloc(sizeof(double)*c.dim*c.entries); hits=_ogg_malloc(c.entries*sizeof(long)); for(j=0;j<entries;j++)hits[j]=1; for(j=0;j<entries;j++)c.lengthlist[j]=1; reset_next_value(); line=setup_line(in); for(j=0;j<quantvals;j++){ char *temp; if(!line || sscanf(line,"%lf",quantlist+j)!=1){ fprintf(stderr,"Ran out of data on line 2 of description file\n"); exit(1); } temp=strchr(line,','); if(!temp)temp=strchr(line,' '); if(temp)temp++; line=temp; } /* gen a real quant list from the more easily human-grokked input */ { double min=quantlist[0]; double mindel=-1; int fac=1; for(j=1;j<quantvals;j++)if(quantlist[j]<min)min=quantlist[j]; for(j=0;j<quantvals;j++) for(i=j+1;i<quantvals;i++) if(mindel==-1 || fabs(quantlist[j]-quantlist[i])<mindel) mindel=fabs(quantlist[j]-quantlist[i]); j=0; while(j<quantvals){ for(j=0;j<quantvals;j++){ double test=fac*(quantlist[j]-min)/mindel; if( fabs(rint(test)-test)>.00001f) break; } if(fac>100)break; if(j<quantvals)fac++; } mindel/=fac; fprintf(stderr,"min=%g mindel=%g\n",min,mindel); c.q_min=_float32_pack(min); c.q_delta=_float32_pack(mindel); c.q_quant=0; min=_float32_unpack(c.q_min); mindel=_float32_unpack(c.q_delta); for(j=0;j<quantvals;j++){ c.quantlist[j]=rint((quantlist[j]-min)/mindel); if(ilog(c.quantlist[j])>c.q_quant)c.q_quant=ilog(c.quantlist[j]); } } /* build the [default] codeword lengths */ memset(c.lengthlist,0,sizeof(long)*entries); for(i=0;i<entries;i++)hits[i]=1; build_tree_from_lengths(entries,hits,c.lengthlist); /* save the book in C header form */ write_codebook(stdout,name,&c); fprintf(stderr,"\r " "\nDone.\n"); exit(0); }
int main(int argc,char *argv[]){ codebook *b; static_codebook *c; long *lengths; long *hits; int entries=-1,dim=-1,guard=1; FILE *in=NULL; char *line,*name; long j; if(argv[1]==NULL){ fprintf(stderr,"Need a lattice codebook on the command line.\n"); exit(1); } if(argv[2]==NULL){ fprintf(stderr,"Need a codeword data file on the command line.\n"); exit(1); } if(argv[3]!=NULL)guard=0; { char *ptr; char *filename=strdup(argv[1]); b=codebook_load(filename); c=(static_codebook *)(b->c); ptr=strrchr(filename,'.'); if(ptr){ *ptr='\0'; name=strdup(filename); }else{ name=strdup(filename); } } if(c->maptype!=1){ fprintf(stderr,"Provided book is not a latticebook.\n"); exit(1); } entries=b->entries; dim=b->dim; hits=_ogg_malloc(entries*sizeof(long)); lengths=_ogg_calloc(entries,sizeof(long)); for(j=0;j<entries;j++)hits[j]=guard; in=fopen(argv[2],"r"); if(!in){ fprintf(stderr,"Could not open input file %s\n",argv[2]); exit(1); } if(!strrcmp_i(argv[0],"latticetune")){ long lines=0; line=setup_line(in); while(line){ long code; lines++; if(!(lines&0xfff))spinnit("codewords so far...",lines); if(sscanf(line,"%ld",&code)==1) hits[code]++; line=setup_line(in); } } /* now we simply count already collated by-entry data */ if(!strrcmp_i(argv[0],"res0tune") || !strrcmp_i(argv[0],"res1tune")){ line=setup_line(in); while(line){ /* code:hits\n */ /* likely to have multiple listing for each code entry; must accumulate */ char *pos=strchr(line,':'); if(pos){ long code=atol(line); long val=atol(pos+1); hits[code]+=val; } line=setup_line(in); } } fclose(in); /* build the codeword lengths */ build_tree_from_lengths0(entries,hits,lengths); c->lengthlist=lengths; write_codebook(stdout,name,c); { long bins=_book_maptype1_quantvals(c); long i,k,base=c->lengthlist[0]; for(i=0;i<entries;i++) if(c->lengthlist[i]>base)base=c->lengthlist[i]; for(j=0;j<entries;j++){ if(c->lengthlist[j]){ int indexdiv=1; fprintf(stderr,"%4ld: ",j); for(k=0;k<c->dim;k++){ int index= (j/indexdiv)%bins; fprintf(stderr,"%+3.1f,", c->quantlist[index]*_float32_unpack(c->q_delta)+ _float32_unpack(c->q_min)); indexdiv*=bins; } fprintf(stderr,"\t|"); for(k=0;k<base-c->lengthlist[j];k++)fprintf(stderr,"*"); fprintf(stderr,"\n"); } } } fprintf(stderr,"\r " "\nDone.\n"); exit(0); }
int main(int argc,char *argv[]){ char *basename; codebook *b=NULL; int entries=0; int dim=0; long i,j,target=-1,protect=-1; FILE *out=NULL; int argnum=0; argv++; if(*argv==NULL){ usage(); exit(1); } while(*argv){ if(*argv[0]=='-'){ argv++; }else{ switch (argnum++){ case 0:case 1: { /* yes, this is evil. However, it's very convenient to parse file extentions */ /* input file. What kind? */ char *dot; char *ext=NULL; char *name=strdup(*argv++); dot=strrchr(name,'.'); if(dot) ext=dot+1; else{ ext=""; } /* codebook */ if(!strcmp(ext,"vqh")){ basename=strrchr(name,'/'); if(basename) basename=strdup(basename)+1; else basename=strdup(name); dot=strrchr(basename,'.'); if(dot)*dot='\0'; b=codebook_load(name); dim=b->dim; entries=b->entries; } /* data file; we do actually need to suck it into memory */ /* we're dealing with just one book, so we can de-interleave */ if(!strcmp(ext,"vqd") && !points){ int cols; long lines=0; char *line; float *vec; FILE *in=fopen(name,"r"); if(!in){ fprintf(stderr,"Could not open input file %s\n",name); exit(1); } reset_next_value(); line=setup_line(in); /* count cols before we start reading */ { char *temp=line; while(*temp==' ')temp++; for(cols=0;*temp;cols++){ while(*temp>32)temp++; while(*temp==' ')temp++; } } vec=alloca(cols*sizeof(float)); /* count, then load, to avoid fragmenting the hell out of memory */ while(line){ lines++; for(j=0;j<cols;j++) if(get_line_value(in,vec+j)){ fprintf(stderr,"Too few columns on line %ld in data file\n",lines); exit(1); } if((lines&0xff)==0)spinnit("counting samples...",lines*cols); line=setup_line(in); } pointlist=_ogg_malloc((cols*lines+entries*dim)*sizeof(float)); rewind(in); line=setup_line(in); while(line){ lines--; for(j=0;j<cols;j++) if(get_line_value(in,vec+j)){ fprintf(stderr,"Too few columns on line %ld in data file\n",lines); exit(1); } /* deinterleave, add to heap */ add_vector(b,vec,cols); if((lines&0xff)==0)spinnit("loading samples...",lines*cols); line=setup_line(in); } fclose(in); } } break; case 2: target=atol(*argv++); if(target==0)target=entries; break; case 3: protect=atol(*argv++); break; case 4: { char *buff=alloca(strlen(*argv)+5); sprintf(buff,"%s.vqh",*argv); basename=*argv++; out=fopen(buff,"w"); if(!out){ fprintf(stderr,"unable ot open %s for output",buff); exit(1); } } break; default: usage(); } } } if(!entries || !points || !out)usage(); if(target==-1)usage(); /* add guard points */ for(i=0;i<entries;i++) for(j=0;j<dim;j++) pointlist[points++]=b->valuelist[i*dim+j]; points/=dim; /* set up auxiliary vectors for error tracking */ { encode_aux_nearestmatch *nt=NULL; long pointssofar=0; long *pointindex; long indexedpoints=0; long *entryindex; long *reventry; long *membership=_ogg_malloc(points*sizeof(long)); long *firsthead=_ogg_malloc(entries*sizeof(long)); long *secondary=_ogg_malloc(points*sizeof(long)); long *secondhead=_ogg_malloc(entries*sizeof(long)); long *cellcount=_ogg_calloc(entries,sizeof(long)); long *cellcount2=_ogg_calloc(entries,sizeof(long)); float *cellerror=_ogg_calloc(entries,sizeof(float)); float *cellerrormax=_ogg_calloc(entries,sizeof(float)); long cellsleft=entries; for(i=0;i<points;i++)membership[i]=-1; for(i=0;i<entries;i++)firsthead[i]=-1; for(i=0;i<points;i++)secondary[i]=-1; for(i=0;i<entries;i++)secondhead[i]=-1; for(i=0;i<points;i++){ /* assign vectors to the nearest cell. Also keep track of second nearest for error statistics */ float *ppt=pointlist+i*dim; int firstentry=closest(b,ppt,-1); int secondentry=closest(b,ppt,firstentry); float firstmetric=_dist(dim,b->valuelist+dim*firstentry,ppt); float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt); if(!(i&0xff))spinnit("initializing... ",points-i); membership[i]=firsthead[firstentry]; firsthead[firstentry]=i; secondary[i]=secondhead[secondentry]; secondhead[secondentry]=i; if(i<points-entries){ cellerror[firstentry]+=secondmetric-firstmetric; cellerrormax[firstentry]=max(cellerrormax[firstentry], _heuristic(b,ppt,secondentry)); cellcount[firstentry]++; cellcount2[secondentry]++; } } /* which cells are most heavily populated? Protect as many from dispersal as the user has requested */ { long **countindex=_ogg_calloc(entries,sizeof(long *)); for(i=0;i<entries;i++)countindex[i]=cellcount+i; qsort(countindex,entries,sizeof(long *),longsort); for(i=0;i<protect;i++){ int ptr=countindex[i]-cellcount; cellerrormax[ptr]=9e50f; } } { fprintf(stderr,"\r"); for(i=0;i<entries;i++){ /* decompose index */ int entry=i; for(j=0;j<dim;j++){ fprintf(stderr,"%d:",entry%b->c->thresh_tree->quantvals); entry/=b->c->thresh_tree->quantvals; } fprintf(stderr,":%ld/%ld, ",cellcount[i],cellcount2[i]); } fprintf(stderr,"\n"); } /* do the automatic cull request */ while(cellsleft>target){ int bestcell=-1; float besterror=0; float besterror2=0; long head=-1; char spinbuf[80]; sprintf(spinbuf,"cells left to eliminate: %ld : ",cellsleft-target); /* find the cell with lowest removal impact */ for(i=0;i<entries;i++){ if(b->c->lengthlist[i]>0){ if(bestcell==-1 || cellerrormax[i]<=besterror2){ if(bestcell==-1 || cellerrormax[i]<besterror2 || besterror>cellerror[i]){ besterror=cellerror[i]; besterror2=cellerrormax[i]; bestcell=i; } } } } fprintf(stderr,"\reliminating cell %d \n" " dispersal error of %g max/%g total (%ld hits)\n", bestcell,besterror2,besterror,cellcount[bestcell]); /* disperse it. move each point out, adding it (properly) to the second best */ b->c->lengthlist[bestcell]=0; head=firsthead[bestcell]; firsthead[bestcell]=-1; while(head!=-1){ /* head is a point number */ float *ppt=pointlist+head*dim; int firstentry=closest(b,ppt,-1); int secondentry=closest(b,ppt,firstentry); float firstmetric=_dist(dim,b->valuelist+dim*firstentry,ppt); float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt); long next=membership[head]; if(head<points-entries){ cellcount[firstentry]++; cellcount[bestcell]--; cellerror[firstentry]+=secondmetric-firstmetric; cellerrormax[firstentry]=max(cellerrormax[firstentry], _heuristic(b,ppt,secondentry)); } membership[head]=firsthead[firstentry]; firsthead[firstentry]=head; head=next; if(cellcount[bestcell]%128==0) spinnit(spinbuf,cellcount[bestcell]+cellcount2[bestcell]); } /* now see that all points that had the dispersed cell as second choice have second choice reassigned */ head=secondhead[bestcell]; secondhead[bestcell]=-1; while(head!=-1){ float *ppt=pointlist+head*dim; /* who are we assigned to now? */ int firstentry=closest(b,ppt,-1); /* what is the new second closest match? */ int secondentry=closest(b,ppt,firstentry); /* old second closest is the cell being disbanded */ float oldsecondmetric=_dist(dim,b->valuelist+dim*bestcell,ppt); /* new second closest error */ float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt); long next=secondary[head]; if(head<points-entries){ cellcount2[secondentry]++; cellcount2[bestcell]--; cellerror[firstentry]+=secondmetric-oldsecondmetric; cellerrormax[firstentry]=max(cellerrormax[firstentry], _heuristic(b,ppt,secondentry)); } secondary[head]=secondhead[secondentry]; secondhead[secondentry]=head; head=next; if(cellcount2[bestcell]%128==0) spinnit(spinbuf,cellcount2[bestcell]); } cellsleft--; } /* paring is over. Build decision trees using points that now fall through the thresh matcher. */ /* we don't free membership; we flatten it in order to use in lp_split */ for(i=0;i<entries;i++){ long head=firsthead[i]; spinnit("rearranging membership cache... ",entries-i); while(head!=-1){ long next=membership[head]; membership[head]=i; head=next; } } free(secondhead); free(firsthead); free(cellerror); free(cellerrormax); free(secondary); pointindex=_ogg_malloc(points*sizeof(long)); /* make a point index of fall-through points */ for(i=0;i<points;i++){ int best=_best(b,pointlist+i*dim,1); if(best==-1) pointindex[indexedpoints++]=i; spinnit("finding orphaned points... ",points-i); } /* make an entry index */ entryindex=_ogg_malloc(entries*sizeof(long)); target=0; for(i=0;i<entries;i++){ if(b->c->lengthlist[i]>0) entryindex[target++]=i; } /* make working space for a reverse entry index */ reventry=_ogg_malloc(entries*sizeof(long)); /* do the split */ nt=b->c->nearest_tree= _ogg_calloc(1,sizeof(encode_aux_nearestmatch)); nt->alloc=4096; nt->ptr0=_ogg_malloc(sizeof(long)*nt->alloc); nt->ptr1=_ogg_malloc(sizeof(long)*nt->alloc); nt->p=_ogg_malloc(sizeof(long)*nt->alloc); nt->q=_ogg_malloc(sizeof(long)*nt->alloc); nt->aux=0; fprintf(stderr,"Leaves added: %d \n", lp_split(pointlist,points, b,entryindex,target, pointindex,indexedpoints, membership,reventry, 0,&pointssofar)); free(membership); free(reventry); free(pointindex); /* hack alert. I should just change the damned splitter and codebook writer */ for(i=0;i<nt->aux;i++)nt->p[i]*=dim; for(i=0;i<nt->aux;i++)nt->q[i]*=dim; /* recount hits. Build new lengthlist. reuse entryindex storage */ for(i=0;i<entries;i++)entryindex[i]=1; for(i=0;i<points-entries;i++){ int best=_best(b,pointlist+i*dim,1); float *a=pointlist+i*dim; if(!(i&0xff))spinnit("counting hits...",i); if(best==-1){ fprintf(stderr,"\nINTERNAL ERROR; a point count not be matched to a\n" "codebook entry. The new decision tree is broken.\n"); exit(1); } entryindex[best]++; } for(i=0;i<nt->aux;i++)nt->p[i]/=dim; for(i=0;i<nt->aux;i++)nt->q[i]/=dim; /* the lengthlist builder doesn't actually deal with 0 hit entries. So, we pack the 'sparse' hit list into a dense list, then unpack the lengths after the build */ { int upper=0; long *lengthlist=_ogg_calloc(entries,sizeof(long)); for(i=0;i<entries;i++){ if(b->c->lengthlist[i]>0) entryindex[upper++]=entryindex[i]; else{ if(entryindex[i]>1){ fprintf(stderr,"\nINTERNAL ERROR; _best matched to unused entry\n"); exit(1); } } } /* sanity check */ if(upper != target){ fprintf(stderr,"\nINTERNAL ERROR; packed the wrong number of entries\n"); exit(1); } build_tree_from_lengths(upper,entryindex,lengthlist); upper=0; for(i=0;i<entries;i++){ if(b->c->lengthlist[i]>0) b->c->lengthlist[i]=lengthlist[upper++]; } } } /* we're done. write it out. */ write_codebook(out,basename,b->c); fprintf(stderr,"\r \nDone.\n"); return(0); }