void vqgen_cellmetric(vqgen *v){ int j,k; float min=-1.f,max=-1.f,mean=0.f,acc=0.f; long dup=0,unused=0; #ifdef NOISY int i; char buff[80]; float spacings[v->entries]; int count=0; FILE *cells; sprintf(buff,"cellspace%d.m",v->it); cells=fopen(buff,"w"); #endif /* minimum, maximum, cell spacing */ for(j=0;j<v->entries;j++){ float localmin=-1.; for(k=0;k<v->entries;k++){ if(j!=k){ float this=_dist(v,_now(v,j),_now(v,k)); if(this>0){ if(v->assigned[k] && (localmin==-1 || this<localmin)) localmin=this; }else{ if(k<j){ dup++; break; } } } } if(k<v->entries)continue; if(v->assigned[j]==0){ unused++; continue; } localmin=v->max[j]+localmin/2; /* this gives us rough diameter */ if(min==-1 || localmin<min)min=localmin; if(max==-1 || localmin>max)max=localmin; mean+=localmin; acc++; #ifdef NOISY spacings[count++]=localmin; #endif }
static int closest(codebook *b,float *vec,int current){ encode_aux_threshmatch *tt=b->c->thresh_tree; int dim=b->dim; int i,k,o; float bestmetric=0; int bestentry=-1; int best=bestm(b,vec); if(current<0 && b->c->lengthlist[best]>0)return best; for(i=0;i<b->entries;i++){ if(b->c->lengthlist[i]>0 && i!=best && i!=current){ float thismetric=_dist(dim, vec, b->valuelist+i*dim); if(bestentry==-1 || thismetric<bestmetric){ bestentry=i; bestmetric=thismetric; } } } return(bestentry); }
void cell_spacing(codebook *c){ int j,k; float min=-1.f,max=-1.f,mean=0.f,meansq=0.f; long total=0; /* minimum, maximum, mean, ms cell spacing */ for(j=0;j<c->c->entries;j++){ if(c->c->lengthlist[j]>0){ float localmin=-1.; for(k=0;k<c->c->entries;k++){ if(c->c->lengthlist[k]>0){ float this=_dist(c->c->dim,_now(c,j),_now(c,k)); if(j!=k && (localmin==-1 || this<localmin)) localmin=this; } } if(min==-1 || localmin<min)min=localmin; if(max==-1 || localmin>max)max=localmin; mean+=sqrt(localmin); meansq+=localmin; total++; }
uint64_t next() { return _dist(_rng); }
result_type operator()(const T& value) { return _dist(engine(), value); }
/** Returns: distribution()(engine()) */ result_type operator()() { return _dist(engine()); }
extern "C" double cost(const char *text1, const char *text2) { std::string s1(text1); std::string s2(text2); return _dist(s1, s2); }
// 百分率でのランダム bool Random::PercentageRandom( float probability ) { std::bernoulli_distribution _dist( probability ); return _dist( *rnd ); }
// 実数型乱数取得 float Random::GetFloat( float min, float max ) { std::uniform_real_distribution<float> _dist( min, max ); return _dist( *rnd ); }
// 整数型乱数取得 int Random::GetInt( int min, int max ) { std::uniform_int_distribution<int> _dist( min, max ); return _dist( *rnd ); }
int main(int argc,char *argv[]){ char *basename; codebook *b=NULL; int entries=0; int dim=0; long i,j,target=-1,protect=-1; FILE *out=NULL; int argnum=0; argv++; if(*argv==NULL){ usage(); exit(1); } while(*argv){ if(*argv[0]=='-'){ argv++; }else{ switch (argnum++){ case 0:case 1: { /* yes, this is evil. However, it's very convenient to parse file extentions */ /* input file. What kind? */ char *dot; char *ext=NULL; char *name=strdup(*argv++); dot=strrchr(name,'.'); if(dot) ext=dot+1; else{ ext=""; } /* codebook */ if(!strcmp(ext,"vqh")){ basename=strrchr(name,'/'); if(basename) basename=strdup(basename)+1; else basename=strdup(name); dot=strrchr(basename,'.'); if(dot)*dot='\0'; b=codebook_load(name); dim=b->dim; entries=b->entries; } /* data file; we do actually need to suck it into memory */ /* we're dealing with just one book, so we can de-interleave */ if(!strcmp(ext,"vqd") && !points){ int cols; long lines=0; char *line; float *vec; FILE *in=fopen(name,"r"); if(!in){ fprintf(stderr,"Could not open input file %s\n",name); exit(1); } reset_next_value(); line=setup_line(in); /* count cols before we start reading */ { char *temp=line; while(*temp==' ')temp++; for(cols=0;*temp;cols++){ while(*temp>32)temp++; while(*temp==' ')temp++; } } vec=alloca(cols*sizeof(float)); /* count, then load, to avoid fragmenting the hell out of memory */ while(line){ lines++; for(j=0;j<cols;j++) if(get_line_value(in,vec+j)){ fprintf(stderr,"Too few columns on line %ld in data file\n",lines); exit(1); } if((lines&0xff)==0)spinnit("counting samples...",lines*cols); line=setup_line(in); } pointlist=_ogg_malloc((cols*lines+entries*dim)*sizeof(float)); rewind(in); line=setup_line(in); while(line){ lines--; for(j=0;j<cols;j++) if(get_line_value(in,vec+j)){ fprintf(stderr,"Too few columns on line %ld in data file\n",lines); exit(1); } /* deinterleave, add to heap */ add_vector(b,vec,cols); if((lines&0xff)==0)spinnit("loading samples...",lines*cols); line=setup_line(in); } fclose(in); } } break; case 2: target=atol(*argv++); if(target==0)target=entries; break; case 3: protect=atol(*argv++); break; case 4: { char *buff=alloca(strlen(*argv)+5); sprintf(buff,"%s.vqh",*argv); basename=*argv++; out=fopen(buff,"w"); if(!out){ fprintf(stderr,"unable ot open %s for output",buff); exit(1); } } break; default: usage(); } } } if(!entries || !points || !out)usage(); if(target==-1)usage(); /* add guard points */ for(i=0;i<entries;i++) for(j=0;j<dim;j++) pointlist[points++]=b->valuelist[i*dim+j]; points/=dim; /* set up auxiliary vectors for error tracking */ { encode_aux_nearestmatch *nt=NULL; long pointssofar=0; long *pointindex; long indexedpoints=0; long *entryindex; long *reventry; long *membership=_ogg_malloc(points*sizeof(long)); long *firsthead=_ogg_malloc(entries*sizeof(long)); long *secondary=_ogg_malloc(points*sizeof(long)); long *secondhead=_ogg_malloc(entries*sizeof(long)); long *cellcount=_ogg_calloc(entries,sizeof(long)); long *cellcount2=_ogg_calloc(entries,sizeof(long)); float *cellerror=_ogg_calloc(entries,sizeof(float)); float *cellerrormax=_ogg_calloc(entries,sizeof(float)); long cellsleft=entries; for(i=0;i<points;i++)membership[i]=-1; for(i=0;i<entries;i++)firsthead[i]=-1; for(i=0;i<points;i++)secondary[i]=-1; for(i=0;i<entries;i++)secondhead[i]=-1; for(i=0;i<points;i++){ /* assign vectors to the nearest cell. Also keep track of second nearest for error statistics */ float *ppt=pointlist+i*dim; int firstentry=closest(b,ppt,-1); int secondentry=closest(b,ppt,firstentry); float firstmetric=_dist(dim,b->valuelist+dim*firstentry,ppt); float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt); if(!(i&0xff))spinnit("initializing... ",points-i); membership[i]=firsthead[firstentry]; firsthead[firstentry]=i; secondary[i]=secondhead[secondentry]; secondhead[secondentry]=i; if(i<points-entries){ cellerror[firstentry]+=secondmetric-firstmetric; cellerrormax[firstentry]=max(cellerrormax[firstentry], _heuristic(b,ppt,secondentry)); cellcount[firstentry]++; cellcount2[secondentry]++; } } /* which cells are most heavily populated? Protect as many from dispersal as the user has requested */ { long **countindex=_ogg_calloc(entries,sizeof(long *)); for(i=0;i<entries;i++)countindex[i]=cellcount+i; qsort(countindex,entries,sizeof(long *),longsort); for(i=0;i<protect;i++){ int ptr=countindex[i]-cellcount; cellerrormax[ptr]=9e50f; } } { fprintf(stderr,"\r"); for(i=0;i<entries;i++){ /* decompose index */ int entry=i; for(j=0;j<dim;j++){ fprintf(stderr,"%d:",entry%b->c->thresh_tree->quantvals); entry/=b->c->thresh_tree->quantvals; } fprintf(stderr,":%ld/%ld, ",cellcount[i],cellcount2[i]); } fprintf(stderr,"\n"); } /* do the automatic cull request */ while(cellsleft>target){ int bestcell=-1; float besterror=0; float besterror2=0; long head=-1; char spinbuf[80]; sprintf(spinbuf,"cells left to eliminate: %ld : ",cellsleft-target); /* find the cell with lowest removal impact */ for(i=0;i<entries;i++){ if(b->c->lengthlist[i]>0){ if(bestcell==-1 || cellerrormax[i]<=besterror2){ if(bestcell==-1 || cellerrormax[i]<besterror2 || besterror>cellerror[i]){ besterror=cellerror[i]; besterror2=cellerrormax[i]; bestcell=i; } } } } fprintf(stderr,"\reliminating cell %d \n" " dispersal error of %g max/%g total (%ld hits)\n", bestcell,besterror2,besterror,cellcount[bestcell]); /* disperse it. move each point out, adding it (properly) to the second best */ b->c->lengthlist[bestcell]=0; head=firsthead[bestcell]; firsthead[bestcell]=-1; while(head!=-1){ /* head is a point number */ float *ppt=pointlist+head*dim; int firstentry=closest(b,ppt,-1); int secondentry=closest(b,ppt,firstentry); float firstmetric=_dist(dim,b->valuelist+dim*firstentry,ppt); float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt); long next=membership[head]; if(head<points-entries){ cellcount[firstentry]++; cellcount[bestcell]--; cellerror[firstentry]+=secondmetric-firstmetric; cellerrormax[firstentry]=max(cellerrormax[firstentry], _heuristic(b,ppt,secondentry)); } membership[head]=firsthead[firstentry]; firsthead[firstentry]=head; head=next; if(cellcount[bestcell]%128==0) spinnit(spinbuf,cellcount[bestcell]+cellcount2[bestcell]); } /* now see that all points that had the dispersed cell as second choice have second choice reassigned */ head=secondhead[bestcell]; secondhead[bestcell]=-1; while(head!=-1){ float *ppt=pointlist+head*dim; /* who are we assigned to now? */ int firstentry=closest(b,ppt,-1); /* what is the new second closest match? */ int secondentry=closest(b,ppt,firstentry); /* old second closest is the cell being disbanded */ float oldsecondmetric=_dist(dim,b->valuelist+dim*bestcell,ppt); /* new second closest error */ float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt); long next=secondary[head]; if(head<points-entries){ cellcount2[secondentry]++; cellcount2[bestcell]--; cellerror[firstentry]+=secondmetric-oldsecondmetric; cellerrormax[firstentry]=max(cellerrormax[firstentry], _heuristic(b,ppt,secondentry)); } secondary[head]=secondhead[secondentry]; secondhead[secondentry]=head; head=next; if(cellcount2[bestcell]%128==0) spinnit(spinbuf,cellcount2[bestcell]); } cellsleft--; } /* paring is over. Build decision trees using points that now fall through the thresh matcher. */ /* we don't free membership; we flatten it in order to use in lp_split */ for(i=0;i<entries;i++){ long head=firsthead[i]; spinnit("rearranging membership cache... ",entries-i); while(head!=-1){ long next=membership[head]; membership[head]=i; head=next; } } free(secondhead); free(firsthead); free(cellerror); free(cellerrormax); free(secondary); pointindex=_ogg_malloc(points*sizeof(long)); /* make a point index of fall-through points */ for(i=0;i<points;i++){ int best=_best(b,pointlist+i*dim,1); if(best==-1) pointindex[indexedpoints++]=i; spinnit("finding orphaned points... ",points-i); } /* make an entry index */ entryindex=_ogg_malloc(entries*sizeof(long)); target=0; for(i=0;i<entries;i++){ if(b->c->lengthlist[i]>0) entryindex[target++]=i; } /* make working space for a reverse entry index */ reventry=_ogg_malloc(entries*sizeof(long)); /* do the split */ nt=b->c->nearest_tree= _ogg_calloc(1,sizeof(encode_aux_nearestmatch)); nt->alloc=4096; nt->ptr0=_ogg_malloc(sizeof(long)*nt->alloc); nt->ptr1=_ogg_malloc(sizeof(long)*nt->alloc); nt->p=_ogg_malloc(sizeof(long)*nt->alloc); nt->q=_ogg_malloc(sizeof(long)*nt->alloc); nt->aux=0; fprintf(stderr,"Leaves added: %d \n", lp_split(pointlist,points, b,entryindex,target, pointindex,indexedpoints, membership,reventry, 0,&pointssofar)); free(membership); free(reventry); free(pointindex); /* hack alert. I should just change the damned splitter and codebook writer */ for(i=0;i<nt->aux;i++)nt->p[i]*=dim; for(i=0;i<nt->aux;i++)nt->q[i]*=dim; /* recount hits. Build new lengthlist. reuse entryindex storage */ for(i=0;i<entries;i++)entryindex[i]=1; for(i=0;i<points-entries;i++){ int best=_best(b,pointlist+i*dim,1); float *a=pointlist+i*dim; if(!(i&0xff))spinnit("counting hits...",i); if(best==-1){ fprintf(stderr,"\nINTERNAL ERROR; a point count not be matched to a\n" "codebook entry. The new decision tree is broken.\n"); exit(1); } entryindex[best]++; } for(i=0;i<nt->aux;i++)nt->p[i]/=dim; for(i=0;i<nt->aux;i++)nt->q[i]/=dim; /* the lengthlist builder doesn't actually deal with 0 hit entries. So, we pack the 'sparse' hit list into a dense list, then unpack the lengths after the build */ { int upper=0; long *lengthlist=_ogg_calloc(entries,sizeof(long)); for(i=0;i<entries;i++){ if(b->c->lengthlist[i]>0) entryindex[upper++]=entryindex[i]; else{ if(entryindex[i]>1){ fprintf(stderr,"\nINTERNAL ERROR; _best matched to unused entry\n"); exit(1); } } } /* sanity check */ if(upper != target){ fprintf(stderr,"\nINTERNAL ERROR; packed the wrong number of entries\n"); exit(1); } build_tree_from_lengths(upper,entryindex,lengthlist); upper=0; for(i=0;i<entries;i++){ if(b->c->lengthlist[i]>0) b->c->lengthlist[i]=lengthlist[upper++]; } } } /* we're done. write it out. */ write_codebook(out,basename,b->c); fprintf(stderr,"\r \nDone.\n"); return(0); }
////////////////////////////////// // return a random real between // 0 and 1 with uniform dist double RandomNumberGenerator::next() { return _dist(); }
result_type rand() { std::lock_guard<std::mutex> lock(_mutex); return _dist(_rgen); }