Esempio n. 1
0
void vqgen_cellmetric(vqgen *v){
  int j,k;
  float min=-1.f,max=-1.f,mean=0.f,acc=0.f;
  long dup=0,unused=0;
 #ifdef NOISY
  int i;
   char buff[80];
   float spacings[v->entries];
   int count=0;
   FILE *cells;
   sprintf(buff,"cellspace%d.m",v->it);
   cells=fopen(buff,"w");
#endif

  /* minimum, maximum, cell spacing */
  for(j=0;j<v->entries;j++){
    float localmin=-1.;

    for(k=0;k<v->entries;k++){
      if(j!=k){
	float this=_dist(v,_now(v,j),_now(v,k));
	if(this>0){
	  if(v->assigned[k] && (localmin==-1 || this<localmin))
	    localmin=this;
	}else{	
	  if(k<j){
	    dup++;
	    break;
	  }
	}
      }
    }
    if(k<v->entries)continue;

    if(v->assigned[j]==0){
      unused++;
      continue;
    }
    
    localmin=v->max[j]+localmin/2; /* this gives us rough diameter */
    if(min==-1 || localmin<min)min=localmin;
    if(max==-1 || localmin>max)max=localmin;
    mean+=localmin;
    acc++;
#ifdef NOISY
    spacings[count++]=localmin;
#endif
  }
Esempio n. 2
0
static int closest(codebook *b,float *vec,int current){
  encode_aux_threshmatch *tt=b->c->thresh_tree;
  int dim=b->dim;
  int i,k,o;

  float bestmetric=0;
  int bestentry=-1;
  int best=bestm(b,vec);

  if(current<0 && b->c->lengthlist[best]>0)return best;

  for(i=0;i<b->entries;i++){
    if(b->c->lengthlist[i]>0 && i!=best && i!=current){
      float thismetric=_dist(dim, vec, b->valuelist+i*dim);
      if(bestentry==-1 || thismetric<bestmetric){
	bestentry=i;
	bestmetric=thismetric;
      }
    }
  }

  return(bestentry);
}
Esempio n. 3
0
void cell_spacing(codebook *c){
  int j,k;
  float min=-1.f,max=-1.f,mean=0.f,meansq=0.f;
  long total=0;

  /* minimum, maximum, mean, ms cell spacing */
  for(j=0;j<c->c->entries;j++){
    if(c->c->lengthlist[j]>0){
      float localmin=-1.;
      for(k=0;k<c->c->entries;k++){
        if(c->c->lengthlist[k]>0){
          float this=_dist(c->c->dim,_now(c,j),_now(c,k));
          if(j!=k &&
             (localmin==-1 || this<localmin))
            localmin=this;
        }
      }
      
      if(min==-1 || localmin<min)min=localmin;
      if(max==-1 || localmin>max)max=localmin;
      mean+=sqrt(localmin);
      meansq+=localmin;
      total++;
    }
Esempio n. 4
0
 uint64_t next()
 {
     return _dist(_rng);
 }
 result_type operator()(const T& value) { return _dist(engine(), value); }
 /** Returns: distribution()(engine()) */
 result_type operator()() { return _dist(engine()); }
Esempio n. 7
0
extern "C" double cost(const char *text1, const char *text2)
{
	std::string s1(text1);
	std::string s2(text2);
	return _dist(s1, s2);
}
Esempio n. 8
0
	//	百分率でのランダム
	bool	Random::PercentageRandom( float probability )
	{
		std::bernoulli_distribution _dist( probability );

		return _dist( *rnd );
	}
Esempio n. 9
0
	//	実数型乱数取得
	float	Random::GetFloat( float min, float max )
	{
		std::uniform_real_distribution<float>	_dist( min, max );
		return	_dist( *rnd );
	}
Esempio n. 10
0
	//	整数型乱数取得
	int		Random::GetInt( int min, int max )
	{
		std::uniform_int_distribution<int>	_dist( min, max );
		return	_dist( *rnd );
	}
Esempio n. 11
0
int main(int argc,char *argv[]){
  char *basename;
  codebook *b=NULL;
  int entries=0;
  int dim=0;
  long i,j,target=-1,protect=-1;
  FILE *out=NULL;

  int argnum=0;

  argv++;
  if(*argv==NULL){
    usage();
    exit(1);
  }

  while(*argv){
    if(*argv[0]=='-'){

      argv++;
	
    }else{
      switch (argnum++){
      case 0:case 1:
	{
	  /* yes, this is evil.  However, it's very convenient to parse file
	     extentions */
	  
	  /* input file.  What kind? */
	  char *dot;
	  char *ext=NULL;
	  char *name=strdup(*argv++);
	  dot=strrchr(name,'.');
	  if(dot)
	    ext=dot+1;
	  else{
	    ext="";
	    
	  }
	  
	  
	  /* codebook */
	  if(!strcmp(ext,"vqh")){
	    
	    basename=strrchr(name,'/');
	    if(basename)
	      basename=strdup(basename)+1;
	    else
	      basename=strdup(name);
	    dot=strrchr(basename,'.');
	    if(dot)*dot='\0';
	    
	    b=codebook_load(name);
	    dim=b->dim;
	    entries=b->entries;
	  }
	  
	  /* data file; we do actually need to suck it into memory */
	  /* we're dealing with just one book, so we can de-interleave */ 
	  if(!strcmp(ext,"vqd") && !points){
	    int cols;
	    long lines=0;
	    char *line;
	    float *vec;
	    FILE *in=fopen(name,"r");
	    if(!in){
	      fprintf(stderr,"Could not open input file %s\n",name);
	      exit(1);
	    }
	    
	    reset_next_value();
	    line=setup_line(in);
	    /* count cols before we start reading */
	    {
	      char *temp=line;
	      while(*temp==' ')temp++;
	      for(cols=0;*temp;cols++){
		while(*temp>32)temp++;
		while(*temp==' ')temp++;
	      }
	    }
	    vec=alloca(cols*sizeof(float));
	    /* count, then load, to avoid fragmenting the hell out of
	       memory */
	    while(line){
	      lines++;
	      for(j=0;j<cols;j++)
		if(get_line_value(in,vec+j)){
		  fprintf(stderr,"Too few columns on line %ld in data file\n",lines);
		  exit(1);
		}
	      if((lines&0xff)==0)spinnit("counting samples...",lines*cols);
	      line=setup_line(in);
	    }
	    pointlist=_ogg_malloc((cols*lines+entries*dim)*sizeof(float));
	    
	    rewind(in);
	    line=setup_line(in);
	    while(line){
	      lines--;
	      for(j=0;j<cols;j++)
		if(get_line_value(in,vec+j)){
		  fprintf(stderr,"Too few columns on line %ld in data file\n",lines);
		  exit(1);
		}
	      /* deinterleave, add to heap */
	      add_vector(b,vec,cols);
	      if((lines&0xff)==0)spinnit("loading samples...",lines*cols);
	      
	      line=setup_line(in);
	    }
	    fclose(in);
	  }
	}
	break;
      case 2:
	target=atol(*argv++);
	if(target==0)target=entries;
	break;
      case 3:
	protect=atol(*argv++);
	break;
      case 4:
	{
	  char *buff=alloca(strlen(*argv)+5);
	  sprintf(buff,"%s.vqh",*argv);
	  basename=*argv++;

	  out=fopen(buff,"w");
	  if(!out){
	    fprintf(stderr,"unable ot open %s for output",buff);
	    exit(1);
	  }
	}
	break;
      default:
	usage();
      }
    }
  }
  if(!entries || !points || !out)usage();
  if(target==-1)usage();

  /* add guard points */
  for(i=0;i<entries;i++)
    for(j=0;j<dim;j++)
      pointlist[points++]=b->valuelist[i*dim+j];
  
  points/=dim;

  /* set up auxiliary vectors for error tracking */
  {
    encode_aux_nearestmatch *nt=NULL;
    long pointssofar=0;
    long *pointindex;
    long indexedpoints=0;
    long *entryindex;
    long *reventry;
    long *membership=_ogg_malloc(points*sizeof(long));
    long *firsthead=_ogg_malloc(entries*sizeof(long));
    long *secondary=_ogg_malloc(points*sizeof(long));
    long *secondhead=_ogg_malloc(entries*sizeof(long));

    long *cellcount=_ogg_calloc(entries,sizeof(long));
    long *cellcount2=_ogg_calloc(entries,sizeof(long));
    float *cellerror=_ogg_calloc(entries,sizeof(float));
    float *cellerrormax=_ogg_calloc(entries,sizeof(float));
    long cellsleft=entries;
    for(i=0;i<points;i++)membership[i]=-1;
    for(i=0;i<entries;i++)firsthead[i]=-1;
    for(i=0;i<points;i++)secondary[i]=-1;
    for(i=0;i<entries;i++)secondhead[i]=-1;

    for(i=0;i<points;i++){
      /* assign vectors to the nearest cell.  Also keep track of second
	 nearest for error statistics */
      float *ppt=pointlist+i*dim;
      int    firstentry=closest(b,ppt,-1);
      int    secondentry=closest(b,ppt,firstentry);
      float firstmetric=_dist(dim,b->valuelist+dim*firstentry,ppt);
      float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt);
      
      if(!(i&0xff))spinnit("initializing... ",points-i);
    
      membership[i]=firsthead[firstentry];
      firsthead[firstentry]=i;
      secondary[i]=secondhead[secondentry];
      secondhead[secondentry]=i;

      if(i<points-entries){
	cellerror[firstentry]+=secondmetric-firstmetric;
	cellerrormax[firstentry]=max(cellerrormax[firstentry],
				     _heuristic(b,ppt,secondentry));
	cellcount[firstentry]++;
	cellcount2[secondentry]++;
      }
    }

    /* which cells are most heavily populated?  Protect as many from
       dispersal as the user has requested */
    {
      long **countindex=_ogg_calloc(entries,sizeof(long *));
      for(i=0;i<entries;i++)countindex[i]=cellcount+i;
      qsort(countindex,entries,sizeof(long *),longsort);
      for(i=0;i<protect;i++){
	int ptr=countindex[i]-cellcount;
	cellerrormax[ptr]=9e50f;
      }
    }

    {
      fprintf(stderr,"\r");
      for(i=0;i<entries;i++){
	/* decompose index */
	int entry=i;
	for(j=0;j<dim;j++){
	  fprintf(stderr,"%d:",entry%b->c->thresh_tree->quantvals);
	  entry/=b->c->thresh_tree->quantvals;
	}
	
	fprintf(stderr,":%ld/%ld, ",cellcount[i],cellcount2[i]);
      }
      fprintf(stderr,"\n");
    }

    /* do the automatic cull request */
    while(cellsleft>target){
      int bestcell=-1;
      float besterror=0;
      float besterror2=0;
      long head=-1;
      char spinbuf[80];
      sprintf(spinbuf,"cells left to eliminate: %ld : ",cellsleft-target);

      /* find the cell with lowest removal impact */
      for(i=0;i<entries;i++){
	if(b->c->lengthlist[i]>0){
	  if(bestcell==-1 || cellerrormax[i]<=besterror2){
	    if(bestcell==-1 || cellerrormax[i]<besterror2 || 
	       besterror>cellerror[i]){
	      besterror=cellerror[i];
	      besterror2=cellerrormax[i];
	      bestcell=i;
	    }
	  }
	}
      }

      fprintf(stderr,"\reliminating cell %d                              \n"
	      "     dispersal error of %g max/%g total (%ld hits)\n",
	      bestcell,besterror2,besterror,cellcount[bestcell]);

      /* disperse it.  move each point out, adding it (properly) to
         the second best */
      b->c->lengthlist[bestcell]=0;
      head=firsthead[bestcell];
      firsthead[bestcell]=-1;
      while(head!=-1){
	/* head is a point number */
	float *ppt=pointlist+head*dim;
	int firstentry=closest(b,ppt,-1);
	int secondentry=closest(b,ppt,firstentry);
	float firstmetric=_dist(dim,b->valuelist+dim*firstentry,ppt);
	float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt);
	long next=membership[head];

	if(head<points-entries){
	  cellcount[firstentry]++;
	  cellcount[bestcell]--;
	  cellerror[firstentry]+=secondmetric-firstmetric;
	  cellerrormax[firstentry]=max(cellerrormax[firstentry],
				       _heuristic(b,ppt,secondentry));
	}

	membership[head]=firsthead[firstentry];
	firsthead[firstentry]=head;
	head=next;
	if(cellcount[bestcell]%128==0)
	  spinnit(spinbuf,cellcount[bestcell]+cellcount2[bestcell]);

      }

      /* now see that all points that had the dispersed cell as second
         choice have second choice reassigned */
      head=secondhead[bestcell];
      secondhead[bestcell]=-1;
      while(head!=-1){
	float *ppt=pointlist+head*dim;
	/* who are we assigned to now? */
	int firstentry=closest(b,ppt,-1);
	/* what is the new second closest match? */
	int secondentry=closest(b,ppt,firstentry);
	/* old second closest is the cell being disbanded */
	float oldsecondmetric=_dist(dim,b->valuelist+dim*bestcell,ppt);
	/* new second closest error */
	float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt);
	long next=secondary[head];

	if(head<points-entries){
	  cellcount2[secondentry]++;
	  cellcount2[bestcell]--;
	  cellerror[firstentry]+=secondmetric-oldsecondmetric;
	  cellerrormax[firstentry]=max(cellerrormax[firstentry],
				       _heuristic(b,ppt,secondentry));
	}
	
	secondary[head]=secondhead[secondentry];
	secondhead[secondentry]=head;
	head=next;

	if(cellcount2[bestcell]%128==0)
	  spinnit(spinbuf,cellcount2[bestcell]);
      }

      cellsleft--;
    }

    /* paring is over.  Build decision trees using points that now fall
       through the thresh matcher. */
    /* we don't free membership; we flatten it in order to use in lp_split */

    for(i=0;i<entries;i++){
      long head=firsthead[i];
      spinnit("rearranging membership cache... ",entries-i);
      while(head!=-1){
	long next=membership[head];
	membership[head]=i;
	head=next;
      }
    }

    free(secondhead);
    free(firsthead);
    free(cellerror);
    free(cellerrormax);
    free(secondary);

    pointindex=_ogg_malloc(points*sizeof(long));
    /* make a point index of fall-through points */
    for(i=0;i<points;i++){
      int best=_best(b,pointlist+i*dim,1);
      if(best==-1)
	pointindex[indexedpoints++]=i;
      spinnit("finding orphaned points... ",points-i);
    }

    /* make an entry index */
    entryindex=_ogg_malloc(entries*sizeof(long));
    target=0;
    for(i=0;i<entries;i++){
      if(b->c->lengthlist[i]>0)
	entryindex[target++]=i;
    }

    /* make working space for a reverse entry index */
    reventry=_ogg_malloc(entries*sizeof(long));

    /* do the split */
    nt=b->c->nearest_tree=
      _ogg_calloc(1,sizeof(encode_aux_nearestmatch));

    nt->alloc=4096;
    nt->ptr0=_ogg_malloc(sizeof(long)*nt->alloc);
    nt->ptr1=_ogg_malloc(sizeof(long)*nt->alloc);
    nt->p=_ogg_malloc(sizeof(long)*nt->alloc);
    nt->q=_ogg_malloc(sizeof(long)*nt->alloc);
    nt->aux=0;

    fprintf(stderr,"Leaves added: %d              \n",
            lp_split(pointlist,points,
                     b,entryindex,target,
                     pointindex,indexedpoints,
                     membership,reventry,
                     0,&pointssofar));
    free(membership);
    free(reventry);
    free(pointindex);

    /* hack alert.  I should just change the damned splitter and
       codebook writer */
    for(i=0;i<nt->aux;i++)nt->p[i]*=dim;
    for(i=0;i<nt->aux;i++)nt->q[i]*=dim;
    
    /* recount hits.  Build new lengthlist. reuse entryindex storage */
    for(i=0;i<entries;i++)entryindex[i]=1;
    for(i=0;i<points-entries;i++){
      int best=_best(b,pointlist+i*dim,1);
      float *a=pointlist+i*dim;
      if(!(i&0xff))spinnit("counting hits...",i);
      if(best==-1){
	fprintf(stderr,"\nINTERNAL ERROR; a point count not be matched to a\n"
		"codebook entry.  The new decision tree is broken.\n");
	exit(1);
      }
      entryindex[best]++;
    }
    for(i=0;i<nt->aux;i++)nt->p[i]/=dim;
    for(i=0;i<nt->aux;i++)nt->q[i]/=dim;
    
    /* the lengthlist builder doesn't actually deal with 0 hit entries.
       So, we pack the 'sparse' hit list into a dense list, then unpack
       the lengths after the build */
    {
      int upper=0;
      long *lengthlist=_ogg_calloc(entries,sizeof(long));
      for(i=0;i<entries;i++){
	if(b->c->lengthlist[i]>0)
	  entryindex[upper++]=entryindex[i];
	else{
	  if(entryindex[i]>1){
	    fprintf(stderr,"\nINTERNAL ERROR; _best matched to unused entry\n");
	    exit(1);
	  }
	}
      }
      
      /* sanity check */
      if(upper != target){
	fprintf(stderr,"\nINTERNAL ERROR; packed the wrong number of entries\n");
	exit(1);
      }
    
      build_tree_from_lengths(upper,entryindex,lengthlist);
      
      upper=0;
      for(i=0;i<entries;i++){
	if(b->c->lengthlist[i]>0)
	  b->c->lengthlist[i]=lengthlist[upper++];
      }

    }
  }
  /* we're done.  write it out. */
  write_codebook(out,basename,b->c);

  fprintf(stderr,"\r                                        \nDone.\n");
  return(0);
}
Esempio n. 12
0
//////////////////////////////////
// return a random real between
// 0 and 1 with uniform dist
double RandomNumberGenerator::next() {
    return _dist();
}
Esempio n. 13
0
 result_type rand()
 {
     std::lock_guard<std::mutex> lock(_mutex);
     return _dist(_rgen);
 }