Exemple #1
0
/* Cascade driver entry point.
 *
 * Arguments are processed left to right:
 *   -s start[,num]   subvector selection; parsed into start/num but not
 *                    applied yet (vectors are processed whole)
 *   -i               sets the interleave flag handed to process_vector()
 *   [*|+]book.vqh    loads a codebook; a leading '*' records it as
 *                    multiplicative in addmul[], otherwise additive
 *   data.vqd         data file; each line is read as one vector and passed
 *                    to process_vector()
 * All .vqh files must precede the first .vqd file.  After the arguments,
 * stdin is also consumed when it is a FIFO, regular file or socket.
 *
 * process_preprocess() runs once before the first vector and
 * process_postprocess() once at the end; both receive the basename of the
 * most recently loaded codebook (NULL if none was loaded).
 *
 * Returns 0 on success; exits with status 1 on any usage or I/O error.
 */
int main(int argc,char *argv[]){
  char *basename=NULL;
  codebook **b=_ogg_calloc(1,sizeof(codebook *));
  int *addmul=_ogg_calloc(1,sizeof(int));
  int books=0;
  int input=0;
  int interleave=0;
  int j;
  int start=0;
  int num=-1;
  argv++;

  if(*argv==NULL){
    process_usage();
    exit(1);
  }

  /* yes, this is evil.  However, it's very convenient to parse file
     extensions */

  while(*argv){
    if(*argv[0]=='-'){
      /* option.  Exactly one option is consumed per pass so that the
         argument following it is never mistaken for another option. */
      switch(argv[0][1]){
      case 's':
        /* subvector */
        if(argv[1]==NULL){
          fprintf(stderr,"Syntax error using -s\n");
          exit(1);
        }
        if(sscanf(argv[1],"%d,%d",&start,&num)!=2){
          num= -1;
          if(sscanf(argv[1],"%d",&start)!=1){
            fprintf(stderr,"Syntax error using -s\n");
            exit(1);
          }
        }
        argv+=2;
        break;
      case 'i':
        /* interleave */
        interleave=1;
        argv+=1;
        break;
      default:
        /* unknown option: bail out instead of spinning on it forever */
        fprintf(stderr,"Unrecognized option %s\n",*argv);
        process_usage();
        exit(1);
      }
    }else{
      /* input file.  What kind? */
      char *dot;
      char *ext=NULL;
      char *name=strdup(*argv++);
      dot=strrchr(name,'.');
      if(dot)
        ext=dot+1;
      else
        ext="";

      /* codebook */
      if(!strcmp(ext,"vqh")){
        int multp=0;
        if(input){
          fprintf(stderr,"specify all input data (.vqd) files following\n"
                  "codebook header (.vqh) files\n");
          exit(1);
        }
        /* is it additive or multiplicative? */
        if(name[0]=='*'){
          multp=1;
          name++;
        }
        if(name[0]=='+')name++;

        /* basename: file name without directory part and extension */
        basename=strrchr(name,'/');
        if(basename)
          basename=strdup(basename+1);
        else
          basename=strdup(name);
        dot=strrchr(basename,'.');
        if(dot)*dot='\0';

        /* b[] stays NULL-terminated; addmul[] runs parallel to it */
        b=_ogg_realloc(b,sizeof(codebook *)*(books+2));
        b[books]=codebook_load(name);
        addmul=_ogg_realloc(addmul,sizeof(int)*(books+1));
        addmul[books++]=multp;
        b[books]=NULL;
      }

      /* data file */
      if(!strcmp(ext,"vqd")){
        int cols=0;
        long lines=0;
        char *line;
        float *vec;
        FILE *in=fopen(name,"r");
        if(!in){
          fprintf(stderr,"Could not open input file %s\n",name);
          exit(1);
        }

        if(!input){
          process_preprocess(b,basename);
          input++;
        }

        reset_next_value();
        line=setup_line(in);
        /* count cols before we start reading; an empty file yields a
           NULL line and therefore no columns and no vectors */
        {
          char *temp=line;
          if(temp){
            while(*temp==' ')temp++;
            for(;*temp;cols++){
              while(*temp>32)temp++;
              while(*temp==' ')temp++;
            }
          }
        }
        vec=alloca(cols*sizeof(float));
        while(line){
          lines++;
          for(j=0;j<cols;j++)
            if(get_line_value(in,vec+j)){
              fprintf(stderr,"Too few columns on line %ld in data file\n",lines);
              exit(1);
            }
          /* ignores -s for now */
          process_vector(b,addmul,interleave,vec,cols);

          line=setup_line(in);
        }
        fclose(in);
      }
    }
  }

  /* take any data from stdin */
  {
    struct stat st;
    if(fstat(STDIN_FILENO,&st)==-1){
      fprintf(stderr,"Could not stat STDIN\n");
      exit(1);
    }
    /* the S_IF* constants are file-type codes, not independent flag bits,
       so the type has to be tested with the S_IS*() macros */
    if(S_ISFIFO(st.st_mode) || S_ISREG(st.st_mode) || S_ISSOCK(st.st_mode)){
      int cols=0;
      char *line;
      long lines=0;
      float *vec;
      if(!input){
        process_preprocess(b,basename);
        input++;
      }

      line=setup_line(stdin);
      /* count cols before we start reading; tolerate empty input */
      {
        char *temp=line;
        if(temp){
          while(*temp==' ')temp++;
          for(;*temp;cols++){
            while(*temp>32)temp++;
            while(*temp==' ')temp++;
          }
        }
      }
      vec=alloca(cols*sizeof(float));
      while(line){
        lines++;
        for(j=0;j<cols;j++)
          if(get_line_value(stdin,vec+j)){
            fprintf(stderr,"Too few columns on line %ld in data file\n",lines);
            exit(1);
          }
        /* ignores -s for now */
        process_vector(b,addmul,interleave,vec,cols);

        line=setup_line(stdin);
      }
    }
  }

  process_postprocess(b,basename);

  return 0;
}
/* Distribution dump entry point.
 *
 * Usage: distribution {data.vqd [bins] | book.vqh}
 *
 * book.vqh: loads the codebook, prints the minimum and maximum scalar
 *   value of its quantlist and, for maptype 1 books, dumps every used
 *   entry plus a histogram of quant values weighted by codeword length.
 * data.vqd: scans the comma-separated scalars for their minimum and
 *   maximum and, when a bin count was given, rereads the file and prints a
 *   histogram with that many bins.
 *
 * Always terminates through exit(): 0 on success, 1 on usage, I/O or
 * allocation failure.
 */
int main(int argc,char *argv[]){
  FILE *in;
  long lines=0;
  /* initialised because the progress message prints them before the
     first scalar has been seen */
  float min=0.f;
  float max=0.f;
  long bins=-1;
  int flag=0;
  long *countarray;
  long total=0;
  char *line;

  if(argv[1]==NULL){
    fprintf(stderr,"Usage: distribution {data.vqd [bins]| book.vqh} \n\n");
    exit(1);
  }
  if(argv[2]!=NULL)
    bins=atoi(argv[2])-1;

  in=fopen(argv[1],"r");
  if(!in){
    fprintf(stderr,"Could not open input file %s\n",argv[1]);
    exit(1);
  }

  if(strrchr(argv[1],'.') && strcmp(strrchr(argv[1],'.'),".vqh")==0){
    /* load/decode a book */

    codebook *b=codebook_load(argv[1]);
    static_codebook *c=(static_codebook *)(b->c);
    float delta;
    int i;
    fclose(in);

    switch(c->maptype){
    case 0:
      printf("entropy codebook only; no mappings\n");
      exit(0);
      break;
    case 1:
      bins=_book_maptype1_quantvals(c);
      break;
    case 2:
      bins=c->entries*c->dim;
      break;
    }

    max=min=_float32_unpack(c->q_min);
    delta=_float32_unpack(c->q_delta);

    for(i=0;i<bins;i++){
      float val=c->quantlist[i]*delta+min;
      if(val>max)max=val;
    }

    printf("Minimum scalar value: %f\n",min);
    printf("Maximum scalar value: %f\n",max);

    switch(c->maptype){
    case 1:
      {
        /* lattice codebook.  dump it. */
        int j,k;
        long **sort=calloc(bins,sizeof(long *));
        long base=c->lengthlist[0];
        countarray=calloc(bins,sizeof(long));
        if(!sort || !countarray){
          fprintf(stderr,"Out of memory\n");
          exit(1);
        }

        /* sort[] holds pointers into quantlist so the original index can
           be recovered after sorting */
        for(i=0;i<bins;i++)sort[i]=c->quantlist+i;
        qsort(sort,bins,sizeof(long *),ascend);

        /* base = longest codeword length in the book */
        for(i=0;i<b->entries;i++)
          if(c->lengthlist[i]>base)base=c->lengthlist[i];

        /* dump a full, correlated count */
        for(j=0;j<b->entries;j++){
          if(c->lengthlist[j]){
            int indexdiv=1;
            printf("%4d: ",j);
            for(k=0;k<b->dim;k++){
              int index= (j/indexdiv)%bins;
              printf("%+3.1f,", c->quantlist[index]*_float32_unpack(c->q_delta)+
                     _float32_unpack(c->q_min));
              indexdiv*=bins;
            }
            printf("\t|");
            for(k=0;k<base-c->lengthlist[j];k++)printf("*");
            printf("\n");
          }
        }

        /* do a rough count */
        for(j=0;j<b->entries;j++){
          int indexdiv=1;
          for(k=0;k<b->dim;k++){
            if(c->lengthlist[j]){
              int index= (j/indexdiv)%bins;
              /* shift in long, matching the accumulator's type */
              countarray[index]+=(1L<<(base-c->lengthlist[j]));
              indexdiv*=bins;
            }
          }
        }

        /* dump the count */

        {
          long maxcount=0,i,j;
          for(i=0;i<bins;i++)
            if(countarray[i]>maxcount)maxcount=countarray[i];

          for(i=0;i<bins;i++){
            int ptr=sort[i]-c->quantlist;
            /* no hits at all: draw empty bars instead of dividing by 0 */
            int stars=maxcount>0?(int)rint(50./maxcount*countarray[ptr]):0;
            printf("%+08f (%8ld) |",c->quantlist[ptr]*delta+min,countarray[ptr]);
            for(j=0;j<stars;j++)printf("*");
            printf("\n");
          }
        }
      }
      break;
    case 2:
      {
        /* trained, full mapping codebook. */
        printf("Can't do probability dump of a trained [type 2] codebook (yet)\n");
      }
      break;
    }
  }else{
    /* load/count a data file */

    /* do it the simple way; two pass. */
    line=setup_line(in);
    while(line){
      float code;
      lines++;

      if(!(lines&0xff)){
        char buf[80];
        /* bounded: two large floats would overrun an 80 byte sprintf */
        snprintf(buf,sizeof(buf),"getting min/max (%.2f::%.2f). lines...",min,max);
        spinnit(buf,lines);
      }

      /* the very first scalar seeds both extremes; the line pointer is
         left alone so the loop below still walks the whole line */
      if(!flag && sscanf(line,"%f",&code)==1){
        min=max=code;
        flag=1;
      }

      while(line && sscanf(line,"%f",&code)==1){
        line=strchr(line,',');
        if(line)line++;
        if(code<min)min=code;
        if(code>max)max=code;
      }

      line=setup_line(in);
    }

    if(bins<1){
      /* no usable bin count given: one bin per unit when the range is a
         positive whole number, otherwise fall back to 25 */
      float range=max-min;
      if(range>=1.f && (long)range==range){
        bins=(long)range;
      }else{
        bins=25;
      }
    }

    printf("\r                                                     \r");
    printf("Minimum scalar value: %f\n",min);
    printf("Maximum scalar value: %f\n",max);

    if(argv[2]){

      printf("\n counting hits into %ld bins...\n",bins+1);
      countarray=calloc(bins+1,sizeof(long));
      if(!countarray){
        fprintf(stderr,"Out of memory\n");
        exit(1);
      }

      rewind(in);
      line=setup_line(in);
      while(line){
        float code;
        lines--;
        if(!(lines&0xff))spinnit("counting distribution. lines so far...",lines);

        while(line && sscanf(line,"%f",&code)==1){
          long bin=0;
          line=strchr(line,',');
          if(line)line++;

          /* map [min,max] onto [0,bins]; if every scalar is identical the
             range is zero and everything lands in bin 0 */
          if(max>min){
            code-=min;
            code/=(max-min);
            code*=bins;
            bin=(long)rint(code);
          }
          countarray[bin]++;
          total++;
        }

        line=setup_line(in);
      }

      /* make a pretty graph */
      {
        long maxcount=0,i,j;
        for(i=0;i<bins+1;i++)
          if(countarray[i]>maxcount)maxcount=countarray[i];

        printf("\r                                                     \r");
        printf("Total scalars: %ld\n",total);
        for(i=0;i<bins+1;i++){
          int stars=maxcount>0?(int)rint(50./maxcount*countarray[i]):0;
          printf("%08f (%8ld) |",(max-min)/bins*i+min,countarray[i]);
          for(j=0;j<stars;j++)printf("*");
          printf("\n");
        }
      }
    }

    fclose(in);

  }
  printf("\nDone.\n");
  exit(0);
}
Exemple #3
0
int main(int argc,char *argv[]) {
    codebook *b;
    static_codebook *c;
    int entries=-1,dim=-1;
    float min,del;
    char *name;
    long i,j;
    float *suggestions;
    int suggcount=0;

    if(argv[1]==NULL) {
        fprintf(stderr,"Need a lattice book on the command line.\n");
        exit(1);
    }

    {
        char *ptr;
        char *filename=strdup(argv[1]);

        b=codebook_load(filename);
        c=(static_codebook *)(b->c);

        ptr=strrchr(filename,'.');
        if(ptr) {
            *ptr='\0';
            name=strdup(filename);
        } else {
            name=strdup(filename);
        }
    }

    if(c->maptype!=1) {
        fprintf(stderr,"Provided book is not a latticebook.\n");
        exit(1);
    }

    entries=b->entries;
    dim=b->dim;
    min=_float32_unpack(c->q_min);
    del=_float32_unpack(c->q_delta);

    /* Do we want to gen a threshold hint? */
    if(c->q_sequencep==0) {
        /* yes. Discard any preexisting threshhold hint */
        long quantvals=_book_maptype1_quantvals(c);
        long **quantsort=alloca(quantvals*sizeof(long *));
        encode_aux_threshmatch *t=_ogg_calloc(1,sizeof(encode_aux_threshmatch));
        c->thresh_tree=t;

        fprintf(stderr,"Adding threshold hint to %s...\n",name);

        /* partial/complete suggestions */
        if(argv[2]) {
            char *ptr=strdup(argv[2]);
            suggestions=alloca(sizeof(float)*quantvals);

            for(suggcount=0; ptr && suggcount<quantvals; suggcount++) {
                char *ptr2=strchr(ptr,',');
                if(ptr2)*ptr2++='\0';
                suggestions[suggcount]=atof(ptr);
                ptr=ptr2;
            }
        }

        /* simplest possible threshold hint only */
        t->quantthresh=_ogg_calloc(quantvals-1,sizeof(float));
        t->quantmap=_ogg_calloc(quantvals,sizeof(int));
        t->threshvals=quantvals;
        t->quantvals=quantvals;

        /* the quantvals may not be in order; sort em first */
        for(i=0; i<quantvals; i++)quantsort[i]=c->quantlist+i;
        qsort(quantsort,quantvals,sizeof(long *),longsort);

        /* ok, gen the map and thresholds */
        for(i=0; i<quantvals; i++)t->quantmap[i]=quantsort[i]-c->quantlist;
        for(i=0; i<quantvals-1; i++) {
            float v1=*(quantsort[i])*del+min;
            float v2=*(quantsort[i+1])*del+min;

            for(j=0; j<suggcount; j++)
                if(v1<suggestions[j] && suggestions[j]<v2) {
                    t->quantthresh[i]=suggestions[j];
                    break;
                }

            if(j==suggcount) {
                t->quantthresh[i]=(v1+v2)*.5;
            }
        }
    }

    /* Do we want to gen a pigeonhole hint? */
#if 0
    for(i=0; i<entries; i++)if(c->lengthlist[i]==0)break;
    if(c->q_sequencep || i<entries) {
        long **tempstack;
        long *tempcount;
        long *temptrack;
        float *tempmin;
        float *tempmax;
        long totalstack=0;
        long pigeons;
        long subpigeons;
        long quantvals=_book_maptype1_quantvals(c);
        int changep=1,factor;

        encode_aux_pigeonhole *p=_ogg_calloc(1,sizeof(encode_aux_pigeonhole));
        c->pigeon_tree=p;

        fprintf(stderr,"Adding pigeonhole hint to %s...\n",name);

        /* the idea is that we quantize uniformly, even in a nonuniform
           lattice, so that quantization of one scalar has a predictable
           result on the next sequential scalar in a greedy matching
           algorithm.  We generate a lookup based on the quantization of
           the vector (pigeonmap groups quantized entries together) and
           list the entries that could possible be the best fit for any
           given member of that pigeonhole.  The encode process then has a
           much smaller list to brute force */

        /* find our pigeonhole-specific quantization values, fill in the
           quant value->pigeonhole map */
        factor=3;
        p->del=del;
        p->min=min;
        p->quantvals=quantvals;
        {
            int max=0;
            for(i=0; i<quantvals; i++)if(max<c->quantlist[i])max=c->quantlist[i];
            p->mapentries=max;
        }
        p->pigeonmap=_ogg_malloc(p->mapentries*sizeof(long));
        p->quantvals=(quantvals+factor-1)/factor;

        /* pigeonhole roughly on the boundaries of the quantvals; the
           exact pigeonhole grouping is an optimization issue, not a
           correctness issue */
        for(i=0; i<p->mapentries; i++) {
            float thisval=del*i+min; /* middle of the quant zone */
            int quant=0;
            float err=fabs(c->quantlist[0]*del+min-thisval);
            for(j=1; j<quantvals; j++) {
                float thiserr=fabs(c->quantlist[j]*del+min-thisval);
                if(thiserr<err) {
                    quant=j/factor;
                    err=thiserr;
                }
            }
            p->pigeonmap[i]=quant;
        }

        /* pigeonmap complete.  Now do the grungy business of finding the
        entries that could possibly be the best fit for a value appearing
        in the pigeonhole. The trick that allows the below to work is the
        uniform quantization; even though the scalars may be 'sequential'
        (each a delta from the last), the uniform quantization means that
        the error variance is *not* dependant.  Given a pigeonhole and an
        entry, we can find the minimum and maximum possible errors
        (relative to the entry) for any point that could appear in the
        pigeonhole */

        /* must iterate over both pigeonholes and entries */
        /* temporarily (in order to avoid thinking hard), we grow each
           pigeonhole separately, the build a stack of 'em later */
        pigeons=1;
        subpigeons=1;
        for(i=0; i<dim; i++)subpigeons*=p->mapentries;
        for(i=0; i<dim; i++)pigeons*=p->quantvals;
        temptrack=_ogg_calloc(dim,sizeof(long));
        tempmin=_ogg_calloc(dim,sizeof(float));
        tempmax=_ogg_calloc(dim,sizeof(float));
        tempstack=_ogg_calloc(pigeons,sizeof(long *));
        tempcount=_ogg_calloc(pigeons,sizeof(long));

        while(1) {
            float errorpost=-1;
            char buffer[80];

            /* map our current pigeonhole to a 'big pigeonhole' so we know
               what list we're after */
            int entry=0;
            for(i=dim-1; i>=0; i--)entry=entry*p->quantvals+p->pigeonmap[temptrack[i]];
            setvals(dim,p,temptrack,tempmin,tempmax,c->q_sequencep);
            sprintf(buffer,"Building pigeonhole search list [%ld]...",totalstack);


            /* Search all entries to find the one with the minimum possible
               maximum error.  Record that error */
            for(i=0; i<entries; i++) {
                if(c->lengthlist[i]>0) {
                    float this=maxerror(dim,b->valuelist+i*dim,p,
                                        temptrack,tempmin,tempmax);
                    if(errorpost==-1 || this<errorpost)errorpost=this;
                    spinnit(buffer,subpigeons);
                }
            }

            /* Our search list will contain all entries with a minimum
               possible error <= our errorpost */
            for(i=0; i<entries; i++)
                if(c->lengthlist[i]>0) {
                    spinnit(buffer,subpigeons);
                    if(minerror(dim,b->valuelist+i*dim,p,
                                temptrack,tempmin,tempmax)<errorpost)
                        totalstack+=addtosearch(entry,tempstack,tempcount,i);
                }

            for(i=0; i<dim; i++) {
                temptrack[i]++;
                if(temptrack[i]<p->mapentries)break;
                temptrack[i]=0;
            }
            if(i==dim)break;
            subpigeons--;
        }
Exemple #4
0
/* Lattice codebook tuning entry point.
 *
 * argv[1]  lattice (maptype 1) codebook file
 * argv[2]  codeword data file
 * argv[3]  optional; when present the per-entry guard hit is dropped
 *          (every entry starts at 0 hits instead of 1)
 *
 * The input format depends on the name the program was invoked under:
 *   ...latticetune         one codeword index per line
 *   ...res0tune/res1tune   "code:hits" lines, accumulated per code
 *
 * The hit counts are turned into codeword lengths with
 * build_tree_from_lengths0(), installed as the book's lengthlist, and the
 * book is written to stdout.  A per-entry dump goes to stderr.
 *
 * Always terminates through exit(): 0 on success, 1 on bad arguments,
 * unreadable input or a codeword index outside the codebook.
 */
int main(int argc,char *argv[]){
  codebook *b;
  static_codebook *c;
  long *lengths;
  long *hits;

  int entries=-1,guard=1;
  FILE *in=NULL;
  char *line,*name;
  long j;

  if(argv[1]==NULL){
    fprintf(stderr,"Need a lattice codebook on the command line.\n");
    exit(1);
  }
  if(argv[2]==NULL){
    fprintf(stderr,"Need a codeword data file on the command line.\n");
    exit(1);
  }
  if(argv[3]!=NULL)guard=0;

  {
    char *ptr;
    char *filename=strdup(argv[1]);

    b=codebook_load(filename);
    c=(static_codebook *)(b->c);

    /* name = file name with its extension stripped; filename is already
       a private copy, so it is reused rather than duplicated again */
    ptr=strrchr(filename,'.');
    if(ptr)*ptr='\0';
    name=filename;
  }

  if(c->maptype!=1){
    fprintf(stderr,"Provided book is not a latticebook.\n");
    exit(1);
  }

  entries=b->entries;

  hits=_ogg_malloc(entries*sizeof(long));
  lengths=_ogg_calloc(entries,sizeof(long));
  for(j=0;j<entries;j++)hits[j]=guard;

  in=fopen(argv[2],"r");
  if(!in){
    fprintf(stderr,"Could not open input file %s\n",argv[2]);
    exit(1);
  }

  if(!strrcmp_i(argv[0],"latticetune")){
    long lines=0;
    line=setup_line(in);
    while(line){
      long code;
      lines++;
      if(!(lines&0xfff))spinnit("codewords so far...",lines);

      if(sscanf(line,"%ld",&code)==1){
        /* the index comes straight from the data file; refuse anything
           that would write outside hits[] */
        if(code<0 || code>=entries){
          fprintf(stderr,"Codeword %ld on line %ld is outside the codebook "
                  "(%d entries)\n",code,lines,entries);
          exit(1);
        }
        hits[code]++;
      }

      line=setup_line(in);
    }
  }

  /* now we simply count already collated by-entry data */
  if(!strrcmp_i(argv[0],"res0tune") || !strrcmp_i(argv[0],"res1tune")){

    line=setup_line(in);
    while(line){

      /* code:hits\n */
      /* likely to have multiple listing for each code entry; must
         accumulate */

      char *pos=strchr(line,':');
      if(pos){
        long code=atol(line);
        long val=atol(pos+1);
        if(code<0 || code>=entries){
          fprintf(stderr,"Codeword %ld is outside the codebook "
                  "(%d entries)\n",code,entries);
          exit(1);
        }
        hits[code]+=val;
      }

      line=setup_line(in);
    }
  }

  fclose(in);

  /* build the codeword lengths */
  build_tree_from_lengths0(entries,hits,lengths);

  c->lengthlist=lengths;
  write_codebook(stdout,name,c);

  /* dump every used entry: its per-dimension values, then one '*' for
     each bit its codeword is shorter than the longest one */
  {
    long bins=_book_maptype1_quantvals(c);
    long i,k,base=c->lengthlist[0];
    for(i=0;i<entries;i++)
      if(c->lengthlist[i]>base)base=c->lengthlist[i];

    for(j=0;j<entries;j++){
      if(c->lengthlist[j]){
        int indexdiv=1;
        fprintf(stderr,"%4ld: ",j);
        for(k=0;k<c->dim;k++){
          int index= (j/indexdiv)%bins;
          fprintf(stderr,"%+3.1f,", c->quantlist[index]*_float32_unpack(c->q_delta)+
                 _float32_unpack(c->q_min));
          indexdiv*=bins;
        }
        fprintf(stderr,"\t|");
        for(k=0;k<base-c->lengthlist[j];k++)fprintf(stderr,"*");
        fprintf(stderr,"\n");
      }
    }
  }

  fprintf(stderr,"\r                                                     "
          "\nDone.\n");
  exit(0);
}
Exemple #5
0
/* Codebook paring entry point.
 *
 * Arguments starting with '-' are skipped.  The remaining arguments are
 * positional:
 *   0,1  a codebook (.vqh) and a data file (.vqd), told apart by their
 *        extension; the codebook has to come first because the data
 *        loader uses its entry count and dimension
 *   2    target number of cells to keep (0 means "all entries")
 *   3    number of most-populated cells to protect from elimination
 *   4    output basename; the result is written to <basename>.vqh
 *
 * Outline: load the sample points, append the codebook entries themselves
 * as guard points, assign every point to its nearest and second-nearest
 * cell, repeatedly eliminate the cell with the lowest removal impact until
 * only `target` cells are left, build a nearest-match decision tree for
 * the points the threshold matcher does not resolve, recount hits, rebuild
 * the codeword lengths and write the codebook.
 *
 * `points`, `pointlist` and `max` are not declared in this function.
 * NOTE(review): presumably file-scope definitions elsewhere in this file;
 * confirm.
 *
 * Returns 0 on success; exits with status 1 on errors.
 */
int main(int argc,char *argv[]){
  char *basename=NULL;
  codebook *b=NULL;
  int entries=0;
  int dim=0;
  long i,j,target=-1,protect=-1;
  FILE *out=NULL;

  int argnum=0;

  argv++;
  if(*argv==NULL){
    usage();
    exit(1);
  }

  while(*argv){
    if(*argv[0]=='-'){

      argv++;

    }else{
      switch (argnum++){
      case 0:case 1:
        {
          /* yes, this is evil.  However, it's very convenient to parse file
             extensions */

          /* input file.  What kind? */
          char *dot;
          char *ext=NULL;
          char *name=strdup(*argv++);
          dot=strrchr(name,'.');
          if(dot)
            ext=dot+1;
          else{
            ext="";

          }


          /* codebook */
          if(!strcmp(ext,"vqh")){

            /* basename: file name without directory part and extension */
            basename=strrchr(name,'/');
            if(basename)
              basename=strdup(basename+1);
            else
              basename=strdup(name);
            dot=strrchr(basename,'.');
            if(dot)*dot='\0';

            b=codebook_load(name);
            dim=b->dim;
            entries=b->entries;
          }

          /* data file; we do actually need to suck it into memory */
          /* we're dealing with just one book, so we can de-interleave */
          if(!strcmp(ext,"vqd") && !points){
            int cols=0;
            long lines=0;
            char *line;
            float *vec;
            FILE *in=fopen(name,"r");
            if(!in){
              fprintf(stderr,"Could not open input file %s\n",name);
              exit(1);
            }

            reset_next_value();
            line=setup_line(in);
            /* count cols before we start reading; an empty file yields a
               NULL line and therefore no columns and no samples */
            {
              char *temp=line;
              if(temp){
                while(*temp==' ')temp++;
                for(;*temp;cols++){
                  while(*temp>32)temp++;
                  while(*temp==' ')temp++;
                }
              }
            }
            vec=alloca(cols*sizeof(float));
            /* count, then load, to avoid fragmenting the hell out of
               memory */
            while(line){
              lines++;
              for(j=0;j<cols;j++)
                if(get_line_value(in,vec+j)){
                  fprintf(stderr,"Too few columns on line %ld in data file\n",lines);
                  exit(1);
                }
              if((lines&0xff)==0)spinnit("counting samples...",lines*cols);
              line=setup_line(in);
            }
            /* room for every sample plus the guard points added below */
            pointlist=_ogg_malloc((cols*lines+entries*dim)*sizeof(float));

            rewind(in);
            line=setup_line(in);
            while(line){
              lines--;
              for(j=0;j<cols;j++)
                if(get_line_value(in,vec+j)){
                  fprintf(stderr,"Too few columns on line %ld in data file\n",lines);
                  exit(1);
                }
              /* deinterleave, add to heap */
              add_vector(b,vec,cols);
              if((lines&0xff)==0)spinnit("loading samples...",lines*cols);

              line=setup_line(in);
            }
            fclose(in);
          }
        }
        break;
      case 2:
        target=atol(*argv++);
        if(target==0)target=entries;
        break;
      case 3:
        protect=atol(*argv++);
        break;
      case 4:
        {
          /* +5: ".vqh" plus the terminating NUL */
          char *buff=alloca(strlen(*argv)+5);
          sprintf(buff,"%s.vqh",*argv);
          basename=*argv++;

          out=fopen(buff,"w");
          if(!out){
            fprintf(stderr,"unable to open %s for output\n",buff);
            exit(1);
          }
        }
        break;
      default:
        /* too many positional arguments.  argv is not advanced here, so
           make sure we leave even if usage() returns. */
        usage();
        exit(1);
      }
    }
  }
  if(!entries || !points || !out)usage();
  if(target==-1)usage();

  /* add guard points */
  for(i=0;i<entries;i++)
    for(j=0;j<dim;j++)
      pointlist[points++]=b->valuelist[i*dim+j];

  /* from here on `points` counts vectors, not scalars */
  points/=dim;

  /* set up auxiliary vectors for error tracking */
  {
    encode_aux_nearestmatch *nt=NULL;
    long pointssofar=0;
    long *pointindex;
    long indexedpoints=0;
    long *entryindex;
    long *reventry;
    /* membership[]/secondary[] are per-point "next" links; firsthead[] and
       secondhead[] are the per-cell list heads.  -1 terminates a list. */
    long *membership=_ogg_malloc(points*sizeof(long));
    long *firsthead=_ogg_malloc(entries*sizeof(long));
    long *secondary=_ogg_malloc(points*sizeof(long));
    long *secondhead=_ogg_malloc(entries*sizeof(long));

    long *cellcount=_ogg_calloc(entries,sizeof(long));
    long *cellcount2=_ogg_calloc(entries,sizeof(long));
    float *cellerror=_ogg_calloc(entries,sizeof(float));
    float *cellerrormax=_ogg_calloc(entries,sizeof(float));
    long cellsleft=entries;
    for(i=0;i<points;i++)membership[i]=-1;
    for(i=0;i<entries;i++)firsthead[i]=-1;
    for(i=0;i<points;i++)secondary[i]=-1;
    for(i=0;i<entries;i++)secondhead[i]=-1;

    for(i=0;i<points;i++){
      /* assign vectors to the nearest cell.  Also keep track of second
         nearest for error statistics */
      float *ppt=pointlist+i*dim;
      int    firstentry=closest(b,ppt,-1);
      int    secondentry=closest(b,ppt,firstentry);
      float firstmetric=_dist(dim,b->valuelist+dim*firstentry,ppt);
      float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt);

      if(!(i&0xff))spinnit("initializing... ",points-i);

      membership[i]=firsthead[firstentry];
      firsthead[firstentry]=i;
      secondary[i]=secondhead[secondentry];
      secondhead[secondentry]=i;

      /* the last `entries` points are the guard points; they are linked
         into the lists but kept out of the statistics */
      if(i<points-entries){
        cellerror[firstentry]+=secondmetric-firstmetric;
        cellerrormax[firstentry]=max(cellerrormax[firstentry],
                                     _heuristic(b,ppt,secondentry));
        cellcount[firstentry]++;
        cellcount2[secondentry]++;
      }
    }

    /* which cells are most heavily populated?  Protect as many from
       dispersal as the user has requested */
    {
      long **countindex=_ogg_calloc(entries,sizeof(long *));
      for(i=0;i<entries;i++)countindex[i]=cellcount+i;
      qsort(countindex,entries,sizeof(long *),longsort);
      /* never protect more cells than exist */
      for(i=0;i<protect && i<entries;i++){
        int ptr=countindex[i]-cellcount;
        cellerrormax[ptr]=9e50f;
      }
    }

    {
      fprintf(stderr,"\r");
      for(i=0;i<entries;i++){
        /* decompose index */
        int entry=i;
        for(j=0;j<dim;j++){
          fprintf(stderr,"%d:",entry%b->c->thresh_tree->quantvals);
          entry/=b->c->thresh_tree->quantvals;
        }

        fprintf(stderr,":%ld/%ld, ",cellcount[i],cellcount2[i]);
      }
      fprintf(stderr,"\n");
    }

    /* do the automatic cull request */
    while(cellsleft>target){
      int bestcell=-1;
      float besterror=0;
      float besterror2=0;
      long head=-1;
      char spinbuf[80];
      sprintf(spinbuf,"cells left to eliminate: %ld : ",cellsleft-target);

      /* find the cell with lowest removal impact */
      for(i=0;i<entries;i++){
        if(b->c->lengthlist[i]>0){
          if(bestcell==-1 || cellerrormax[i]<=besterror2){
            if(bestcell==-1 || cellerrormax[i]<besterror2 ||
               besterror>cellerror[i]){
              besterror=cellerror[i];
              besterror2=cellerrormax[i];
              bestcell=i;
            }
          }
        }
      }

      /* no live cell left to eliminate; stop before using -1 as an index */
      if(bestcell==-1)break;

      fprintf(stderr,"\reliminating cell %d                              \n"
              "     dispersal error of %g max/%g total (%ld hits)\n",
              bestcell,besterror2,besterror,cellcount[bestcell]);

      /* disperse it.  move each point out, adding it (properly) to
         the second best */
      b->c->lengthlist[bestcell]=0;
      head=firsthead[bestcell];
      firsthead[bestcell]=-1;
      while(head!=-1){
        /* head is a point number */
        float *ppt=pointlist+head*dim;
        int firstentry=closest(b,ppt,-1);
        int secondentry=closest(b,ppt,firstentry);
        float firstmetric=_dist(dim,b->valuelist+dim*firstentry,ppt);
        float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt);
        long next=membership[head];

        if(head<points-entries){
          cellcount[firstentry]++;
          cellcount[bestcell]--;
          cellerror[firstentry]+=secondmetric-firstmetric;
          cellerrormax[firstentry]=max(cellerrormax[firstentry],
                                       _heuristic(b,ppt,secondentry));
        }

        membership[head]=firsthead[firstentry];
        firsthead[firstentry]=head;
        head=next;
        if(cellcount[bestcell]%128==0)
          spinnit(spinbuf,cellcount[bestcell]+cellcount2[bestcell]);

      }

      /* now see that all points that had the dispersed cell as second
         choice have second choice reassigned */
      head=secondhead[bestcell];
      secondhead[bestcell]=-1;
      while(head!=-1){
        float *ppt=pointlist+head*dim;
        /* who are we assigned to now? */
        int firstentry=closest(b,ppt,-1);
        /* what is the new second closest match? */
        int secondentry=closest(b,ppt,firstentry);
        /* old second closest is the cell being disbanded */
        float oldsecondmetric=_dist(dim,b->valuelist+dim*bestcell,ppt);
        /* new second closest error */
        float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt);
        long next=secondary[head];

        if(head<points-entries){
          cellcount2[secondentry]++;
          cellcount2[bestcell]--;
          cellerror[firstentry]+=secondmetric-oldsecondmetric;
          cellerrormax[firstentry]=max(cellerrormax[firstentry],
                                       _heuristic(b,ppt,secondentry));
        }

        secondary[head]=secondhead[secondentry];
        secondhead[secondentry]=head;
        head=next;

        if(cellcount2[bestcell]%128==0)
          spinnit(spinbuf,cellcount2[bestcell]);
      }

      cellsleft--;
    }

    /* paring is over.  Build decision trees using points that now fall
       through the thresh matcher. */
    /* we don't free membership; we flatten it in order to use in lp_split */

    /* after this loop membership[p] is the cell index of point p instead
       of a list link */
    for(i=0;i<entries;i++){
      long head=firsthead[i];
      spinnit("rearranging membership cache... ",entries-i);
      while(head!=-1){
        long next=membership[head];
        membership[head]=i;
        head=next;
      }
    }

    free(secondhead);
    free(firsthead);
    free(cellerror);
    free(cellerrormax);
    free(secondary);

    pointindex=_ogg_malloc(points*sizeof(long));
    /* make a point index of fall-through points */
    for(i=0;i<points;i++){
      int best=_best(b,pointlist+i*dim,1);
      if(best==-1)
        pointindex[indexedpoints++]=i;
      spinnit("finding orphaned points... ",points-i);
    }

    /* make an entry index; target is reused as the count of live entries */
    entryindex=_ogg_malloc(entries*sizeof(long));
    target=0;
    for(i=0;i<entries;i++){
      if(b->c->lengthlist[i]>0)
        entryindex[target++]=i;
    }

    /* make working space for a reverse entry index */
    reventry=_ogg_malloc(entries*sizeof(long));

    /* do the split */
    nt=b->c->nearest_tree=
      _ogg_calloc(1,sizeof(encode_aux_nearestmatch));

    nt->alloc=4096;
    nt->ptr0=_ogg_malloc(sizeof(long)*nt->alloc);
    nt->ptr1=_ogg_malloc(sizeof(long)*nt->alloc);
    nt->p=_ogg_malloc(sizeof(long)*nt->alloc);
    nt->q=_ogg_malloc(sizeof(long)*nt->alloc);
    nt->aux=0;

    fprintf(stderr,"Leaves added: %d              \n",
            lp_split(pointlist,points,
                     b,entryindex,target,
                     pointindex,indexedpoints,
                     membership,reventry,
                     0,&pointssofar));
    free(membership);
    free(reventry);
    free(pointindex);

    /* hack alert.  I should just change the damned splitter and
       codebook writer */
    /* p/q are scaled by dim for the _best() calls below and scaled back
       afterwards */
    for(i=0;i<nt->aux;i++)nt->p[i]*=dim;
    for(i=0;i<nt->aux;i++)nt->q[i]*=dim;

    /* recount hits.  Build new lengthlist. reuse entryindex storage */
    for(i=0;i<entries;i++)entryindex[i]=1;
    for(i=0;i<points-entries;i++){
      int best=_best(b,pointlist+i*dim,1);
      float *a=pointlist+i*dim;
      if(!(i&0xff))spinnit("counting hits...",i);
      if(best==-1){
        fprintf(stderr,"\nINTERNAL ERROR; a point count not be matched to a\n"
                "codebook entry.  The new decision tree is broken.\n");
        exit(1);
      }
      entryindex[best]++;
    }
    for(i=0;i<nt->aux;i++)nt->p[i]/=dim;
    for(i=0;i<nt->aux;i++)nt->q[i]/=dim;

    /* the lengthlist builder doesn't actually deal with 0 hit entries.
       So, we pack the 'sparse' hit list into a dense list, then unpack
       the lengths after the build */
    {
      int upper=0;
      long *lengthlist=_ogg_calloc(entries,sizeof(long));
      for(i=0;i<entries;i++){
        if(b->c->lengthlist[i]>0)
          entryindex[upper++]=entryindex[i];
        else{
          if(entryindex[i]>1){
            fprintf(stderr,"\nINTERNAL ERROR; _best matched to unused entry\n");
            exit(1);
          }
        }
      }

      /* sanity check */
      if(upper != target){
        fprintf(stderr,"\nINTERNAL ERROR; packed the wrong number of entries\n");
        exit(1);
      }

      build_tree_from_lengths(upper,entryindex,lengthlist);

      upper=0;
      for(i=0;i<entries;i++){
        if(b->c->lengthlist[i]>0)
          b->c->lengthlist[i]=lengthlist[upper++];
      }

    }
  }
  /* we're done.  write it out. */
  write_codebook(out,basename,b->c);

  fprintf(stderr,"\r                                        \nDone.\n");
  return(0);
}