示例#1
0
文件: mcenc.c 项目: John-He-928/krkrz
/*Builds candidate set A for the motion search of macro block _mbi and stores
   the component-wise median of its first three vectors in slot 0.
  _enc:   Encoder context; only its mb_info array is read.
  _mcenc: Search context; candidates[] is filled starting at slot 1 and setb0
           receives the number of slots used (the index where set B starts).
  _accum: Accumulated motion vector; stored as a candidate of its own and
           added to the block's analysis_mv[1] vector.
  _mbi:   Index of the macro block being searched.
  _frame: Second index into each block's analysis_mv[][] array.*/
static void oc_mcenc_find_candidates_a(oc_enc_ctx *_enc,oc_mcenc_ctx *_mcenc,
 oc_mv _accum,int _mbi,int _frame){
  oc_mb_enc_info *embs;
  int             med[3][2];
  int             dx;
  int             dy;
  int             nc;
  int             ni;
  unsigned        nbi;
  embs=_enc->mb_info;
  dx=OC_MV_X(_accum);
  dy=OC_MV_Y(_accum);
  /*Slot 0 is reserved for the median predictor, so filling starts at 1.*/
  nc=1;
  /*First part of set A: the analysis_mv[0] vector of every listed neighbor.
    The loop simply does not run when there are no neighbors.*/
  for(ni=0;ni<embs[_mbi].ncneighbors;ni++){
    nbi=embs[_mbi].cneighbors[ni];
    _mcenc->candidates[nc][0]=OC_MV_X(embs[nbi].analysis_mv[0][_frame]);
    _mcenc->candidates[nc][1]=OC_MV_Y(embs[nbi].analysis_mv[0][_frame]);
    nc++;
  }
  /*The accumulated vector by itself.*/
  _mcenc->candidates[nc][0]=dx;
  _mcenc->candidates[nc][1]=dy;
  nc++;
  /*This block's analysis_mv[1] vector offset by the accumulated vector,
     limited to [-31,31] in each component.*/
  _mcenc->candidates[nc][0]=OC_CLAMPI(-31,
   OC_MV_X(embs[_mbi].analysis_mv[1][_frame])+dx,31);
  _mcenc->candidates[nc][1]=OC_CLAMPI(-31,
   OC_MV_Y(embs[_mbi].analysis_mv[1][_frame])+dy,31);
  nc++;
  /*The (0,0) vector.*/
  _mcenc->candidates[nc][0]=0;
  _mcenc->candidates[nc][1]=0;
  nc++;
  /*Take the median of slots 1..3, one component at a time, using a
     three-element compare-and-swap network.
    The x and y columns are sorted independently of each other.*/
  memcpy(med,_mcenc->candidates+1,sizeof(med));
  OC_SORT2I(med[0][0],med[1][0]);
  OC_SORT2I(med[1][0],med[2][0]);
  OC_SORT2I(med[0][0],med[1][0]);
  OC_SORT2I(med[0][1],med[1][1]);
  OC_SORT2I(med[1][1],med[2][1]);
  OC_SORT2I(med[0][1],med[1][1]);
  _mcenc->candidates[0][0]=med[1][0];
  _mcenc->candidates[0][1]=med[1][1];
  _mcenc->setb0=nc;
}
示例#2
0
/*Convenience function converts Q57 value to a clamped 32-bit Q24 value
  _in: input in Q57 format.
  Return: same number in Q24, rounded by adding half a unit before the
           33-bit right shift.*/
static ogg_int32_t oc_q57_to_q24(ogg_int64_t _in){
  ogg_int64_t ret;
  /*Bit 32 of _in is the half-unit bit of the result, so adding it after the
     shift gives the same value as (_in+2**32)>>33 whenever that sum is
     representable.
    Unlike adding the bias to _in first, this cannot overflow a signed 64-bit
     integer when _in is within 2**32 of the largest representable value.*/
  ret=(_in>>33)+((_in>>32)&1);
  /*0x80000000 is automatically converted to unsigned on 32-bit systems.
    -0x7FFFFFFF-1 is needed to avoid "promoting" the whole expression to
    unsigned.*/
  return (ogg_int32_t)OC_CLAMPI(-0x7FFFFFFF-1,ret,0x7FFFFFFF);
}
示例#3
0
文件: mcenc.c 项目: John-He-928/krkrz
/*Appends candidate set B to the list started by set A.
  Set B consists of one vector: twice the block's analysis_mv[1] vector minus
   its analysis_mv[2] vector, plus the accumulated vector, limited to
   [-31,31] in each component.
  _enc:   Encoder context; only its mb_info array is read.
  _mcenc: Search context; the vector is written at index setb0 and
           ncandidates is set to setb0+1.
  _accum: Accumulated motion vector added to the prediction.
  _mbi:   Index of the macro block being searched.
  _frame: Second index into the block's analysis_mv[][] array.*/
static void oc_mcenc_find_candidates_b(oc_enc_ctx *_enc,oc_mcenc_ctx *_mcenc,
 oc_mv _accum,int _mbi,int _frame){
  oc_mb_enc_info *emb;
  int             accum_x;
  int             accum_y;
  int             slot;
  emb=_enc->mb_info+_mbi;
  accum_x=OC_MV_X(_accum);
  accum_y=OC_MV_Y(_accum);
  /*Set B begins where set A ended.*/
  slot=_mcenc->setb0;
  /*Only the current block contributes.
    Using more did not appear to be helpful with the current selection logic
     due to escaping the local search too quickly.*/
  _mcenc->candidates[slot][0]=OC_CLAMPI(-31,
   2*OC_MV_X(emb->analysis_mv[1][_frame])
   -OC_MV_X(emb->analysis_mv[2][_frame])+accum_x,31);
  _mcenc->candidates[slot][1]=OC_CLAMPI(-31,
   2*OC_MV_Y(emb->analysis_mv[1][_frame])
   -OC_MV_Y(emb->analysis_mv[2][_frame])+accum_y,31);
  _mcenc->ncandidates=slot+1;
}
示例#4
0
/*Fills in the dequantization tables described by a set of quantizer info.
  On entry every _dequant[qi][pli][qti] must already point at storage for one
   64-entry table; the values are written through those pointers.
  Whenever a freshly computed table is byte-for-byte equal to one computed
   earlier for the same qi (an earlier coding mode, or an earlier plane of the
   same mode), the pointer is redirected to that earlier table.
  The storage that is no longer referenced is not released here.
  _dequant:     Table pointers indexed by [qi][pli][qti].
  _pp_dc_scale: If not NULL, entry qi receives dc_scale[qi]*base[0]/160 (used
                 for postprocessing, not dequantization).
                When given, it is written once per (qti,pli) pass, so the
                 value from the last pass is the one left behind.
  _qinfo:       The quantizer parameters to expand.*/
void oc_dequant_tables_init(ogg_uint16_t *_dequant[64][3][2],
 int _pp_dc_scale[64],const th_quant_info *_qinfo){
  /*Coding mode: intra or inter.*/
  int qti;
  /*Y', C_b, C_r*/
  int pli;
  for(qti=0;qti<2;qti++){
    for(pli=0;pli<3;pli++){
      /*Quality index; runs on across successive ranges.*/
      int qi;
      /*Range iterator; the final pass (qri==nranges) handles the single
         quality index that uses the last base matrix as-is.*/
      int qri;
      qi=0;
      for(qri=0;qri<=_qinfo->qi_ranges[qti][pli].nranges;qri++){
        th_quant_base base;
        int           range_start;
        int           range_end;
        memcpy(base,_qinfo->qi_ranges[qti][pli].base_matrices[qri],
         sizeof(base));
        range_start=qi;
        if(qri==_qinfo->qi_ranges[qti][pli].nranges){
          range_end=qi+1;
        }
        else{
          range_end=qi+_qinfo->qi_ranges[qti][pli].sizes[qri];
        }
        /*Walk the quality indices of this range.*/
        for(;;){
          ogg_uint16_t *match;
          ogg_uint32_t  dc_fac;
          ogg_uint32_t  coef;
          int           zzi;
          int           ci;
          int           qtj;
          int           plj;
          /*All scaling is done in exact integer arithmetic.*/
          dc_fac=(ogg_uint32_t)_qinfo->dc_scale[qi]*base[0];
          /*For postprocessing, not dequantization.*/
          if(_pp_dc_scale!=NULL){
            _pp_dc_scale[qi]=(int)(dc_fac/160);
          }
          /*The DC coefficient uses the DC scale table.*/
          coef=(dc_fac/100)<<2;
          coef=OC_CLAMPI(OC_DC_QUANT_MIN[qti],coef,OC_QUANT_MAX);
          _dequant[qi][pli][qti][0]=(ogg_uint16_t)coef;
          /*The AC coefficients use the AC scale table; the base matrix is
             read through OC_FZIG_ZAG so entry zzi of the output comes from
             base[OC_FZIG_ZAG[zzi]].*/
          for(zzi=1;zzi<64;zzi++){
            coef=((ogg_uint32_t)_qinfo->ac_scale[qi]
             *base[OC_FZIG_ZAG[zzi]]/100)<<2;
            coef=OC_CLAMPI(OC_AC_QUANT_MIN[qti],coef,OC_QUANT_MAX);
            _dequant[qi][pli][qti][zzi]=(ogg_uint16_t)coef;
          }
          /*Look for an identical table among those already produced for this
             qi: all three planes of earlier modes, then the earlier planes of
             this mode.
            Sharing one copy helps cache coherency later.*/
          match=NULL;
          for(qtj=0;qtj<=qti&&match==NULL;qtj++){
            int nplanes;
            nplanes=qtj<qti?3:pli;
            for(plj=0;plj<nplanes;plj++){
              if(memcmp(_dequant[qi][pli][qti],_dequant[qi][plj][qtj],
               sizeof(oc_quant_table))==0){
                match=_dequant[qi][plj][qtj];
                break;
              }
            }
          }
          if(match!=NULL){
            _dequant[qi][pli][qti]=match;
          }
          qi++;
          if(qi>=range_end)break;
          /*Linearly interpolate (with rounding) between this range's base
             matrix and the next one to get the base matrix for the new qi.*/
          for(ci=0;ci<64;ci++){
            base[ci]=(unsigned char)(
             (2*((range_end-qi)
             *_qinfo->qi_ranges[qti][pli].base_matrices[qri][ci]+
             (qi-range_start)
             *_qinfo->qi_ranges[qti][pli].base_matrices[qri+1][ci])
             +_qinfo->qi_ranges[qti][pli].sizes[qri])/
             (2*_qinfo->qi_ranges[qti][pli].sizes[qri]));
          }
        }
      }
    }
  }
}
示例#5
0
/*Fills in the dequantization tables described by a set of quantizer info.
  On entry every _dequant[qti][pli] must already point at storage for a full
   set of tables (one 64-entry table per quality index).
  Each (qti,pli) set is first computed into a local staging copy.
  If the staged set is byte-for-byte equal to a set produced earlier (any plane
   of an earlier coding mode, or an earlier plane of the same mode), the
   pointer is redirected to that earlier set; otherwise the staged set is
   copied into the storage the pointer refers to.
  Storage that is no longer referenced is not released here.
  _dequant:     Table-set pointers indexed by [qti][pli].
  _pp_dc_scale: If not NULL, entry qi receives dc_scale[qi]*base[0]/160 (used
                 for postprocessing, not dequantization).
  _qinfo:       The quantizer parameters to expand.*/
void oc_dequant_tables_init(oc_quant_table *_dequant[2][3],
 int _pp_dc_scale[64],const th_quant_info *_qinfo){
  /*Coding mode: intra or inter.*/
  int qti;
  /*Y U V*/
  int pli;
  for(qti=0;qti<2;qti++){
    for(pli=0;pli<3;pli++){
      oc_quant_tables  stage;
      oc_quant_table  *match;
      /*Quality index; runs on across successive ranges.*/
      int              qi;
      /*Range iterator.*/
      int              qri;
      int              qtj;
      int              plj;
      qi=0;
      for(qri=0;qri<=_qinfo->qi_ranges[qti][pli].nranges;qri++){
        th_quant_base base;
        int           range_start;
        int           range_end;
        int           ci;
        memcpy(base,_qinfo->qi_ranges[qti][pli].base_matrices[qri],
         sizeof(base));
        range_start=qi;
        if(qri==_qinfo->qi_ranges[qti][pli].nranges){
          range_end=qi+1;
        }
        else{
          range_end=qi+_qinfo->qi_ranges[qti][pli].sizes[qri];
        }
        /*Walk the quality indices of this range.*/
        for(;;){
          ogg_uint32_t dc_fac;
          ogg_uint32_t coef;
          /*All scaling is done in exact integer arithmetic.*/
          dc_fac=(ogg_uint32_t)_qinfo->dc_scale[qi]*base[0];
          /*For postprocessing, not dequantization.*/
          if(_pp_dc_scale!=NULL){
            _pp_dc_scale[qi]=(int)(dc_fac/160);
          }
          /*The DC coefficient uses the DC scale table.*/
          coef=(dc_fac/100)<<2;
          coef=OC_CLAMPI(OC_DC_QUANT_MIN[qti],coef,OC_QUANT_MAX);
          stage[qi][0]=(ogg_uint16_t)coef;
          /*The AC coefficients use the AC scale table.*/
          for(ci=1;ci<64;ci++){
            coef=((ogg_uint32_t)_qinfo->ac_scale[qi]*base[ci]/100)<<2;
            coef=OC_CLAMPI(OC_AC_QUANT_MIN[qti],coef,OC_QUANT_MAX);
            stage[qi][ci]=(ogg_uint16_t)coef;
          }
          qi++;
          if(qi>=range_end)break;
          /*Linearly interpolate (with rounding) between this range's base
             matrix and the next one to get the base matrix for the new qi.*/
          for(ci=0;ci<64;ci++){
            base[ci]=(unsigned char)(
             (2*((range_end-qi)
             *_qinfo->qi_ranges[qti][pli].base_matrices[qri][ci]+
             (qi-range_start)
             *_qinfo->qi_ranges[qti][pli].base_matrices[qri+1][ci])
             +_qinfo->qi_ranges[qti][pli].sizes[qri])/
             (2*_qinfo->qi_ranges[qti][pli].sizes[qri]));
          }
        }
      }
      /*The staged set is complete.
        Only commit it to memory if no earlier set is identical; sharing one
         copy helps cache coherency later.*/
      match=NULL;
      for(qtj=0;qtj<=qti&&match==NULL;qtj++){
        int nplanes;
        nplanes=qtj<qti?3:pli;
        for(plj=0;plj<nplanes;plj++){
          if(memcmp(stage,_dequant[qtj][plj],sizeof(stage))==0){
            match=_dequant[qtj][plj];
            break;
          }
        }
      }
      if(match!=NULL){
        _dequant[qti][pli]=match;
      }
      else{
        memcpy(_dequant[qti][pli],stage,sizeof(stage));
      }
    }
  }
#ifdef _TH_DEBUG_
  {
    int i;
    int j;
    int k;
    int l;
    /*Dump the calculated quantizer tables.*/
    for(i=0;i<2;i++){
      for(j=0;j<3;j++){
        for(k=0;k<64;k++){
          TH_DEBUG("quantizer table [%s][%s][Q%d] = {",
           (i==0?"intra":"inter"),(j==0?"Y":(j==1?"U":"V")),k);
          for(l=0;l<64;l++){
            if((l&7)==0){
              TH_DEBUG("\n   ");
            }
            TH_DEBUG("%4d ",_dequant[i][j][k][l]);
          }
          TH_DEBUG("}\n");
        }
      }
    }
  }
#endif
}
示例#6
0
/*Builds the motion-search candidate list for macro block _mbi.
  Slot 0 receives the component-wise median of the first three vectors of set
   A; set A follows in slots 1..setb0-1, and set B starts at slot setb0.
  All candidates are halved with OC_DIV2 before returning.
  _enc:   Encoder context; only its mb_info array is read.
  _mcenc: Search context; candidates[], setb0 and ncandidates are written.
  _accum: Accumulated motion vector; stored (clamped) as a candidate of its
           own and added to the predictors built from this block's vectors.
  _mbi:   Index of the macro block being searched.
  _frame: Second index into each block's analysis_mv[][] array.*/
static void oc_mcenc_find_candidates(oc_enc_ctx *_enc,oc_mcenc_ctx *_mcenc,
 oc_mv _accum,int _mbi,int _frame){
  oc_mb_enc_info *embs;
  int             accum_x;
  int             accum_y;
  int             a[3][2];
  int             ncandidates;
  unsigned        nmbi;
  int             i;
  embs=_enc->mb_info;
  /*Skip a position to store the median predictor in.*/
  ncandidates=1;
  /*Fill in the first part of set A: the vectors from adjacent blocks.
    The loop does not run when there are no such neighbors.*/
  for(i=0;i<embs[_mbi].ncneighbors;i++){
    nmbi=embs[_mbi].cneighbors[i];
    _mcenc->candidates[ncandidates][0]=
     OC_MV_X(embs[nmbi].analysis_mv[0][_frame]);
    _mcenc->candidates[ncandidates][1]=
     OC_MV_Y(embs[nmbi].analysis_mv[0][_frame]);
    ncandidates++;
  }
  accum_x=OC_MV_X(_accum);
  accum_y=OC_MV_Y(_accum);
  /*Add a few additional vectors to set A: the vectors used in the previous
     frames and the (0,0) vector.*/
  _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,accum_x,31);
  _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,accum_y,31);
  ncandidates++;
  _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,
   OC_MV_X(embs[_mbi].analysis_mv[1][_frame])+accum_x,31);
  _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,
   OC_MV_Y(embs[_mbi].analysis_mv[1][_frame])+accum_y,31);
  ncandidates++;
  _mcenc->candidates[ncandidates][0]=0;
  _mcenc->candidates[ncandidates][1]=0;
  ncandidates++;
  /*Use the first three vectors of set A to find our best predictor: their
     median.
    The x and y components are sorted independently.*/
  memcpy(a,_mcenc->candidates+1,sizeof(a));
  OC_SORT2I(a[0][0],a[1][0]);
  OC_SORT2I(a[0][1],a[1][1]);
  OC_SORT2I(a[1][0],a[2][0]);
  OC_SORT2I(a[1][1],a[2][1]);
  OC_SORT2I(a[0][0],a[1][0]);
  OC_SORT2I(a[0][1],a[1][1]);
  _mcenc->candidates[0][0]=a[1][0];
  _mcenc->candidates[0][1]=a[1][1];
  /*Fill in set B: the accelerated predictor for the current macro block.
    This used to be a loop over pneighbors, but every pass read
     embs[_mbi] rather than the neighbor it had just looked up, so it stored
     the same vector npneighbors+1 times.
    A repeated vector adds no new search position, so it is stored once.*/
  _mcenc->setb0=ncandidates;
  _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,
   2*OC_MV_X(embs[_mbi].analysis_mv[1][_frame])
   -OC_MV_X(embs[_mbi].analysis_mv[2][_frame])+accum_x,31);
  _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,
   2*OC_MV_Y(embs[_mbi].analysis_mv[1][_frame])
   -OC_MV_Y(embs[_mbi].analysis_mv[2][_frame])+accum_y,31);
  ncandidates++;
  /*Truncate to full-pel positions.*/
  for(i=0;i<ncandidates;i++){
    _mcenc->candidates[i][0]=OC_DIV2(_mcenc->candidates[i][0]);
    _mcenc->candidates[i][1]=OC_DIV2(_mcenc->candidates[i][1]);
  }
  _mcenc->ncandidates=ncandidates;
}
示例#7
0
文件: quant.c 项目: kazutomi/xiphqt
/*Fills in the dequantization tables described by a set of quantizer info.
  On entry every _dequant[qti][pli] must already point at storage for a full
   set of tables (one 64-entry table per quality index).
  Before computing a set, its range description in _qinfo is compared
   byte-for-byte against that of the previous plane of the same coding mode,
   and then against that of the same plane in the previous coding mode.
  On a match the pointer is redirected to the corresponding earlier set and
   nothing is computed; the storage that is no longer referenced is not
   released here.
  _dequant:     Table-set pointers indexed by [qti][pli].
  _pp_dc_scale: If not NULL, entry qi receives dc_scale[qi]*base[0]/160 for
                 every set that is actually computed.
  _qinfo:       The quantizer parameters to expand.*/
void oc_dequant_tables_init(oc_quant_table *_dequant[2][3],
 int _pp_dc_scale[64],const th_quant_info *_qinfo){
  /*Coding mode index.*/
  int qti;
  /*Plane index.*/
  int pli;
  for(qti=0;qti<2;qti++){
    for(pli=0;pli<3;pli++){
      /*Quality index; runs on across successive ranges.*/
      int qi;
      /*Range iterator.*/
      int qri;
      /*Sharing identical sets helps cache coherency later.
        First try the previous plane of this coding mode...*/
      if(pli>0&&!memcmp(&_qinfo->qi_ranges[qti][pli-1],
       &_qinfo->qi_ranges[qti][pli],sizeof(_qinfo->qi_ranges[qti][pli]))){
        _dequant[qti][pli]=_dequant[qti][pli-1];
        continue;
      }
      /*...then the same plane of the previous coding mode.*/
      if(qti>0&&!memcmp(&_qinfo->qi_ranges[qti-1][pli],
       &_qinfo->qi_ranges[qti][pli],sizeof(_qinfo->qi_ranges[qti][pli]))){
        _dequant[qti][pli]=_dequant[qti-1][pli];
        continue;
      }
      qi=0;
      for(qri=0;qri<=_qinfo->qi_ranges[qti][pli].nranges;qri++){
        th_quant_base base;
        int           range_start;
        int           range_end;
        int           ci;
        memcpy(base,_qinfo->qi_ranges[qti][pli].base_matrices[qri],
         sizeof(base));
        range_start=qi;
        if(qri==_qinfo->qi_ranges[qti][pli].nranges){
          range_end=qi+1;
        }
        else{
          range_end=qi+_qinfo->qi_ranges[qti][pli].sizes[qri];
        }
        /*Walk the quality indices of this range.*/
        for(;;){
          ogg_uint32_t dc_fac;
          ogg_uint32_t coef;
          /*All scaling is done in exact integer arithmetic.
            The DC coefficient uses the DC scale table.*/
          dc_fac=(ogg_uint32_t)_qinfo->dc_scale[qi]*base[0];
          if(_pp_dc_scale!=NULL){
            _pp_dc_scale[qi]=(int)(dc_fac/160);
          }
          coef=(dc_fac/100)<<2;
          coef=OC_CLAMPI(OC_DC_QUANT_MIN[qti],coef,OC_QUANT_MAX);
          _dequant[qti][pli][qi][0]=(ogg_uint16_t)coef;
          /*The AC coefficients use the AC scale table.*/
          for(ci=1;ci<64;ci++){
            coef=((ogg_uint32_t)_qinfo->ac_scale[qi]*base[ci]/100)<<2;
            coef=OC_CLAMPI(OC_AC_QUANT_MIN[qti],coef,OC_QUANT_MAX);
            _dequant[qti][pli][qi][ci]=(ogg_uint16_t)coef;
          }
          qi++;
          if(qi>=range_end)break;
          /*Linearly interpolate (with rounding) between this range's base
             matrix and the next one to get the base matrix for the new qi.*/
          for(ci=0;ci<64;ci++){
            base[ci]=(unsigned char)(
             (2*((range_end-qi)
             *_qinfo->qi_ranges[qti][pli].base_matrices[qri][ci]+
             (qi-range_start)
             *_qinfo->qi_ranges[qti][pli].base_matrices[qri+1][ci])
             +_qinfo->qi_ranges[qti][pli].sizes[qri])/
             (2*_qinfo->qi_ranges[qti][pli].sizes[qri]));
          }
        }
      }
    }
  }
}
示例#8
0
/*Compile collected SATD/logq/rate/RMSE metrics into a form that's immediately
   useful for mode decision.
  _metrics:    The collected statistics, one entry per bin.
  _niters_min: Number of passes to run unconditionally (as long as the table
                is still changing) before the "total change must decrease"
                stopping rule takes effect.
  _reweight:   If non-zero, every table entry is finally scaled by
                w/(OC_ZWEIGHT+w), where w is the weight of its grid point.
  _table:      The rate/RMSE table; read and updated in place.
  _shift:      Left shift that converts a bin index si into the s0/s1 values
                handed to the solver.
  _weight:     Receives the weight of every grid point.*/
void oc_mode_metrics_update(oc_mode_metrics (*_metrics)[3][2][OC_COMP_BINS],
 int _niters_min,int _reweight,oc_mode_rd (*_table)[3][2][OC_COMP_BINS],
 int _shift,double (*_weight)[3][2][OC_COMP_BINS]){
  int niters;
  int prevdr;
  int prevdd;
  int dr;
  int dd;
  int pli;
  int qti;
  int qi;
  int si;
  dd=dr=INT_MAX;
  niters=0;
  /*The encoder interpolates rate and RMSE terms bilinearly from an
     OC_LOGQ_BINS by OC_COMP_BINS grid of sample points in _table.
    Finding the grid values that minimize the total squared prediction error
     exactly would mean solving a sparse linear system with one variable per
     grid point.
    Instead we use Gauss-Seidel iteration: one grid point is updated at a
     time, using the current values of its neighbors, until the values stop
     changing.*/
  for(;;){
    prevdr=dr;
    prevdd=dd;
    dd=dr=0;
    for(pli=0;pli<3;pli++){
      for(qti=0;qti<2;qti++){
        for(qi=0;qi<OC_LOGQ_BINS;qi++){
          for(si=0;si<OC_COMP_BINS;si++){
            oc_mode_metrics m[4];
            int             s0[4];
            int             s1[4];
            int             q0[4];
            int             q1[4];
            double          ra[4];
            double          rb[4];
            double          rc[4];
            double          da[4];
            double          db[4];
            double          dc[4];
            double          r;
            double          d;
            int             rate;
            int             rmse;
            int             ds;
            int             n;
            int             bi;
            n=0;
            /*Collect the statistics for the (up to) four bins grid point
               (si,qi) touches.
              Passes 0 and 1 use the row below (qi-1), passes 2 and 3 the row
               above (qi+1); even passes use the column to the left (si-1),
               odd passes the column at si+ds.*/
            for(bi=0;bi<4;bi++){
              /*Row and column of the neighboring grid points.*/
              int qn;
              int sn;
              /*Row and column of the bin the statistics come from.*/
              int mq;
              int ms;
              if(bi<2){
                if(qi<=0)continue;
                qn=qi-1;
                mq=qi-1;
              }
              else{
                if(qi+1>=OC_LOGQ_BINS)continue;
                qn=qi+1;
                mq=qi;
              }
              if((bi&1)==0){
                if(si<=0)continue;
                sn=si-1;
                ms=si-1;
              }
              else{
                /*In the last column, step back instead of forward.*/
                ds=si+1<OC_COMP_BINS?1:-1;
                sn=si+ds;
                ms=si;
              }
              q0[n]=OC_MODE_LOGQ[qn][pli][qti];
              q1[n]=OC_MODE_LOGQ[qi][pli][qti];
              s0[n]=sn<<_shift;
              s1[n]=si<<_shift;
              ra[n]=ldexp(_table[qn][pli][qti][sn].rate,-OC_BIT_SCALE);
              da[n]=ldexp(_table[qn][pli][qti][sn].rmse,-OC_RMSE_SCALE);
              rb[n]=ldexp(_table[qn][pli][qti][si].rate,-OC_BIT_SCALE);
              db[n]=ldexp(_table[qn][pli][qti][si].rmse,-OC_RMSE_SCALE);
              rc[n]=ldexp(_table[qi][pli][qti][sn].rate,-OC_BIT_SCALE);
              dc[n]=ldexp(_table[qi][pli][qti][sn].rmse,-OC_RMSE_SCALE);
              m[n]=_metrics[mq][pli][qti][ms];
              n++;
            }
            if(!OC_HAS_MODE_METRICS&&niters==0){
              /*On the first pass, initialize with a simple weighted average
                 of the neighboring bins.
                The bins are summed from the last one collected to the
                 first.*/
              double w;
              w=r=d=0;
              for(bi=n-1;bi>=0;bi--){
                w+=m[bi].w;
                r+=m[bi].r;
                d+=m[bi].d;
              }
              if(w>1E-3){
                r=r/w;
                d=d/w;
              }
              else{
                r=0;
                d=0;
              }
              _weight[qi][pli][qti][si]=w;
            }
            else{
              /*Update the grid point and save the weight for later.*/
              _weight[qi][pli][qti][si]=
               oc_mode_metrics_solve(&r,&d,m,s0,s1,q0,q1,ra,rb,rc,da,db,dc,n);
            }
            /*Convert back to fixed point and limit to the 16-bit range of
               the table entries.*/
            rate=(int)(ldexp(r,OC_BIT_SCALE)+0.5);
            rate=OC_CLAMPI(-32768,rate,32767);
            rmse=(int)(ldexp(d,OC_RMSE_SCALE)+0.5);
            rmse=OC_CLAMPI(-32768,rmse,32767);
            /*Track the total absolute change made by this pass.*/
            dr+=abs(rate-_table[qi][pli][qti][si].rate);
            dd+=abs(rmse-_table[qi][pli][qti][si].rmse);
            _table[qi][pli][qti][si].rate=(ogg_int16_t)rate;
            _table[qi][pli][qti][si].rmse=(ogg_int16_t)rmse;
          }
        }
      }
    }
    /*Nothing changed: we are done.*/
    if(dr<=0&&dd<=0)break;
    /*The first _niters_min passes always continue.*/
    if(niters++<_niters_min)continue;
    /*After that, only iterate so long as the total change is decreasing.
      This ensures we don't oscillate forever, which is a danger, as all of
       our results are rounded fairly coarsely.*/
    if(dr>=prevdr||dd>=prevdd)break;
  }
  if(_reweight){
    /*Now, reduce the values of the optimal solution until we get enough
       samples in each bin to overcome the constant OC_ZWEIGHT factor.
      This encourages sampling under-populated bins and prevents a single large
       sample early on from discouraging coding in that bin ever again.*/
    for(pli=0;pli<3;pli++){
      for(qti=0;qti<2;qti++){
        for(qi=0;qi<OC_LOGQ_BINS;qi++){
          for(si=0;si<OC_COMP_BINS;si++){
            double scale;
            scale=_weight[qi][pli][qti][si];
            scale/=OC_ZWEIGHT+scale;
            _table[qi][pli][qti][si].rate=(ogg_int16_t)
             (_table[qi][pli][qti][si].rate*scale+0.5);
            _table[qi][pli][qti][si].rmse=(ogg_int16_t)
             (_table[qi][pli][qti][si].rmse*scale+0.5);
          }
        }
      }
    }
  }
}