/*Sets up the first part (set A) of the motion vector candidate list for one
   macro block, and computes the median predictor from its first three
   entries.
  Candidates are stored in half-pel units in _mcenc->candidates; slot 0 is
   reserved for the median predictor.
  _accum: Drift accumulated from the motion vectors used in previous frames.
  _mbi:   The macro block index.
  _frame: The reference frame the candidates are for.*/
static void oc_mcenc_find_candidates_a(oc_enc_ctx *_enc,oc_mcenc_ctx *_mcenc,
 oc_mv _accum,int _mbi,int _frame){
  oc_mb_enc_info *embs;
  int             accum_x;
  int             accum_y;
  int             a[3][2];
  int             ncandidates;
  unsigned        nmbi;
  int             i;
  embs=_enc->mb_info;
  /*Skip a position to store the median predictor in.*/
  ncandidates=1;
  if(embs[_mbi].ncneighbors>0){
    /*Fill in the first part of set A: the vectors from adjacent blocks.*/
    for(i=0;i<embs[_mbi].ncneighbors;i++){
      nmbi=embs[_mbi].cneighbors[i];
      _mcenc->candidates[ncandidates][0]=
       OC_MV_X(embs[nmbi].analysis_mv[0][_frame]);
      _mcenc->candidates[ncandidates][1]=
       OC_MV_Y(embs[nmbi].analysis_mv[0][_frame]);
      ncandidates++;
    }
  }
  accum_x=OC_MV_X(_accum);
  accum_y=OC_MV_Y(_accum);
  /*Add a few additional vectors to set A: the vectors used in the previous
     frames and the (0,0) vector.*/
  /*BUG FIX: the accumulated drift can lie outside the legal motion vector
     range; clamp it to [-31,31] like every other candidate (and like the
     combined oc_mcenc_find_candidates does).*/
  _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,accum_x,31);
  _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,accum_y,31);
  ncandidates++;
  _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,
   OC_MV_X(embs[_mbi].analysis_mv[1][_frame])+accum_x,31);
  _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,
   OC_MV_Y(embs[_mbi].analysis_mv[1][_frame])+accum_y,31);
  ncandidates++;
  _mcenc->candidates[ncandidates][0]=0;
  _mcenc->candidates[ncandidates][1]=0;
  ncandidates++;
  /*Use the first three vectors of set A to find our best predictor: their
     median.*/
  memcpy(a,_mcenc->candidates+1,sizeof(a));
  OC_SORT2I(a[0][0],a[1][0]);
  OC_SORT2I(a[0][1],a[1][1]);
  OC_SORT2I(a[1][0],a[2][0]);
  OC_SORT2I(a[1][1],a[2][1]);
  OC_SORT2I(a[0][0],a[1][0]);
  OC_SORT2I(a[0][1],a[1][1]);
  _mcenc->candidates[0][0]=a[1][0];
  _mcenc->candidates[0][1]=a[1][1];
  /*Remember where set B begins, for oc_mcenc_find_candidates_b.*/
  _mcenc->setb0=ncandidates;
}
/*Converts a Q57-format fixed-point value to a 32-bit Q24 value, saturating
   at the limits of the 32-bit range.
  _in: The input value, in Q57 format.
  Return: The same value in Q24 format, clamped to [INT32_MIN,INT32_MAX].*/
static ogg_int32_t oc_q57_to_q24(ogg_int64_t _in){
  ogg_int64_t shifted;
  /*Drop 33 fractional bits with rounding: '+' binds tighter than '>>', so
     the rounding offset is added before the arithmetic shift.*/
  shifted=_in+((ogg_int64_t)1<<32)>>33;
  /*0x80000000 is automatically converted to unsigned on 32-bit systems.
    -0x7FFFFFFF-1 is needed to avoid "promoting" the whole expression to
     unsigned.*/
  return (ogg_int32_t)OC_CLAMPI(-0x7FFFFFFF-1,shifted,0x7FFFFFFF);
}
/*Appends set B to the motion vector candidate list started by
   oc_mcenc_find_candidates_a: the accelerated predictor for the current
   macro block.
  _accum: Drift accumulated from the motion vectors used in previous frames.
  _mbi:   The macro block index.
  _frame: The reference frame the candidates are for.*/
static void oc_mcenc_find_candidates_b(oc_enc_ctx *_enc,oc_mcenc_ctx *_mcenc,
 oc_mv _accum,int _mbi,int _frame){
  oc_mb_enc_info *embs;
  int             drift_x;
  int             drift_y;
  int             nci;
  embs=_enc->mb_info;
  drift_x=OC_MV_X(_accum);
  drift_y=OC_MV_Y(_accum);
  /*Set B starts where set A left off.*/
  nci=_mcenc->setb0;
  /*Use only the current block.
    Using more did not appear to be helpful with the current selection logic
     due to escaping the local search too quickly.*/
  /*Linear extrapolation of the last two analysis vectors (2*v1-v2), plus the
     accumulated drift, clamped to the legal motion vector range.*/
  _mcenc->candidates[nci][0]=OC_CLAMPI(-31,
   2*OC_MV_X(embs[_mbi].analysis_mv[1][_frame])
   -OC_MV_X(embs[_mbi].analysis_mv[2][_frame])+drift_x,31);
  _mcenc->candidates[nci][1]=OC_CLAMPI(-31,
   2*OC_MV_Y(embs[_mbi].analysis_mv[1][_frame])
   -OC_MV_Y(embs[_mbi].analysis_mv[2][_frame])+drift_y,31);
  nci++;
  _mcenc->ncandidates=nci;
}
/*Initializes the dequantization tables from a set of quantizer info.
  Currently the dequantizer (and elsewhere enquantizer) tables are expected
   to be initialized as pointing to the storage reserved for them in the
   oc_theora_state (resp. oc_enc_ctx) structure.
  If some tables are duplicates of others, the pointers will be adjusted to
   point to a single copy of the tables, but the storage for them will not be
   freed.
  If you're concerned about the memory footprint, the obvious thing to do is
   to move the storage out of its fixed place in the structures and allocate
   it on demand.
  However, a much, much better option is to only store the quantization
   matrices being used for the current frame, and to recalculate these as the
   qi values change between frames (this is what VP3 did).*/
void oc_dequant_tables_init(ogg_uint16_t *_dequant[64][3][2],
 int _pp_dc_scale[64],const th_quant_info *_qinfo){
  /*Coding mode: intra or inter.*/
  int qti;
  /*Y', C_b, C_r*/
  int pli;
  for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
    /*Quality index.*/
    int qi;
    /*Range iterator.*/
    int qri;
    /*Walk each quantizer range; qi advances across ranges as we go.*/
    for(qi=0,qri=0;qri<=_qinfo->qi_ranges[qti][pli].nranges;qri++){
      th_quant_base base;
      ogg_uint32_t  q;
      int           qi_start;
      int           qi_end;
      memcpy(base,_qinfo->qi_ranges[qti][pli].base_matrices[qri],
       sizeof(base));
      qi_start=qi;
      /*The last range endpoint produces exactly one matrix.*/
      if(qri==_qinfo->qi_ranges[qti][pli].nranges)qi_end=qi+1;
      else qi_end=qi+_qinfo->qi_ranges[qti][pli].sizes[qri];
      /*Iterate over quality indicies in this range.*/
      for(;;){
        ogg_uint32_t qfac;
        int          zzi;
        int          ci;
        /*In the original VP3.2 code, the rounding offset and the size of the
           dead zone around 0 were controlled by a "sharpness" parameter.
          The size of our dead zone is now controlled by the per-coefficient
           quality thresholds returned by our HVS module.
          We round down from a more accurate value when the quality of the
           reconstruction does not fall below our threshold and it saves
           bits.
          Hence, all of that VP3.2 code is gone from here, and the remaining
           floating point code has been implemented as equivalent integer
           code with exact precision.*/
        qfac=(ogg_uint32_t)_qinfo->dc_scale[qi]*base[0];
        /*For postprocessing, not dequantization.*/
        if(_pp_dc_scale!=NULL)_pp_dc_scale[qi]=(int)(qfac/160);
        /*Scale the DC coefficient from the proper table.*/
        q=(qfac/100)<<2;
        q=OC_CLAMPI(OC_DC_QUANT_MIN[qti],q,OC_QUANT_MAX);
        _dequant[qi][pli][qti][0]=(ogg_uint16_t)q;
        /*Now scale AC coefficients from the proper table.
          base[] is indexed in zig-zag order via OC_FZIG_ZAG.*/
        for(zzi=1;zzi<64;zzi++){
          q=((ogg_uint32_t)_qinfo->ac_scale[qi]*base[OC_FZIG_ZAG[zzi]]/100)<<2;
          q=OC_CLAMPI(OC_AC_QUANT_MIN[qti],q,OC_QUANT_MAX);
          _dequant[qi][pli][qti][zzi]=(ogg_uint16_t)q;
        }
        /*If this is a duplicate of a previous matrix, use that instead.
          This simple check helps us improve cache coherency later.*/
        {
          int dupe;
          int qtj;
          int plj;
          dupe=0;
          /*Scan all (plane,mode) pairs that were filled in before this one
             at the same qi.*/
          for(qtj=0;qtj<=qti;qtj++){
            for(plj=0;plj<(qtj<qti?3:pli);plj++){
              if(!memcmp(_dequant[qi][pli][qti],_dequant[qi][plj][qtj],
               sizeof(oc_quant_table))){
                dupe=1;
                break;
              }
            }
            if(dupe)break;
          }
          /*plj/qtj still index the matching table when dupe is set.*/
          if(dupe)_dequant[qi][pli][qti]=_dequant[qi][plj][qtj];
        }
        if(++qi>=qi_end)break;
        /*Interpolate the next base matrix.
          The +sizes[qri] term rounds the division to nearest.*/
        for(ci=0;ci<64;ci++){
          base[ci]=(unsigned char)(
           (2*((qi_end-qi)*_qinfo->qi_ranges[qti][pli].base_matrices[qri][ci]+
           (qi-qi_start)*_qinfo->qi_ranges[qti][pli].base_matrices[qri+1][ci])
           +_qinfo->qi_ranges[qti][pli].sizes[qri])/
           (2*_qinfo->qi_ranges[qti][pli].sizes[qri]));
        }
      }
    }
  }
}
/*Initializes the dequantization tables from a set of quantizer info.
  Currently the dequantizer (and elsewhere enquantizer) tables are expected
   to be initialized as pointing to the storage reserved for them in the
   oc_theora_state (resp. oc_enc_ctx) structure.
  If some tables are duplicates of others, the pointers will be adjusted to
   point to a single copy of the tables, but the storage for them will not be
   freed.
  If you're concerned about the memory footprint, the obvious thing to do is
   to move the storage out of its fixed place in the structures and allocate
   it on demand.
  However, a much, much better option is to only store the quantization
   matrices being used for the current frame, and to recalculate these as the
   qi values change between frames (this is what VP3 did).*/
void oc_dequant_tables_init(oc_quant_table *_dequant[2][3],
 int _pp_dc_scale[64],const th_quant_info *_qinfo){
  int qti; /* coding mode: intra or inter */
  int pli; /* Y U V */
  for(qti=0;qti<2;qti++){
    for(pli=0;pli<3;pli++){
      /*All 64 matrices for this (mode,plane) are built in this staging
         buffer first, then committed (or aliased) below.*/
      oc_quant_tables stage;
      int qi;  /* quality index */
      int qri; /* range iterator */
      for(qi=0,qri=0; qri<=_qinfo->qi_ranges[qti][pli].nranges; qri++){
        th_quant_base base;
        ogg_uint32_t q;
        int qi_start;
        int qi_end;
        int ci;
        memcpy(base,_qinfo->qi_ranges[qti][pli].base_matrices[qri],
         sizeof(base));
        qi_start=qi;
        /*The final range endpoint yields exactly one matrix.*/
        if(qri==_qinfo->qi_ranges[qti][pli].nranges)
          qi_end=qi+1;
        else
          qi_end=qi+_qinfo->qi_ranges[qti][pli].sizes[qri];
        /* Iterate over quality indices in this range */
        for(;;){
          /*In the original VP3.2 code, the rounding offset and the size of
             the dead zone around 0 were controlled by a "sharpness"
             parameter.
            The size of our dead zone is now controlled by the
             per-coefficient quality thresholds returned by our HVS module.
            We round down from a more accurate value when the quality of the
             reconstruction does not fall below our threshold and it saves
             bits.
            Hence, all of that VP3.2 code is gone from here, and the
             remaining floating point code has been implemented as equivalent
             integer code with exact precision.*/
          /* for postprocess, not dequant */
          if(_pp_dc_scale!=NULL)
            _pp_dc_scale[qi]=(int)((ogg_uint32_t)_qinfo->dc_scale[qi]*base[0]/160);
          /*Scale the DC coefficient from the proper table.*/
          q=((ogg_uint32_t)_qinfo->dc_scale[qi]*base[0]/100)<<2;
          q=OC_CLAMPI(OC_DC_QUANT_MIN[qti],q,OC_QUANT_MAX);
          stage[qi][0]=(ogg_uint16_t)q;
          /*Now scale AC coefficients from the proper table.*/
          for(ci=1;ci<64;ci++){
            q=((ogg_uint32_t)_qinfo->ac_scale[qi]*base[ci]/100)<<2;
            q=OC_CLAMPI(OC_AC_QUANT_MIN[qti],q,OC_QUANT_MAX);
            stage[qi][ci]=(ogg_uint16_t)q;
          }
          if(++qi>=qi_end)break;
          /*Interpolate the next base matrix.
            The +sizes[qri] term rounds the division to nearest.*/
          for(ci=0;ci<64;ci++){
            base[ci]=(unsigned char)
             ((2*((qi_end-qi)*_qinfo->qi_ranges[qti][pli].base_matrices[qri][ci]+
             (qi-qi_start)*_qinfo->qi_ranges[qti][pli].base_matrices[qri+1][ci])
             +_qinfo->qi_ranges[qti][pli].sizes[qri])/
             (2*_qinfo->qi_ranges[qti][pli].sizes[qri]));
          }
        }
      }
      /* Staging matrices complete; commit to memory only if this isn't a
         duplicate of a preceding plane.
         This simple check helps us improve cache coherency later.*/
      {
        int dupe = 0;
        int i,j;
        for(i=0;i<=qti;i++){
          for(j=0;j<(i<qti?3:pli);j++){
            if(!memcmp(stage,_dequant[i][j],sizeof(stage))){
              dupe = 1;
              break;
            }
          }
          if(dupe)break;
        }
        /*i/j still index the matching table when dupe is set.*/
        if(dupe){
          _dequant[qti][pli]=_dequant[i][j];
        }else{
          memcpy(_dequant[qti][pli],stage,sizeof(stage));
        }
      }
    }
  }
#ifdef _TH_DEBUG_
  int i, j, k, l;
  /* dump the calculated quantizer tables */
  for(i=0;i<2;i++){
    for(j=0;j<3;j++){
      for(k=0;k<64;k++){
        TH_DEBUG("quantizer table [%s][%s][Q%d] = {",
         (i==0?"intra":"inter"),(j==0?"Y":(j==1?"U":"V")),k);
        for(l=0;l<64;l++){
          if((l&7)==0) TH_DEBUG("\n ");
          TH_DEBUG("%4d ",_dequant[i][j][k][l]);
        }
        TH_DEBUG("}\n");
      }
    }
  }
#endif
}
/*Sets up the complete motion vector candidate list for a macro block:
   set A (median predictor, coded-neighbor vectors, accumulated drift,
   previous vector, and (0,0)) followed by set B (accelerated predictors for
   this and adjacent macro blocks), all truncated to full-pel positions.
  _accum: Drift in the correct motion vectors accumulated from previous
           frames.
  _mbi:   The macro block index.
  _frame: The reference frame the candidates are for.*/
static void oc_mcenc_find_candidates(oc_enc_ctx *_enc,oc_mcenc_ctx *_mcenc,
 oc_mv _accum,int _mbi,int _frame){
  oc_mb_enc_info *embs;
  int             accum_x;
  int             accum_y;
  int             a[3][2];
  int             ncandidates;
  unsigned        nmbi;
  int             i;
  embs=_enc->mb_info;
  /*Skip a position to store the median predictor in.*/
  ncandidates=1;
  if(embs[_mbi].ncneighbors>0){
    /*Fill in the first part of set A: the vectors from adjacent blocks.*/
    for(i=0;i<embs[_mbi].ncneighbors;i++){
      nmbi=embs[_mbi].cneighbors[i];
      _mcenc->candidates[ncandidates][0]=
       OC_MV_X(embs[nmbi].analysis_mv[0][_frame]);
      _mcenc->candidates[ncandidates][1]=
       OC_MV_Y(embs[nmbi].analysis_mv[0][_frame]);
      ncandidates++;
    }
  }
  accum_x=OC_MV_X(_accum);
  accum_y=OC_MV_Y(_accum);
  /*Add a few additional vectors to set A: the vectors used in the previous
     frames and the (0,0) vector.*/
  _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,accum_x,31);
  _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,accum_y,31);
  ncandidates++;
  _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,
   OC_MV_X(embs[_mbi].analysis_mv[1][_frame])+accum_x,31);
  _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,
   OC_MV_Y(embs[_mbi].analysis_mv[1][_frame])+accum_y,31);
  ncandidates++;
  _mcenc->candidates[ncandidates][0]=0;
  _mcenc->candidates[ncandidates][1]=0;
  ncandidates++;
  /*Use the first three vectors of set A to find our best predictor: their
     median.*/
  memcpy(a,_mcenc->candidates+1,sizeof(a));
  OC_SORT2I(a[0][0],a[1][0]);
  OC_SORT2I(a[0][1],a[1][1]);
  OC_SORT2I(a[1][0],a[2][0]);
  OC_SORT2I(a[1][1],a[2][1]);
  OC_SORT2I(a[0][0],a[1][0]);
  OC_SORT2I(a[0][1],a[1][1]);
  _mcenc->candidates[0][0]=a[1][0];
  _mcenc->candidates[0][1]=a[1][1];
  /*Fill in set B: accelerated predictors for this and adjacent macro
     blocks.*/
  _mcenc->setb0=ncandidates;
  /*The first time through the loop use the current macro block.*/
  nmbi=_mbi;
  for(i=0;;i++){
    /*BUG FIX: the loop body previously indexed embs[_mbi], leaving nmbi a
       dead store and duplicating the current macro block's accelerated
       predictor for every previous neighbor.
      Index embs[nmbi] so each neighbor contributes its own predictor.*/
    _mcenc->candidates[ncandidates][0]=OC_CLAMPI(-31,
     2*OC_MV_X(embs[nmbi].analysis_mv[1][_frame])
     -OC_MV_X(embs[nmbi].analysis_mv[2][_frame])+accum_x,31);
    _mcenc->candidates[ncandidates][1]=OC_CLAMPI(-31,
     2*OC_MV_Y(embs[nmbi].analysis_mv[1][_frame])
     -OC_MV_Y(embs[nmbi].analysis_mv[2][_frame])+accum_y,31);
    ncandidates++;
    if(i>=embs[_mbi].npneighbors)break;
    nmbi=embs[_mbi].pneighbors[i];
  }
  /*Truncate to full-pel positions.*/
  for(i=0;i<ncandidates;i++){
    _mcenc->candidates[i][0]=OC_DIV2(_mcenc->candidates[i][0]);
    _mcenc->candidates[i][1]=OC_DIV2(_mcenc->candidates[i][1]);
  }
  _mcenc->ncandidates=ncandidates;
}
/*Initializes the dequantization tables from a set of quantizer info.
  Currently the dequantizer (and elsewhere enquantizer) tables are expected
   to be initialized as pointing to the storage reserved for them in the
   oc_theora_state (resp. oc_enc_ctx) structure.
  If some tables are duplicates of others, the pointers will be adjusted to
   point to a single copy of the tables, but the storage for them will not be
   freed.
  If you're concerned about the memory footprint, the obvious thing to do is
   to move the storage out of its fixed place in the structures and allocate
   it on demand.
  However, a much, much better option is to only store the quantization
   matrices being used for the current frame, and to recalculate these as the
   qi values change between frames (this is what VP3 did).*/
void oc_dequant_tables_init(oc_quant_table *_dequant[2][3],
 int _pp_dc_scale[64],const th_quant_info *_qinfo){
  /*Coding mode: intra or inter.*/
  int qti;
  /*Plane: Y', C_b, or C_r.*/
  int pli;
  for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
    /*Quality index.*/
    int qi;
    /*Range iterator.*/
    int qri;
    /*These simple checks help us improve cache coherency later.*/
    /*If this plane's ranges equal the previous plane's, alias its tables.*/
    if(pli>0&&memcmp(_qinfo->qi_ranges[qti]+pli-1,
     _qinfo->qi_ranges[qti]+pli,sizeof(_qinfo->qi_ranges[qti][pli]))==0){
      _dequant[qti][pli]=_dequant[qti][pli-1];
      continue;
    }
    /*Likewise if they equal the other coding mode's ranges for this plane.*/
    if(qti>0&&memcmp(_qinfo->qi_ranges[qti-1]+pli,
     _qinfo->qi_ranges[qti]+pli,sizeof(_qinfo->qi_ranges[qti][pli]))==0){
      _dequant[qti][pli]=_dequant[qti-1][pli];
      continue;
    }
    for(qi=qri=0;qri<=_qinfo->qi_ranges[qti][pli].nranges;qri++){
      th_quant_base base;
      ogg_uint32_t  q;
      int           qi_start;
      int           qi_end;
      int           ci;
      memcpy(base,_qinfo->qi_ranges[qti][pli].base_matrices[qri],
       sizeof(base));
      qi_start=qi;
      /*The final range endpoint yields exactly one matrix.*/
      if(qri==_qinfo->qi_ranges[qti][pli].nranges)qi_end=qi+1;
      else qi_end=qi+_qinfo->qi_ranges[qti][pli].sizes[qri];
      for(;;){
        ogg_uint32_t qfac;
        /*In the original VP3.2 code, the rounding offset and the size of the
           dead zone around 0 were controlled by a "sharpness" parameter.
          The size of our dead zone is now controlled by the per-coefficient
           quality thresholds returned by our HVS module.
          We round down from a more accurate value when the quality of the
           reconstruction does not fall below our threshold and it saves
           bits.
          Hence, all of that VP3.2 code is gone from here, and the remaining
           floating point code has been implemented as equivalent integer
           code with exact precision.*/
        /*Scale the DC coefficient from the proper table.*/
        qfac=(ogg_uint32_t)_qinfo->dc_scale[qi]*base[0];
        /*The DC scale factor is also exported for postprocessing.*/
        if(_pp_dc_scale!=NULL)_pp_dc_scale[qi]=(int)(qfac/160);
        q=(qfac/100)<<2;
        q=OC_CLAMPI(OC_DC_QUANT_MIN[qti],q,OC_QUANT_MAX);
        _dequant[qti][pli][qi][0]=(ogg_uint16_t)q;
        /*Now scale AC coefficients from the proper table.*/
        for(ci=1;ci<64;ci++){
          q=((ogg_uint32_t)_qinfo->ac_scale[qi]*base[ci]/100)<<2;
          q=OC_CLAMPI(OC_AC_QUANT_MIN[qti],q,OC_QUANT_MAX);
          _dequant[qti][pli][qi][ci]=(ogg_uint16_t)q;
        }
        if(++qi>=qi_end)break;
        /*Interpolate the next base matrix.
          The +sizes[qri] term rounds the division to nearest.*/
        for(ci=0;ci<64;ci++){
          base[ci]=(unsigned char)(
           (2*((qi_end-qi)*_qinfo->qi_ranges[qti][pli].base_matrices[qri][ci]+
           (qi-qi_start)*_qinfo->qi_ranges[qti][pli].base_matrices[qri+1][ci])
           +_qinfo->qi_ranges[qti][pli].sizes[qri])/
           (2*_qinfo->qi_ranges[qti][pli].sizes[qri]));
        }
      }
    }
  }
}
/*Compile collected SATD/logq/rate/RMSE metrics into a form that's
   immediately useful for mode decision.*/
void oc_mode_metrics_update(oc_mode_metrics (*_metrics)[3][2][OC_COMP_BINS],
 int _niters_min,int _reweight,oc_mode_rd (*_table)[3][2][OC_COMP_BINS],
 int _shift,double (*_weight)[3][2][OC_COMP_BINS]){
  int niters;
  int prevdr;
  int prevdd;
  int dr;
  int dd;
  int pli;
  int qti;
  int qi;
  int si;
  dd=dr=INT_MAX;
  niters=0;
  /*The encoder interpolates rate and RMSE terms bilinearly from an
     OC_LOGQ_BINS by OC_COMP_BINS grid of sample points in _table.
    To find the sample values at the grid points that minimize the total
     squared prediction error actually requires solving a relatively sparse
     linear system with a number of variables equal to the number of grid
     points.
    Instead of writing a general sparse linear system solver, we just use
     Gauss-Seidel iteration, i.e., we update one grid point at time until
     they stop changing.*/
  do{
    prevdr=dr;
    prevdd=dd;
    /*dr/dd accumulate the total absolute change in rate/RMSE this pass.*/
    dd=dr=0;
    for(pli=0;pli<3;pli++){
      for(qti=0;qti<2;qti++){
        for(qi=0;qi<OC_LOGQ_BINS;qi++){
          for(si=0;si<OC_COMP_BINS;si++){
            oc_mode_metrics m[4];
            int             s0[4];
            int             s1[4];
            int             q0[4];
            int             q1[4];
            double          ra[4];
            double          rb[4];
            double          rc[4];
            double          da[4];
            double          db[4];
            double          dc[4];
            double          r;
            double          d;
            int             rate;
            int             rmse;
            int             ds;
            int             n;
            n=0;
            /*Collect the statistics for the (up to) four bins grid point
               (si,qi) touches.
              NOTE: '<<' binds looser than binary '-'/'+', so si-1<<_shift is
               (si-1)<<_shift (and likewise si+ds<<_shift).*/
            if(qi>0&&si>0){
              q0[n]=OC_MODE_LOGQ[qi-1][pli][qti];
              q1[n]=OC_MODE_LOGQ[qi][pli][qti];
              s0[n]=si-1<<_shift;
              s1[n]=si<<_shift;
              ra[n]=ldexp(_table[qi-1][pli][qti][si-1].rate,-OC_BIT_SCALE);
              da[n]=ldexp(_table[qi-1][pli][qti][si-1].rmse,-OC_RMSE_SCALE);
              rb[n]=ldexp(_table[qi-1][pli][qti][si].rate,-OC_BIT_SCALE);
              db[n]=ldexp(_table[qi-1][pli][qti][si].rmse,-OC_RMSE_SCALE);
              rc[n]=ldexp(_table[qi][pli][qti][si-1].rate,-OC_BIT_SCALE);
              dc[n]=ldexp(_table[qi][pli][qti][si-1].rmse,-OC_RMSE_SCALE);
              *(m+n++)=*(_metrics[qi-1][pli][qti]+si-1);
            }
            if(qi>0){
              /*At the last SATD bin, mirror to the bin below instead.*/
              ds=si+1<OC_COMP_BINS?1:-1;
              q0[n]=OC_MODE_LOGQ[qi-1][pli][qti];
              q1[n]=OC_MODE_LOGQ[qi][pli][qti];
              s0[n]=si+ds<<_shift;
              s1[n]=si<<_shift;
              ra[n]=ldexp(_table[qi-1][pli][qti][si+ds].rate,-OC_BIT_SCALE);
              da[n]=
               ldexp(_table[qi-1][pli][qti][si+ds].rmse,-OC_RMSE_SCALE);
              rb[n]=ldexp(_table[qi-1][pli][qti][si].rate,-OC_BIT_SCALE);
              db[n]=ldexp(_table[qi-1][pli][qti][si].rmse,-OC_RMSE_SCALE);
              rc[n]=ldexp(_table[qi][pli][qti][si+ds].rate,-OC_BIT_SCALE);
              dc[n]=ldexp(_table[qi][pli][qti][si+ds].rmse,-OC_RMSE_SCALE);
              *(m+n++)=*(_metrics[qi-1][pli][qti]+si);
            }
            if(qi+1<OC_LOGQ_BINS&&si>0){
              q0[n]=OC_MODE_LOGQ[qi+1][pli][qti];
              q1[n]=OC_MODE_LOGQ[qi][pli][qti];
              s0[n]=si-1<<_shift;
              s1[n]=si<<_shift;
              ra[n]=ldexp(_table[qi+1][pli][qti][si-1].rate,-OC_BIT_SCALE);
              da[n]=ldexp(_table[qi+1][pli][qti][si-1].rmse,-OC_RMSE_SCALE);
              rb[n]=ldexp(_table[qi+1][pli][qti][si].rate,-OC_BIT_SCALE);
              db[n]=ldexp(_table[qi+1][pli][qti][si].rmse,-OC_RMSE_SCALE);
              rc[n]=ldexp(_table[qi][pli][qti][si-1].rate,-OC_BIT_SCALE);
              dc[n]=ldexp(_table[qi][pli][qti][si-1].rmse,-OC_RMSE_SCALE);
              *(m+n++)=*(_metrics[qi][pli][qti]+si-1);
            }
            if(qi+1<OC_LOGQ_BINS){
              ds=si+1<OC_COMP_BINS?1:-1;
              q0[n]=OC_MODE_LOGQ[qi+1][pli][qti];
              q1[n]=OC_MODE_LOGQ[qi][pli][qti];
              s0[n]=si+ds<<_shift;
              s1[n]=si<<_shift;
              ra[n]=ldexp(_table[qi+1][pli][qti][si+ds].rate,-OC_BIT_SCALE);
              da[n]=
               ldexp(_table[qi+1][pli][qti][si+ds].rmse,-OC_RMSE_SCALE);
              rb[n]=ldexp(_table[qi+1][pli][qti][si].rate,-OC_BIT_SCALE);
              db[n]=ldexp(_table[qi+1][pli][qti][si].rmse,-OC_RMSE_SCALE);
              rc[n]=ldexp(_table[qi][pli][qti][si+ds].rate,-OC_BIT_SCALE);
              dc[n]=ldexp(_table[qi][pli][qti][si+ds].rmse,-OC_RMSE_SCALE);
              *(m+n++)=*(_metrics[qi][pli][qti]+si);
            }
            /*On the first pass, initialize with a simple weighted average of
               the neighboring bins.*/
            if(!OC_HAS_MODE_METRICS&&niters==0){
              double w;
              w=r=d=0;
              while(n-->0){
                w+=m[n].w;
                r+=m[n].r;
                d+=m[n].d;
              }
              /*Guard against (near-)zero total weight.*/
              r=w>1E-3?r/w:0;
              d=w>1E-3?d/w:0;
              _weight[qi][pli][qti][si]=w;
            }
            else{
              /*Update the grid point and save the weight for later.*/
              _weight[qi][pli][qti][si]=
               oc_mode_metrics_solve(&r,&d,m,s0,s1,q0,q1,ra,rb,rc,da,db,dc,n);
            }
            /*Requantize the solution and track how much it moved.*/
            rate=OC_CLAMPI(-32768,(int)(ldexp(r,OC_BIT_SCALE)+0.5),32767);
            rmse=OC_CLAMPI(-32768,(int)(ldexp(d,OC_RMSE_SCALE)+0.5),32767);
            dr+=abs(rate-_table[qi][pli][qti][si].rate);
            dd+=abs(rmse-_table[qi][pli][qti][si].rmse);
            _table[qi][pli][qti][si].rate=(ogg_int16_t)rate;
            _table[qi][pli][qti][si].rmse=(ogg_int16_t)rmse;
          }
        }
      }
    }
  }
  /*After a fixed number of initial iterations, only iterate so long as the
     total change is decreasing.
    This ensures we don't oscillate forever, which is a danger, as all of our
     results are rounded fairly coarsely.*/
  while((dr>0||dd>0)&&(niters++<_niters_min||(dr<prevdr&&dd<prevdd)));
  if(_reweight){
    /*Now, reduce the values of the optimal solution until we get enough
       samples in each bin to overcome the constant OC_ZWEIGHT factor.
      This encourages sampling under-populated bins and prevents a single
       large sample early on from discouraging coding in that bin ever
       again.*/
    for(pli=0;pli<3;pli++){
      for(qti=0;qti<2;qti++){
        for(qi=0;qi<OC_LOGQ_BINS;qi++){
          for(si=0;si<OC_COMP_BINS;si++){
            double wt;
            wt=_weight[qi][pli][qti][si];
            wt/=OC_ZWEIGHT+wt;
            _table[qi][pli][qti][si].rate=(ogg_int16_t)
             (_table[qi][pli][qti][si].rate*wt+0.5);
            _table[qi][pli][qti][si].rmse=(ogg_int16_t)
             (_table[qi][pli][qti][si].rmse*wt+0.5);
          }
        }
      }
    }
  }
}