Exemple #1
0
/** Updates the probability model based on the encoded/decoded value
 *
 * @param [in,out] model generic prob model
 * @param [in,out] ExQ16 expectation of x
 * @param [in]     x     variable encoded/decoded (used for ExQ16)
 * @param [in]     xs    variable x after shift (used for the model)
 * @param [in]     id    id of the icdf to adapt
 * @param [in]     integration integration period of ExQ16 (leaky average over 1<<integration samples)
 *
 */
void generic_model_update(GenericEncoder *model,int *ExQ16,int x,int xs,int id,int integration)
{
  int i;
  int xenc;
  ogg_uint16_t *cdf;

  cdf=model->cdf[id];
  /* Renormalize if we cannot add increment */
  if (cdf[15]+model->increment>32767){
    for (i=0;i<16;i++){
      /* Second term ensures that the pdf is non-null */
      cdf[i]=(cdf[i]>>1)+i+1;
    }
  }

  /* Update freq count */
  xenc=OD_MINI(15,xs);
  /* This can be easily vectorized */
  for (i=xenc;i<16;i++)
    cdf[i]+=model->increment;

  /* We could have saturated ExQ16 directly, but this is safe and simpler */
  x = OD_MINI(x, 32767);
  *ExQ16+=(x<<(16-integration))-(*ExQ16>>integration);
}
Exemple #2
0
/** Adapts a Q15 cdf after encoding/decoding a symbol. */
void od_cdf_adapt_q15(int val, uint16_t *cdf, int n, int *count, int rate) {
  int i;
  *count = OD_MINI(*count + 1, 1 << rate);
  OD_ASSERT(cdf[n - 1] == 32768);
  if (*count >= 1 << rate) {
    /* Steady-state adaptation based on a simple IIR with dyadic rate. */
    for (i = 0; i < n; i++) {
      int tmp;
      /* When (i < val), we want the adjustment ((cdf[i] - tmp) >> rate) to be
         positive so long as (cdf[i] > i + 1), and 0 when (cdf[i] == i + 1),
         to ensure we don't drive any probabilities to 0. Replacing cdf[i] with
         (i + 2) and solving ((i + 2 - tmp) >> rate == 1) for tmp produces
         tmp == i + 2 - (1 << rate). Using this value of tmp with
         cdf[i] == i + 1 instead gives an adjustment of 0 as desired.

         When (i >= val), we want ((cdf[i] - tmp) >> rate) to be negative so
         long as cdf[i] < 32768 - (n - 1 - i), and 0 when
         cdf[i] == 32768 - (n - 1 - i), again to ensure we don't drive any
         probabilities to 0. Since right-shifting any negative value is still
         negative, we can solve (32768 - (n - 1 - i) - tmp == 0) for tmp,
         producing tmp = 32769 - n + i. Using this value of tmp with smaller
         values of cdf[i] instead gives negative adjustments, as desired.

         Combining the two cases gives the expression below. These could be
         stored in a lookup table indexed by n and rate to avoid the
         arithmetic. */
      tmp = 2 - (1<<rate) + i + (32767 + (1<<rate) - n)*(i >= val);
      cdf[i] -= (cdf[i] - tmp) >> rate;
    }
  }
  else {
Exemple #3
0
/** Encodes a single vector of integers (eg, a partition within a
 *  coefficient block) using PVQ
 *
 * @param [in,out] ec         range encoder
 * @param [in]     qg         quantized gain
 * @param [in]     theta      quantized post-prediction theta
 * @param [in]     max_theta  maximum possible quantized theta value
 * @param [in]     in         coefficient vector to code
 * @param [in]     n          number of coefficients in partition
 * @param [in]     k          number of pulses in partition
 * @param [in,out] model      entropy encoder state
 * @param [in,out] adapt      adaptation context
 * @param [in,out] exg        ExQ16 expectation of gain value
 * @param [in,out] ext        ExQ16 expectation of theta value
 * @param [in]     nodesync   do not use info that depend on the reference
 * @param [in]     cdf_ctx    selects which cdf context to use
 * @param [in]     is_keyframe whether we're encoding a keyframe
 * @param [in]     code_skip  whether the "skip rest" flag is allowed
 * @param [in]     skip_rest  when set, we skip all higher bands
 * @param [in]     encode_flip whether we need to encode the CfL flip flag now
 * @param [in]     flip       value of the CfL flip flag
 */
static void pvq_encode_partition(od_ec_enc *ec,
                                 int qg,
                                 int theta,
                                 int max_theta,
                                 const od_coeff *in,
                                 int n,
                                 int k,
                                 generic_encoder model[3],
                                 od_adapt_ctx *adapt,
                                 int *exg,
                                 int *ext,
                                 int nodesync,
                                 int cdf_ctx,
                                 int is_keyframe,
                                 int code_skip,
                                 int skip_rest,
                                 int encode_flip,
                                 int flip) {
  int noref;
  int id;
  noref = (theta == -1);
  id = (qg > 0) + 2*OD_MINI(theta + 1,3) + 8*code_skip*skip_rest;
  if (is_keyframe) {
    OD_ASSERT(id != 8);
    if (id >= 8) id--;
  }
  else {
    OD_ASSERT(id != 10);
    if (id >= 10) id--;
  }
  /* Jointly code gain, theta and noref for small values. Then we handle
     larger gain and theta values. For noref, theta = -1. */
  od_encode_cdf_adapt(ec, id, &adapt->pvq.pvq_gaintheta_cdf[cdf_ctx][0],
   8 + 7*code_skip, adapt->pvq.pvq_gaintheta_increment);
  if (encode_flip) {
    /* We could eventually do some smarter entropy coding here, but it would
       have to be good enough to overcome the overhead of the entropy coder.
       An early attempt using a "toogle" flag with simple adaptation wasn't
       worth the trouble. */
    od_ec_enc_bits(ec, flip, 1);
  }
  if (qg > 0) {
    int tmp;
    tmp = *exg;
    generic_encode(ec, &model[!noref], qg - 1, -1, &tmp, 2);
    OD_IIR_DIADIC(*exg, qg << 16, 2);
  }
  if (theta > 1 && (nodesync || max_theta > 3)) {
    int tmp;
    tmp = *ext;
    generic_encode(ec, &model[2], theta - 2, nodesync ? -1 : max_theta - 3,
     &tmp, 2);
    OD_IIR_DIADIC(*ext, theta << 16, 2);
  }
  od_encode_pvq_codeword(ec, &adapt->pvq.pvq_codeword_ctx, in,
   n - (theta != -1), k);
}
Exemple #4
0
/* Initialize the quantization matrix with the magnitude compensation applied.
   We need to compensate for the magnitude because lapping causes some basis
   functions to be smaller, so they would end up being quantized too finely
   (the same error in the quantized domain would result in a smaller pixel
   domain error). */
void od_init_qm(int16_t *x, int16_t *x_inv, const int *qm) {
  int i;
  int j;
  int16_t y[OD_QM_BSIZE];
  int16_t y_inv[OD_QM_BSIZE];
  int16_t *x1;
  int16_t *x1_inv;
  int off;
  int bs;
  int xydec;
  for (bs = 0; bs < OD_NBSIZES; bs++) {
    for (xydec = 0; xydec < 2; xydec++) {
      off = od_qm_offset(bs, xydec);
      x1 = x + off;
      x1_inv = x_inv + off;
      for (i = 0; i < 4 << bs; i++) {
        for (j = 0; j < 4 << bs; j++) {
          double mag;
#if OD_DEBLOCKING || OD_DISABLE_FILTER
          mag = 1.0;
#else
          mag = OD_BASIS_MAG[xydec][bs][i]*OD_BASIS_MAG[xydec][bs][j];
#endif
          if (i == 0 && j == 0) {
            mag = 1.0;
          }
          else {
            mag /= 0.0625*qm[(i << 1 >> bs)*8 + (j << 1 >> bs)];
            OD_ASSERT(mag > 0.0);
          }
          /*Convert to fit in 16 bits.*/
          y[i*(4 << bs) + j] = (int16_t)OD_MINI(OD_QM_SCALE_MAX,
           (int16_t)floor(.5 + mag*OD_QM_SCALE));
          y_inv[i*(4 << bs) + j] = (int16_t)floor(.5
           + OD_QM_SCALE*OD_QM_INV_SCALE/(double)y[i*(4 << bs) + j]);
        }
      }
      od_raster_to_coding_order_16(x1, 4 << bs, y, 4 << bs);
      od_raster_to_coding_order_16(x1_inv, 4 << bs, y_inv, 4 << bs);
    }
  }
}
Exemple #5
0
/*This is a smart copy that copies the intersection of the two img planes
   and performs any needed bitdepth or packing conversion.
  Note that this copy performs clamping as it's only used for copying into
   and out of the codec, not for internal reference->reference copies.
  Does not touch any padding/border/unintersected area.*/
void od_img_plane_copy(daala_image* dst, daala_image* src, int pli) {
  daala_image_plane *dst_plane;
  daala_image_plane *src_plane;
  unsigned char *dst_data;
  unsigned char *src_data;
  int dst_xstride;
  int dst_ystride;
  int src_xstride;
  int src_ystride;
  int dst_xdec;
  int dst_ydec;
  int src_xdec;
  int src_ydec;
  int dst_plane_width;
  int dst_plane_height;
  int src_plane_width;
  int src_plane_height;
  int w;
  int h;
  int x;
  int y;
  if (pli >= dst->nplanes || pli >= src->nplanes) {
    return;
  }
  dst_plane = dst->planes+pli;
  src_plane = src->planes+pli;
  dst_xstride = dst_plane->xstride;
  dst_ystride = dst_plane->ystride;
  src_xstride = src_plane->xstride;
  src_ystride = src_plane->ystride;

  OD_ASSERT(dst_xstride == 1 || dst_xstride == 2);
  OD_ASSERT(src_xstride == 1 || src_xstride == 2);
  dst_xdec = dst_plane->xdec;
  dst_ydec = dst_plane->ydec;
  src_xdec = src_plane->xdec;
  src_ydec = src_plane->ydec;
  dst_data = dst_plane->data;
  src_data = src_plane->data;
  dst_plane_width = OD_PLANE_SZ(dst->width, dst_xdec);
  dst_plane_height = OD_PLANE_SZ(dst->height, dst_ydec);
  src_plane_width = OD_PLANE_SZ(src->width, src_xdec);
  src_plane_height = OD_PLANE_SZ(src->height, src_ydec);
  w = OD_MINI(dst_plane_width, src_plane_width);
  h = OD_MINI(dst_plane_height, src_plane_height);
  for (y = 0; y < h; y++) {
    unsigned char *src_ptr;
    unsigned char *dst_ptr;
    src_ptr = src_data;
    dst_ptr = dst_data;
    if (src_plane->bitdepth > 8) {
      if (dst_plane->bitdepth > 8) {
        /*Both source and destination are multibyte.*/
        if (dst_plane->bitdepth >= src_plane->bitdepth) {
          /*Shift source up into destination.*/
          int upshift;
          upshift = dst_plane->bitdepth - src_plane->bitdepth;
          for (x = 0; x < w; x++) {
            *(int16_t *)dst_ptr = OD_CLAMPI(0,
             *(int16_t *)src_ptr << upshift,
             (1 << dst_plane->bitdepth) - 1);
            src_ptr += src_xstride;
            dst_ptr += dst_xstride;
          }
        }
        else {
          /*Round source down into destination.*/
          int dnshift;
          dnshift = src_plane->bitdepth - dst_plane->bitdepth;
          for (x = 0; x < w; x++) {
            *(int16_t *)dst_ptr = OD_CLAMPI(0,
             (*(int16_t *)src_ptr + (1 << dnshift >> 1)) >> dnshift,
             (1 << dst_plane->bitdepth) - 1);
            src_ptr += src_xstride;
            dst_ptr += dst_xstride;
          }
        }
      }
      else {
        /*Round multibyte source down into 8-bit destination.*/
        int dnshift;
        dnshift = src_plane->bitdepth - 8;
        OD_ASSERT(dst_plane->bitdepth == 8);
        for (x = 0; x < w; x++) {
          *dst_ptr = OD_CLAMP255((*(int16_t *)src_ptr
           + (1 << dnshift >> 1)) >> dnshift);
          src_ptr += src_xstride;
          dst_ptr += dst_xstride;
        }
      }
    }
    else {
Exemple #6
0
/** Decode quantized theta value from coded value
 *
 * @param [in]      t          quantized companded gain value
 * @param [in]      max_theta  maximum theta value
 * @return                     decoded theta value
 */
double od_pvq_compute_theta(int t, int max_theta) {
  if (max_theta != 0) return OD_MINI(t, max_theta - 1)*.5*M_PI/max_theta;
  return 0;
}
Exemple #7
0
/** Applies Householder reflection from compute_householder(). The
 * reflection is its own inverse.
 *
 * @param [out]     out    reflected vector
 * @param [in]      x      vector to be reflected
 * @param [in]      r      reflection
 * @param [in]      n      number of dimensions in x,r
 */
void od_apply_householder(od_val16 *out, const od_val16 *x, const od_val16 *r,
 int n) {
  int i;
  od_val32 proj;
  od_val16 proj_1;
  od_val32 l2r;
#if !defined(OD_FLOAT_PVQ)
  od_val16 proj_norm;
  od_val16 l2r_norm;
  od_val16 rcp;
  int proj_shift;
  int l2r_shift;
  int outshift;
#endif
  /*FIXME: Can we get l2r and/or l2r_shift from an earlier computation?*/
  l2r = 0;
  for (i = 0; i < n; i++) {
    l2r += OD_MULT16_16(r[i], r[i]);
  }
  /* Apply Householder reflection */
  proj = 0;
  for (i = 0; i < n; i++) {
    proj += OD_MULT16_16(r[i], x[i]);
  }
#if defined(OD_FLOAT_PVQ)
  proj_1 = proj*2./(1e-100 + l2r);
  for (i = 0; i < n; i++) {
    out[i] = x[i] - r[i]*proj_1;
  }
#else
  /*l2r_norm is [0.5, 1.0[ in Q15.*/
  l2r_shift = (OD_ILOG(l2r) - 1) - 14;
  l2r_norm = OD_VSHR_ROUND(l2r, l2r_shift);
  rcp = od_rcp(l2r_norm);
  proj_shift = (OD_ILOG(abs(proj)) - 1) - 14;
  /*proj_norm is [0.5, 1.0[ in Q15.*/
  proj_norm = OD_VSHR_ROUND(proj, proj_shift);
  proj_1 = OD_MULT16_16_Q15(proj_norm, rcp);
  /*The proj*2. in the float code becomes -1 in the final outshift.
    The sign of l2r_shift is positive since we're taking the reciprocal of
     l2r_norm and this is a right shift.*/
  outshift = OD_MINI(30, OD_RCP_OUTSHIFT - proj_shift - 1 + l2r_shift);
  if (outshift >= 0) {
    for (i = 0; i < n; i++) {
      int32_t tmp;
      tmp = OD_MULT16_16(r[i], proj_1);
      tmp = OD_SHR_ROUND(tmp, outshift);
      out[i] = x[i] - tmp;
    }
  }
  else {
    /*FIXME: Can we make this case impossible?
      Right now, if r[] is all zeros except for 1, 2, or 3 ones, and
       if x[] is all zeros except for large values at the same position as the
       ones in r[], then we can end up with a shift of -1.*/
    for (i = 0; i < n; i++) {
      int32_t tmp;
      tmp = OD_MULT16_16(r[i], proj_1);
      tmp = OD_SHL(tmp, -outshift);
      out[i] = x[i] - tmp;
    }
  }
#endif
}
Exemple #8
0
/** Perform PVQ quantization with prediction, trying several
 * possible gains and angles. See draft-valin-videocodec-pvq and
 * http://jmvalin.ca/slides/pvq.pdf for more details.
 *
 * @param [out]    out       coefficients after quantization
 * @param [in]     x0        coefficients before quantization
 * @param [in]     r0        reference, aka predicted coefficients
 * @param [in]     n         number of dimensions
 * @param [in]     q0        quantization step size
 * @param [out]    y         pulse vector (i.e. selected PVQ codevector)
 * @param [out]    itheta    angle between input and reference (-1 if noref)
 * @param [out]    max_theta maximum value of itheta that could have been
 * @param [out]    vk        total number of pulses
 * @param [in]     beta      per-band activity masking beta param
 * @param [out]    skip_diff distortion cost of skipping this block
 *                           (accumulated)
 * @param [in]     robust    make stream robust to error in the reference
 * @param [in]     is_keyframe whether we're encoding a keyframe
 * @param [in]     pli       plane index
 * @param [in]     adapt     probability adaptation context
 * @param [in]     qm        QM with magnitude compensation
 * @param [in]     qm_inv    Inverse of QM with magnitude compensation
 * @return         gain      index of the quatized gain
*/
static int pvq_theta(od_coeff *out, const od_coeff *x0, const od_coeff *r0,
 int n, int q0, od_coeff *y, int *itheta, int *max_theta, int *vk,
 double beta, double *skip_diff, int robust, int is_keyframe, int pli,
 const od_adapt_ctx *adapt, const int16_t *qm,
 const int16_t *qm_inv) {
  od_val32 g;
  od_val32 gr;
  od_coeff y_tmp[MAXN];
  int i;
  /* Number of pulses. */
  int k;
  /* Companded gain of x and reference, normalized to q. */
  od_val32 cg;
  od_val32 cgr;
  int icgr;
  int qg;
  /* Best RDO cost (D + lamdba*R) so far. */
  double best_cost;
  /* Distortion (D) that corresponds to the best RDO cost. */
  double best_dist;
  double dist;
  /* Sign of Householder reflection. */
  int s;
  /* Dimension on which Householder reflects. */
  int m;
  od_val32 theta;
  double corr;
  int best_k;
  od_val32 best_qtheta;
  od_val32 gain_offset;
  int noref;
  double lambda;
  double skip_dist;
  int cfl_enabled;
  int skip;
  double gain_weight;
  od_val16 x16[MAXN];
  od_val16 r16[MAXN];
  int xshift;
  int rshift;
  lambda = OD_PVQ_LAMBDA;
  /* Give more weight to gain error when calculating the total distortion. */
  gain_weight = 1.4;
  OD_ASSERT(n > 1);
  corr = 0;
#if !defined(OD_FLOAT_PVQ)
  /* Shift needed to make x fit in 16 bits even after rotation.
     This shift value is not normative (it can be changed without breaking
     the bitstream) */
  xshift = OD_MAXI(0, od_vector_log_mag(x0, n) - 15);
  /* Shift needed to make the reference fit in 15 bits, so that the Householder
     vector can fit in 16 bits.
     This shift value *is* normative, and has to match the decoder. */
  rshift = OD_MAXI(0, od_vector_log_mag(r0, n) - 14);
#else
  xshift = 0;
  rshift = 0;
#endif
  for (i = 0; i < n; i++) {
#if defined(OD_FLOAT_PVQ)
    /*This is slightly different from the original float PVQ code,
       where the qm was applied in the accumulation in od_pvq_compute_gain and
       the vectors were od_coeffs, not od_val16 (i.e. double).*/
    x16[i] = x0[i]*(double)qm[i]*OD_QM_SCALE_1;
    r16[i] = r0[i]*(double)qm[i]*OD_QM_SCALE_1;
#else
    x16[i] = OD_SHR_ROUND(x0[i]*qm[i], OD_QM_SHIFT + xshift);
    r16[i] = OD_SHR_ROUND(r0[i]*qm[i], OD_QM_SHIFT + rshift);
#endif
    corr += OD_MULT16_16(x16[i], r16[i]);
  }
  cfl_enabled = is_keyframe && pli != 0 && !OD_DISABLE_CFL;
  cg  = od_pvq_compute_gain(x16, n, q0, &g, beta, xshift);
  cgr = od_pvq_compute_gain(r16, n, q0, &gr, beta, rshift);
  if (cfl_enabled) cgr = OD_CGAIN_SCALE;
  /* gain_offset is meant to make sure one of the quantized gains has
     exactly the same gain as the reference. */
#if defined(OD_FLOAT_PVQ)
  icgr = (int)floor(.5 + cgr);
#else
  icgr = OD_SHR_ROUND(cgr, OD_CGAIN_SHIFT);
#endif
  gain_offset = cgr - OD_SHL(icgr, OD_CGAIN_SHIFT);
  /* Start search with null case: gain=0, no pulse. */
  qg = 0;
  dist = gain_weight*cg*cg*OD_CGAIN_SCALE_2;
  best_dist = dist;
  best_cost = dist + lambda*od_pvq_rate(0, 0, -1, 0, adapt, NULL, 0, n,
   is_keyframe, pli);
  noref = 1;
  best_k = 0;
  *itheta = -1;
  *max_theta = 0;
  OD_CLEAR(y, n);
  best_qtheta = 0;
  m = 0;
  s = 1;
  corr = corr/(1e-100 + g*(double)gr/OD_SHL(1, xshift + rshift));
  corr = OD_MAXF(OD_MINF(corr, 1.), -1.);
  if (is_keyframe) skip_dist = gain_weight*cg*cg*OD_CGAIN_SCALE_2;
  else {
    skip_dist = gain_weight*(cg - cgr)*(cg - cgr)
     + cgr*(double)cg*(2 - 2*corr);
    skip_dist *= OD_CGAIN_SCALE_2;
  }
  if (!is_keyframe) {
    /* noref, gain=0 isn't allowed, but skip is allowed. */
    od_val32 scgr;
    scgr = OD_MAXF(0,gain_offset);
    if (icgr == 0) {
      best_dist = gain_weight*(cg - scgr)*(cg - scgr)
       + scgr*(double)cg*(2 - 2*corr);
      best_dist *= OD_CGAIN_SCALE_2;
    }
    best_cost = best_dist + lambda*od_pvq_rate(0, icgr, 0, 0, adapt, NULL,
     0, n, is_keyframe, pli);
    best_qtheta = 0;
    *itheta = 0;
    *max_theta = 0;
    noref = 0;
  }
  if (n <= OD_MAX_PVQ_SIZE && !od_vector_is_null(r0, n) && corr > 0) {
    od_val16 xr[MAXN];
    int gain_bound;
    gain_bound = OD_SHR(cg - gain_offset, OD_CGAIN_SHIFT);
    /* Perform theta search only if prediction is useful. */
    theta = OD_ROUND32(OD_THETA_SCALE*acos(corr));
    m = od_compute_householder(r16, n, gr, &s, rshift);
    od_apply_householder(xr, x16, r16, n);
    for (i = m; i < n - 1; i++) xr[i] = xr[i + 1];
    /* Search for the best gain within a reasonable range. */
    for (i = OD_MAXI(1, gain_bound - 1); i <= gain_bound + 1; i++) {
      int j;
      od_val32 qcg;
      int ts;
      /* Quantized companded gain */
      qcg = OD_SHL(i, OD_CGAIN_SHIFT) + gain_offset;
      /* Set angular resolution (in ra) to match the encoded gain */
      ts = od_pvq_compute_max_theta(qcg, beta);
      /* Search for the best angle within a reasonable range. */
      for (j = OD_MAXI(0, (int)floor(.5 + theta*OD_THETA_SCALE_1*2/M_PI*ts)
       - 2); j <= OD_MINI(ts - 1, (int)ceil(theta*OD_THETA_SCALE_1*2/M_PI*ts));
       j++) {
        double cos_dist;
        double cost;
        double dist_theta;
        double sin_prod;
        od_val32 qtheta;
        qtheta = od_pvq_compute_theta(j, ts);
        k = od_pvq_compute_k(qcg, j, qtheta, 0, n, beta, robust || is_keyframe);
        sin_prod = od_pvq_sin(theta)*OD_TRIG_SCALE_1*od_pvq_sin(qtheta)*
         OD_TRIG_SCALE_1;
        /* PVQ search, using a gain of qcg*cg*sin(theta)*sin(qtheta) since
           that's the factor by which cos_dist is multiplied to get the
           distortion metric. */
        cos_dist = pvq_search_rdo_double(xr, n - 1, k, y_tmp,
         qcg*(double)cg*sin_prod*OD_CGAIN_SCALE_2);
        /* See Jmspeex' Journal of Dubious Theoretical Results. */
        dist_theta = 2 - 2.*od_pvq_cos(theta - qtheta)*OD_TRIG_SCALE_1
         + sin_prod*(2 - 2*cos_dist);
        dist = gain_weight*(qcg - cg)*(qcg - cg) + qcg*(double)cg*dist_theta;
        dist *= OD_CGAIN_SCALE_2;
        /* Do approximate RDO. */
        cost = dist + lambda*od_pvq_rate(i, icgr, j, ts, adapt, y_tmp, k, n,
         is_keyframe, pli);
        if (cost < best_cost) {
          best_cost = cost;
          best_dist = dist;
          qg = i;
          best_k = k;
          best_qtheta = qtheta;
          *itheta = j;
          *max_theta = ts;
          noref = 0;
          OD_COPY(y, y_tmp, n - 1);
        }
      }
    }
  }
  /* Don't bother with no-reference version if there's a reasonable
     correlation. The only exception is luma on a keyframe because
     H/V prediction is unreliable. */
  if (n <= OD_MAX_PVQ_SIZE &&
   ((is_keyframe && pli == 0) || corr < .5
   || cg < (od_val32)(OD_SHL(2, OD_CGAIN_SHIFT)))) {
    int gain_bound;
    gain_bound = OD_SHR(cg, OD_CGAIN_SHIFT);
    /* Search for the best gain (haven't determined reasonable range yet). */
    for (i = OD_MAXI(1, gain_bound); i <= gain_bound + 1; i++) {
      double cos_dist;
      double cost;
      od_val32 qcg;
      qcg = OD_SHL(i, OD_CGAIN_SHIFT);
      k = od_pvq_compute_k(qcg, -1, -1, 1, n, beta, robust || is_keyframe);
      cos_dist = pvq_search_rdo_double(x16, n, k, y_tmp,
       qcg*(double)cg*OD_CGAIN_SCALE_2);
      /* See Jmspeex' Journal of Dubious Theoretical Results. */
      dist = gain_weight*(qcg - cg)*(qcg - cg)
       + qcg*(double)cg*(2 - 2*cos_dist);
      dist *= OD_CGAIN_SCALE_2;
      /* Do approximate RDO. */
      cost = dist + lambda*od_pvq_rate(i, 0, -1, 0, adapt, y_tmp, k, n,
       is_keyframe, pli);
      if (cost <= best_cost) {
        best_cost = cost;
        best_dist = dist;
        qg = i;
        noref = 1;
        best_k = k;
        *itheta = -1;
        *max_theta = 0;
        OD_COPY(y, y_tmp, n);
      }
    }
  }
  k = best_k;
  theta = best_qtheta;
  skip = 0;
  if (noref) {
    if (qg == 0) skip = OD_PVQ_SKIP_ZERO;
  }
  else {
    if (!is_keyframe && qg == 0) {
      skip = (icgr ? OD_PVQ_SKIP_ZERO : OD_PVQ_SKIP_COPY);
    }
    if (qg == icgr && *itheta == 0 && !cfl_enabled) skip = OD_PVQ_SKIP_COPY;
  }
  /* Synthesize like the decoder would. */
  if (skip) {
    if (skip == OD_PVQ_SKIP_COPY) OD_COPY(out, r0, n);
    else OD_CLEAR(out, n);
  }
  else {
    if (noref) gain_offset = 0;
    g = od_gain_expand(OD_SHL(qg, OD_CGAIN_SHIFT) + gain_offset, q0, beta);
    od_pvq_synthesis_partial(out, y, r16, n, noref, g, theta, m, s,
     qm_inv);
  }
  *vk = k;
  *skip_diff += skip_dist - best_dist;
  /* Encode gain differently depending on whether we use prediction or not.
     Special encoding on inter frames where qg=0 is allowed for noref=0
     but not noref=1.*/
  if (is_keyframe) return noref ? qg : neg_interleave(qg, icgr);
  else return noref ? qg - 1 : neg_interleave(qg + 1, icgr + 1);
}