Beispiel #1
0
/** Encodes a single vector of integers (eg, a partition within a
 *  coefficient block) using PVQ
 *
 * @param [in,out] ec         range encoder
 * @param [in]     qg         quantized gain
 * @param [in]     theta      quantized post-prediction theta
 * @param [in]     max_theta  maximum possible quantized theta value
 * @param [in]     in         coefficient vector to code
 * @param [in]     n          number of coefficients in partition
 * @param [in]     k          number of pulses in partition
 * @param [in,out] model      entropy encoder state
 * @param [in,out] adapt      adaptation context
 * @param [in,out] exg        ExQ16 expectation of gain value
 * @param [in,out] ext        ExQ16 expectation of theta value
 * @param [in]     nodesync   do not use info that depend on the reference
 * @param [in]     cdf_ctx    selects which cdf context to use
 * @param [in]     is_keyframe whether we're encoding a keyframe
 * @param [in]     code_skip  whether the "skip rest" flag is allowed
 * @param [in]     skip_rest  when set, we skip all higher bands
 * @param [in]     encode_flip whether we need to encode the CfL flip flag now
 * @param [in]     flip       value of the CfL flip flag
 */
static void pvq_encode_partition(od_ec_enc *ec,
                                 int qg,
                                 int theta,
                                 int max_theta,
                                 const od_coeff *in,
                                 int n,
                                 int k,
                                 generic_encoder model[3],
                                 od_adapt_ctx *adapt,
                                 int *exg,
                                 int *ext,
                                 int nodesync,
                                 int cdf_ctx,
                                 int is_keyframe,
                                 int code_skip,
                                 int skip_rest,
                                 int encode_flip,
                                 int flip) {
  int noref;
  int id;
  noref = (theta == -1);
  id = (qg > 0) + 2*OD_MINI(theta + 1,3) + 8*code_skip*skip_rest;
  if (is_keyframe) {
    OD_ASSERT(id != 8);
    if (id >= 8) id--;
  }
  else {
    OD_ASSERT(id != 10);
    if (id >= 10) id--;
  }
  /* Jointly code gain, theta and noref for small values. Then we handle
     larger gain and theta values. For noref, theta = -1. */
  od_encode_cdf_adapt(ec, id, &adapt->pvq.pvq_gaintheta_cdf[cdf_ctx][0],
   8 + 7*code_skip, adapt->pvq.pvq_gaintheta_increment);
  if (encode_flip) {
    /* We could eventually do some smarter entropy coding here, but it would
       have to be good enough to overcome the overhead of the entropy coder.
       An early attempt using a "toogle" flag with simple adaptation wasn't
       worth the trouble. */
    od_ec_enc_bits(ec, flip, 1);
  }
  if (qg > 0) {
    int tmp;
    tmp = *exg;
    generic_encode(ec, &model[!noref], qg - 1, -1, &tmp, 2);
    OD_IIR_DIADIC(*exg, qg << 16, 2);
  }
  if (theta > 1 && (nodesync || max_theta > 3)) {
    int tmp;
    tmp = *ext;
    generic_encode(ec, &model[2], theta - 2, nodesync ? -1 : max_theta - 3,
     &tmp, 2);
    OD_IIR_DIADIC(*ext, theta << 16, 2);
  }
  od_encode_pvq_codeword(ec, &adapt->pvq.pvq_codeword_ctx, in,
   n - (theta != -1), k);
}
Beispiel #2
0
int oggbyte_readcopy(oggbyte_buffer *_b, void *_dest, uint32_t _bytes) {
  ptrdiff_t endbyte;
  endbyte = _b->ptr - _b->buf;
  OD_ASSERT(endbyte >= 0);
  OD_ASSERT(endbyte <= _b->storage);
  if ((size_t)(_b->storage - endbyte) < _bytes) return -1;
  memcpy(_dest, _b->ptr, _bytes);
  _b->ptr += _bytes;
  return 0;
}
Beispiel #3
0
void image_data_pred_block(image_data *_this,int _bi,int _bj){
  int       b_sz;
  double   *pred;
  int       mode;
  od_coeff  coeffs[5*B_SZ_MAX*B_SZ_MAX];
  b_sz=1<<_this->b_sz_log;
  pred=&_this->pred[_this->pred_stride*b_sz*_bj+b_sz*_bi];
#if MASK_BLOCKS
  if(!_this->mask[_this->nxblocks*_bj+_bi]){
    od_coeff *fdct;
    int       j;
    int       i;
    fdct=&_this->fdct[_this->fdct_stride*b_sz*(_bj+1)+b_sz*(_bi+1)];
    for(j=0;j<b_sz;j++){
      for(i=0;i<b_sz;i++){
        pred[_this->pred_stride*j+i]=fdct[_this->fdct_stride*j+i];
      }
    }
    return;
  }
#endif
  mode=_this->mode[_this->nxblocks*_bj+_bi];
  image_data_load_block(_this,_bi,_bj,coeffs);
  OD_ASSERT(_this->b_sz_log>=OD_LOG_BSIZE0&&_this->b_sz_log<=B_SZ_LOG_MAX);
  (*NE_INTRA_MULT[_this->b_sz_log-OD_LOG_BSIZE0])(pred,_this->pred_stride,
   coeffs,mode);
}
Beispiel #4
0
void oggbyte_readinit(oggbyte_buffer *_b, unsigned char *_buf,
 ptrdiff_t _bytes) {
  OD_ASSERT(_bytes >= 0);
  OD_CLEAR(_b, 1);
  _b->buf = _b->ptr = _buf;
  _b->storage = _bytes;
}
Beispiel #5
0
static const char *od_log_level_name(od_log_level level) {
  /* Check for invalid input */
  OD_ASSERT(level < OD_LOG_LEVEL_MAX);
  if (level >= OD_LOG_LEVEL_MAX)
    return "INVALID";
  return od_log_level_names[level];
}
Beispiel #6
0
static const char *od_log_facility_name(od_log_facility fac) {
  /* Check for invalid input */
  OD_ASSERT(fac < OD_LOG_FACILITY_MAX);
  if (fac >= OD_LOG_FACILITY_MAX)
    return "INVALID";
  return od_log_module_names[fac];
}
Beispiel #7
0
/** Synthesizes one parition of coefficient values from a PVQ-encoded
 * vector.  This 'partial' version is called by the encode loop where
 * the Householder reflection has already been computed and there's no
 * need to recompute it.
 *
 * @param [out]     xcoeff  output coefficient partition (x in math doc)
 * @param [in]      ypulse  PVQ-encoded values (y in the math doc); in
 *                          the noref case, this vector has n entries,
 *                          in the reference case it contains n-1 entries
 *                          (the m-th entry is not included)
 * @param [in]      r       reference vector (prediction)
 * @param [in]      n       number of elements in this partition
 * @param [in]      noref   indicates presence or lack of prediction
 * @param [in]      g       decoded quantized vector gain
 * @param [in]      theta   decoded theta (prediction error)
 * @param [in]      m       alignment dimension of Householder reflection
 * @param [in]      s       sign of Householder reflection
 * @param [in]      qm_inv  inverse of the QM with magnitude compensation
 */
void od_pvq_synthesis_partial(od_coeff *xcoeff, const od_coeff *ypulse,
 const double *r, int n, int noref, double g, double theta, int m, int s,
 const int16_t *qm_inv) {
  int i;
  int yy;
  double scale;
  int nn;
  OD_ASSERT(g != 0);
  nn = n-(!noref); /* when noref==0, vector in is sized n-1 */
  yy = 0;
  for (i = 0; i < nn; i++)
    yy += ypulse[i]*(int32_t)ypulse[i];
  if (yy == 0) scale = 0;
  else scale = g/sqrt(yy);
  if (noref) {
    for (i = 0; i < n; i++) {
      xcoeff[i] = (od_coeff)floor(.5
       + (ypulse[i]*scale)*(qm_inv[i]*OD_QM_INV_SCALE_1));
    }
  }
  else{
    double x[MAXN];
    scale *= sin(theta);
    for (i = 0; i < m; i++)
      x[i] = ypulse[i]*scale;
    x[m] = -s*g*cos(theta);
    for (i = m; i < nn; i++)
      x[i+1] = ypulse[i]*scale;
    od_apply_householder(x, r, n);
    for (i = 0; i < n; i++) {
      xcoeff[i] = (od_coeff)floor(.5 + (x[i]*(qm_inv[i]*OD_QM_INV_SCALE_1)));
    }
  }
}
Beispiel #8
0
/** Adapts a Q15 cdf after encoding/decoding a symbol. */
void od_cdf_adapt_q15(int val, uint16_t *cdf, int n, int *count, int rate) {
  int i;
  *count = OD_MINI(*count + 1, 1 << rate);
  OD_ASSERT(cdf[n - 1] == 32768);
  if (*count >= 1 << rate) {
    /* Steady-state adaptation based on a simple IIR with dyadic rate. */
    for (i = 0; i < n; i++) {
      int tmp;
      /* When (i < val), we want the adjustment ((cdf[i] - tmp) >> rate) to be
         positive so long as (cdf[i] > i + 1), and 0 when (cdf[i] == i + 1),
         to ensure we don't drive any probabilities to 0. Replacing cdf[i] with
         (i + 2) and solving ((i + 2 - tmp) >> rate == 1) for tmp produces
         tmp == i + 2 - (1 << rate). Using this value of tmp with
         cdf[i] == i + 1 instead gives an adjustment of 0 as desired.

         When (i >= val), we want ((cdf[i] - tmp) >> rate) to be negative so
         long as cdf[i] < 32768 - (n - 1 - i), and 0 when
         cdf[i] == 32768 - (n - 1 - i), again to ensure we don't drive any
         probabilities to 0. Since right-shifting any negative value is still
         negative, we can solve (32768 - (n - 1 - i) - tmp == 0) for tmp,
         producing tmp = 32769 - n + i. Using this value of tmp with smaller
         values of cdf[i] instead gives negative adjustments, as desired.

         Combining the two cases gives the expression below. These could be
         stored in a lookup table indexed by n and rate to avoid the
         arithmetic. */
      tmp = 2 - (1<<rate) + i + (32767 + (1<<rate) - n)*(i >= val);
      cdf[i] -= (cdf[i] - tmp) >> rate;
    }
  }
  else {
Beispiel #9
0
/*Computes the SATD using the actual DCT, instead of the Hadamard transform.
  This is useful for comparing SATD numbers to the frequency domain techniques
   without having to worry about scaling.*/
static unsigned od_satd(const unsigned char *_src,int _src_stride,
 const unsigned char *_ref,int _ref_stride){
  od_coeff buf[B_SZ*B_SZ];
  unsigned satd;
  int      i;
  int      j;
  OD_ASSERT(B_SZ==4);
  for(i=0;i<B_SZ;i++){
    for(j=0;j<B_SZ;j++){
      buf[B_SZ*i+j]=_src[j]-_ref[j];
    }
    _src+=_src_stride;
    _ref+=_ref_stride;
  }
#if B_SZ_LOG>=OD_LOG_BSIZE0&&B_SZ_LOG<OD_LOG_BSIZE0+OD_NBSIZES
  (*OD_FDCT_2D[B_SZ_LOG-OD_LOG_BSIZE0])(buf,B_SZ,buf,B_SZ);
#else
# error "Need an fDCT implementation for this block size."
#endif
  satd=0;
  for(i=0;i<B_SZ;i++){
    for(j=0;j<B_SZ;j++)satd+=abs(buf[B_SZ*i+j]);
  }
  return satd;
}
Beispiel #10
0
void od_copy_nxm(unsigned char *_dst, int _dstride,
 const unsigned char *_src, int _sstride, int _log_n, int _log_m) {
  int j;
  OD_ASSERT(_log_n != _log_m);
  for (j = 0; j < 1 << _log_m; j++) {
    OD_COPY(_dst, _src, 1 << _log_n);
    _dst += _dstride;
    _src += _sstride;
  }
}
Beispiel #11
0
int od_logging_active_impl(od_log_facility fac, od_log_level level) {
  /* Check for invalid input */
  OD_ASSERT(fac < OD_LOG_FACILITY_MAX);
  if (fac >= OD_LOG_FACILITY_MAX)
    return 0;
  if (od_log_levels[fac] < level)
    return 0;  /* Skipped */

  return 1;
}
Beispiel #12
0
/*Takes updated low and range values, renormalizes them so that
   32768 <= rng < 65536 (flushing bytes from low to the pre-carry buffer if
   necessary), and stores them back in the encoder context.
  low: The new value of low.
  rng: The new value of the range.*/
static void od_ec_enc_normalize(od_ec_enc *enc,
 od_ec_window low, unsigned rng) {
  int d;
  int c;
  int s;
  c = enc->cnt;
  OD_ASSERT(rng <= 65535U);
  d = 16 - OD_ILOG_NZ(rng);
  s = c + d;
  /*TODO: Right now we flush every time we have at least one byte available.
    Instead we should use an od_ec_window and flush right before we're about to
     shift bits off the end of the window.
    For a 32-bit window this is about the same amount of work, but for a 64-bit
     window it should be a fair win.*/
  if (s >= 0) {
    ogg_uint16_t *buf;
    ogg_uint32_t storage;
    ogg_uint32_t offs;
    unsigned m;
    buf = enc->precarry_buf;
    storage = enc->precarry_storage;
    offs = enc->offs;
    if (offs + 2 > storage) {
      storage = 2*storage + 2;
      buf = (ogg_uint16_t *)_ogg_realloc(buf, sizeof(*buf)*storage);
      if (buf == NULL) {
        enc->error = -1;
        enc->offs = 0;
        return;
      }
      enc->precarry_buf = buf;
      enc->precarry_storage = storage;
    }
    c += 16;
    m = (1 << c) - 1;
    if (s >= 8) {
      OD_ASSERT(offs < storage);
      buf[offs++] = (ogg_uint16_t)(low >> c);
      low &= m;
      c -= 8;
      m >>= 8;
    }
Beispiel #13
0
static void od_pre_blocks(od_coeff *_out,int _out_stride,od_coeff *_in,
 int _in_stride,int _bx,int _by,int _b_sz_log){
  int b_sz;
  int by;
  int bx;
  int j;
  int i;
  b_sz=1<<_b_sz_log;
  for(by=0;by<_by;by++){
    int y;
    y=by<<_b_sz_log;
    for(bx=0;bx<_bx;bx++){
      int x;
      x=bx<<_b_sz_log;
      for(i=0;i<b_sz;i++){
        od_coeff col[B_SZ_MAX];
#if APPLY_FILTER
        for(j=0;j<b_sz;j++){
          col[j]=_in[_in_stride*(y+j)+x+i];
        }
        OD_ASSERT(_b_sz_log>=OD_LOG_BSIZE0&&_b_sz_log<=B_SZ_LOG_MAX);
        (*NE_PRE_FILTER[_b_sz_log-OD_LOG_BSIZE0])(col,col);
        for(j=0;j<b_sz;j++){
          _out[_out_stride*(y+j)+x+i]=col[j];
        }
#else
        for(j=0;j<b_sz;j++){
          _out[_out_stride*(y+j)+x+i]=_in[_in_stride*(y+j)+x+i];
        }
#endif
      }
#if APPLY_FILTER
      for(j=0;j<b_sz;j++){
        od_coeff *row;
        row=&_out[_out_stride*(y+j)+x];
        OD_ASSERT(_b_sz_log>=OD_LOG_BSIZE0&&_b_sz_log<=B_SZ_LOG_MAX);
        (*NE_PRE_FILTER[_b_sz_log-OD_LOG_BSIZE0])(row,row);
      }
#endif
    }
  }
}
Beispiel #14
0
void od_encode_quantizer_scaling(daala_enc_ctx *enc, int q_scaling,
 int sbx, int sby, int skip) {
  int nhsb;
  OD_ASSERT(skip == !!skip);
  nhsb = enc->state.nhsb;
  OD_ASSERT(sbx < nhsb);
  OD_ASSERT(sby < enc->state.nvsb);
  OD_ASSERT(!skip || q_scaling == 0);
  enc->state.sb_q_scaling[sby*nhsb + sbx] = q_scaling;
  if (!skip) {
    int above;
    int left;
    /* use value from neighbour if possible, otherwise use 0 */
    above = sby > 0 ? enc->state.sb_q_scaling[(sby - 1)*enc->state.nhsb + sbx]
     : 0;
    left = sbx > 0 ? enc->state.sb_q_scaling[sby*enc->state.nhsb + (sbx - 1)]
     : 0;
    od_encode_cdf_adapt(&enc->ec, q_scaling,
     enc->state.adapt.q_cdf[above + left*4], 4,
     enc->state.adapt.q_increment);
  }
}
Beispiel #15
0
static void od_idct_blocks(od_coeff *_out,int _out_stride,od_coeff *_in,
 int _in_stride,int _bx,int _by,int _b_sz_log){
  int bx;
  int by;
  for(by=0;by<_by;by++){
    int y;
    y=by<<_b_sz_log;
    for(bx=0;bx<_bx;bx++){
      int x;
      x=bx<<_b_sz_log;
      OD_ASSERT(_b_sz_log>=OD_LOG_BSIZE0&&_b_sz_log<=B_SZ_LOG_MAX);
      (*OD_IDCT_2D[_b_sz_log-OD_LOG_BSIZE0])(&_out[_out_stride*y+x],_out_stride,
       &_in[_in_stride*y+x],_in_stride);
    }
  }
}
Beispiel #16
0
/*Perform SATD on 8x8 blocks within src and ref then sum the results of
   each one.*/
OD_SIMD_INLINE int32_t od_mc_compute_sum_8x8_satd8(int ln,
 const unsigned char *src, int systride,
 const unsigned char *ref, int rystride) {
  int n;
  int i;
  int j;
  int32_t satd;
  n = 1 << ln;
  OD_ASSERT(n >= 8);
  satd = 0;
  for (i = 0; i < n; i += 8) {
    for (j = 0; j < n; j += 8) {
      satd += od_mc_compute_satd8_8x8_part(src + i*systride + j, systride,
       ref + i*rystride + j, rystride);
    }
  }
  return satd;
}
Beispiel #17
0
/* There is a bug in get_intra_dims where chroma blocks do not line up under
    luma blocks. */
static void cfl_get_intra_dims(const video_input_info *_info,int _pli,
 int _padding,int *_x0,int *_y0,int *_nxblocks,int *_nyblocks){
  int xshift;
  int yshift;
  int padding;
  xshift=_pli!=0&&!(_info->pixel_fmt&1);
  yshift=_pli!=0&&!(_info->pixel_fmt&2);
  OD_ASSERT(xshift==yshift);
  padding=_padding<<B_SZ_LOG;
  /*An offset of 1 would be fine to provide enough context for VP8-style intra
     prediction, but for frequency-domain prediction, we'll want a full block,
     plus overlap.*/
  *_x0=(_info->pic_x>>xshift)+(padding>>1+xshift);
  *_y0=(_info->pic_y>>yshift)+(padding>>1+yshift);
  /*We take an extra block off the end to give enough context for above-right
     intra prediction.*/
  *_nxblocks=_info->pic_w-padding>>B_SZ_LOG;
  *_nyblocks=_info->pic_h-padding>>B_SZ_LOG;
}
Beispiel #18
0
/* Initialize the quantization matrix with the magnitude compensation applied.
   We need to compensate for the magnitude because lapping causes some basis
   functions to be smaller, so they would end up being quantized too finely
   (the same error in the quantized domain would result in a smaller pixel
   domain error). */
void od_init_qm(int16_t *x, int16_t *x_inv, const int *qm) {
  int i;
  int j;
  int16_t y[OD_QM_BSIZE];
  int16_t y_inv[OD_QM_BSIZE];
  int16_t *x1;
  int16_t *x1_inv;
  int off;
  int bs;
  int xydec;
  for (bs = 0; bs < OD_NBSIZES; bs++) {
    for (xydec = 0; xydec < 2; xydec++) {
      off = od_qm_offset(bs, xydec);
      x1 = x + off;
      x1_inv = x_inv + off;
      for (i = 0; i < 4 << bs; i++) {
        for (j = 0; j < 4 << bs; j++) {
          double mag;
#if OD_DEBLOCKING || OD_DISABLE_FILTER
          mag = 1.0;
#else
          mag = OD_BASIS_MAG[xydec][bs][i]*OD_BASIS_MAG[xydec][bs][j];
#endif
          if (i == 0 && j == 0) {
            mag = 1.0;
          }
          else {
            mag /= 0.0625*qm[(i << 1 >> bs)*8 + (j << 1 >> bs)];
            OD_ASSERT(mag > 0.0);
          }
          /*Convert to fit in 16 bits.*/
          y[i*(4 << bs) + j] = (int16_t)OD_MINI(OD_QM_SCALE_MAX,
           (int16_t)floor(.5 + mag*OD_QM_SCALE));
          y_inv[i*(4 << bs) + j] = (int16_t)floor(.5
           + OD_QM_SCALE*OD_QM_INV_SCALE/(double)y[i*(4 << bs) + j]);
        }
      }
      od_raster_to_coding_order_16(x1, 4 << bs, y, 4 << bs);
      od_raster_to_coding_order_16(x1_inv, 4 << bs, y_inv, 4 << bs);
    }
  }
}
Beispiel #19
0
void image_data_mask(image_data *_this,const unsigned char *_data,int _stride){
  int j;
  int i;
  memset(_this->mask,0,_this->nyblocks*_this->nxblocks);
  /* process_block_size32 needs 32x32 image data with 6 pixel padding */
  OD_ASSERT(((_stride-_this->nxblocks*B_SZ)/2)>=6);
  for(j=0;j<_this->nyblocks*B_SZ/32;j++){
    for(i=0;i<_this->nxblocks*B_SZ/32;i++){
      const unsigned char *b;
      BlockSizeComp        bs;
      int                  dec[4][4];
      int                  k;
      int                  l;
      b=&_data[_stride*32*j+32*i];
      process_block_size32(&bs, b,_stride, b, _stride, dec, 21);
      for(l=0;l<32/B_SZ;l++){
        for(k=0;k<32/B_SZ;k++){
          /*printf("i=%i j=%i k=%i l=%i\n",i,j,k,l);
          printf("bx=%i by=%i\n",i*32/B_SZ+k*B_SZ,j*32/B_SZ+l*B_SZ);
          fflush(stdout);*/
#if B_SZ==4
          _this->mask[_this->nxblocks*(j*32/B_SZ+l)+i*32/B_SZ+k]=
           (dec[l>>1][k>>1]==0);
#elif B_SZ==8
          _this->mask[_this->nxblocks*(j*32/B_SZ+l)+i*32/B_SZ+k]=
           (dec[l][1]==1);
#elif B_SZ==16
          _this->mask[_this->nxblocks*(j*32/B_SZ+l)+i*32/B_SZ+k]=
           (dec[l<<1][k<<1]==2||dec[l<<1][k<<1]==3);
#else
# error "Invalid block size."
#endif
        }
      }
    }
  }
}
Beispiel #20
0
/*This is a smart copy that copies the intersection of the two img planes
   and performs any needed bitdepth or packing conversion.
  Note that this copy performs clamping as it's only used for copying into
   and out of the codec, not for internal reference->reference copies.
  Does not touch any padding/border/unintersected area.*/
void od_img_plane_copy(daala_image* dst, daala_image* src, int pli) {
  daala_image_plane *dst_plane;
  daala_image_plane *src_plane;
  unsigned char *dst_data;
  unsigned char *src_data;
  int dst_xstride;
  int dst_ystride;
  int src_xstride;
  int src_ystride;
  int dst_xdec;
  int dst_ydec;
  int src_xdec;
  int src_ydec;
  int dst_plane_width;
  int dst_plane_height;
  int src_plane_width;
  int src_plane_height;
  int w;
  int h;
  int x;
  int y;
  if (pli >= dst->nplanes || pli >= src->nplanes) {
    return;
  }
  dst_plane = dst->planes+pli;
  src_plane = src->planes+pli;
  dst_xstride = dst_plane->xstride;
  dst_ystride = dst_plane->ystride;
  src_xstride = src_plane->xstride;
  src_ystride = src_plane->ystride;

  OD_ASSERT(dst_xstride == 1 || dst_xstride == 2);
  OD_ASSERT(src_xstride == 1 || src_xstride == 2);
  dst_xdec = dst_plane->xdec;
  dst_ydec = dst_plane->ydec;
  src_xdec = src_plane->xdec;
  src_ydec = src_plane->ydec;
  dst_data = dst_plane->data;
  src_data = src_plane->data;
  dst_plane_width = OD_PLANE_SZ(dst->width, dst_xdec);
  dst_plane_height = OD_PLANE_SZ(dst->height, dst_ydec);
  src_plane_width = OD_PLANE_SZ(src->width, src_xdec);
  src_plane_height = OD_PLANE_SZ(src->height, src_ydec);
  w = OD_MINI(dst_plane_width, src_plane_width);
  h = OD_MINI(dst_plane_height, src_plane_height);
  for (y = 0; y < h; y++) {
    unsigned char *src_ptr;
    unsigned char *dst_ptr;
    src_ptr = src_data;
    dst_ptr = dst_data;
    if (src_plane->bitdepth > 8) {
      if (dst_plane->bitdepth > 8) {
        /*Both source and destination are multibyte.*/
        if (dst_plane->bitdepth >= src_plane->bitdepth) {
          /*Shift source up into destination.*/
          int upshift;
          upshift = dst_plane->bitdepth - src_plane->bitdepth;
          for (x = 0; x < w; x++) {
            *(int16_t *)dst_ptr = OD_CLAMPI(0,
             *(int16_t *)src_ptr << upshift,
             (1 << dst_plane->bitdepth) - 1);
            src_ptr += src_xstride;
            dst_ptr += dst_xstride;
          }
        }
        else {
          /*Round source down into destination.*/
          int dnshift;
          dnshift = src_plane->bitdepth - dst_plane->bitdepth;
          for (x = 0; x < w; x++) {
            *(int16_t *)dst_ptr = OD_CLAMPI(0,
             (*(int16_t *)src_ptr + (1 << dnshift >> 1)) >> dnshift,
             (1 << dst_plane->bitdepth) - 1);
            src_ptr += src_xstride;
            dst_ptr += dst_xstride;
          }
        }
      }
      else {
        /*Round multibyte source down into 8-bit destination.*/
        int dnshift;
        dnshift = src_plane->bitdepth - 8;
        OD_ASSERT(dst_plane->bitdepth == 8);
        for (x = 0; x < w; x++) {
          *dst_ptr = OD_CLAMP255((*(int16_t *)src_ptr
           + (1 << dnshift >> 1)) >> dnshift);
          src_ptr += src_xstride;
          dst_ptr += dst_xstride;
        }
      }
    }
    else {
Beispiel #21
0
/** Perform PVQ quantization with prediction, trying several
 * possible gains and angles. See draft-valin-videocodec-pvq and
 * http://jmvalin.ca/slides/pvq.pdf for more details.
 *
 * @param [out]    out       coefficients after quantization
 * @param [in]     x0        coefficients before quantization
 * @param [in]     r0        reference, aka predicted coefficients
 * @param [in]     n         number of dimensions
 * @param [in]     q0        quantization step size
 * @param [out]    y         pulse vector (i.e. selected PVQ codevector)
 * @param [out]    itheta    angle between input and reference (-1 if noref)
 * @param [out]    max_theta maximum value of itheta that could have been
 * @param [out]    vk        total number of pulses
 * @param [in]     beta      per-band activity masking beta param
 * @param [out]    skip_diff distortion cost of skipping this block
 *                           (accumulated)
 * @param [in]     robust    make stream robust to error in the reference
 * @param [in]     is_keyframe whether we're encoding a keyframe
 * @param [in]     pli       plane index
 * @param [in]     adapt     probability adaptation context
 * @param [in]     qm        QM with magnitude compensation
 * @param [in]     qm_inv    Inverse of QM with magnitude compensation
 * @return         gain      index of the quatized gain
*/
static int pvq_theta(od_coeff *out, const od_coeff *x0, const od_coeff *r0,
 int n, int q0, od_coeff *y, int *itheta, int *max_theta, int *vk,
 double beta, double *skip_diff, int robust, int is_keyframe, int pli,
 const od_adapt_ctx *adapt, const int16_t *qm,
 const int16_t *qm_inv) {
  od_val32 g;
  od_val32 gr;
  od_coeff y_tmp[MAXN];
  int i;
  /* Number of pulses. */
  int k;
  /* Companded gain of x and reference, normalized to q. */
  od_val32 cg;
  od_val32 cgr;
  int icgr;
  int qg;
  /* Best RDO cost (D + lamdba*R) so far. */
  double best_cost;
  /* Distortion (D) that corresponds to the best RDO cost. */
  double best_dist;
  double dist;
  /* Sign of Householder reflection. */
  int s;
  /* Dimension on which Householder reflects. */
  int m;
  od_val32 theta;
  double corr;
  int best_k;
  od_val32 best_qtheta;
  od_val32 gain_offset;
  int noref;
  double lambda;
  double skip_dist;
  int cfl_enabled;
  int skip;
  double gain_weight;
  od_val16 x16[MAXN];
  od_val16 r16[MAXN];
  int xshift;
  int rshift;
  lambda = OD_PVQ_LAMBDA;
  /* Give more weight to gain error when calculating the total distortion. */
  gain_weight = 1.4;
  OD_ASSERT(n > 1);
  corr = 0;
#if !defined(OD_FLOAT_PVQ)
  /* Shift needed to make x fit in 16 bits even after rotation.
     This shift value is not normative (it can be changed without breaking
     the bitstream) */
  xshift = OD_MAXI(0, od_vector_log_mag(x0, n) - 15);
  /* Shift needed to make the reference fit in 15 bits, so that the Householder
     vector can fit in 16 bits.
     This shift value *is* normative, and has to match the decoder. */
  rshift = OD_MAXI(0, od_vector_log_mag(r0, n) - 14);
#else
  xshift = 0;
  rshift = 0;
#endif
  for (i = 0; i < n; i++) {
#if defined(OD_FLOAT_PVQ)
    /*This is slightly different from the original float PVQ code,
       where the qm was applied in the accumulation in od_pvq_compute_gain and
       the vectors were od_coeffs, not od_val16 (i.e. double).*/
    x16[i] = x0[i]*(double)qm[i]*OD_QM_SCALE_1;
    r16[i] = r0[i]*(double)qm[i]*OD_QM_SCALE_1;
#else
    x16[i] = OD_SHR_ROUND(x0[i]*qm[i], OD_QM_SHIFT + xshift);
    r16[i] = OD_SHR_ROUND(r0[i]*qm[i], OD_QM_SHIFT + rshift);
#endif
    corr += OD_MULT16_16(x16[i], r16[i]);
  }
  cfl_enabled = is_keyframe && pli != 0 && !OD_DISABLE_CFL;
  cg  = od_pvq_compute_gain(x16, n, q0, &g, beta, xshift);
  cgr = od_pvq_compute_gain(r16, n, q0, &gr, beta, rshift);
  if (cfl_enabled) cgr = OD_CGAIN_SCALE;
  /* gain_offset is meant to make sure one of the quantized gains has
     exactly the same gain as the reference. */
#if defined(OD_FLOAT_PVQ)
  icgr = (int)floor(.5 + cgr);
#else
  icgr = OD_SHR_ROUND(cgr, OD_CGAIN_SHIFT);
#endif
  gain_offset = cgr - OD_SHL(icgr, OD_CGAIN_SHIFT);
  /* Start search with null case: gain=0, no pulse. */
  qg = 0;
  dist = gain_weight*cg*cg*OD_CGAIN_SCALE_2;
  best_dist = dist;
  best_cost = dist + lambda*od_pvq_rate(0, 0, -1, 0, adapt, NULL, 0, n,
   is_keyframe, pli);
  noref = 1;
  best_k = 0;
  *itheta = -1;
  *max_theta = 0;
  OD_CLEAR(y, n);
  best_qtheta = 0;
  m = 0;
  s = 1;
  corr = corr/(1e-100 + g*(double)gr/OD_SHL(1, xshift + rshift));
  corr = OD_MAXF(OD_MINF(corr, 1.), -1.);
  if (is_keyframe) skip_dist = gain_weight*cg*cg*OD_CGAIN_SCALE_2;
  else {
    skip_dist = gain_weight*(cg - cgr)*(cg - cgr)
     + cgr*(double)cg*(2 - 2*corr);
    skip_dist *= OD_CGAIN_SCALE_2;
  }
  if (!is_keyframe) {
    /* noref, gain=0 isn't allowed, but skip is allowed. */
    od_val32 scgr;
    scgr = OD_MAXF(0,gain_offset);
    if (icgr == 0) {
      best_dist = gain_weight*(cg - scgr)*(cg - scgr)
       + scgr*(double)cg*(2 - 2*corr);
      best_dist *= OD_CGAIN_SCALE_2;
    }
    best_cost = best_dist + lambda*od_pvq_rate(0, icgr, 0, 0, adapt, NULL,
     0, n, is_keyframe, pli);
    best_qtheta = 0;
    *itheta = 0;
    *max_theta = 0;
    noref = 0;
  }
  if (n <= OD_MAX_PVQ_SIZE && !od_vector_is_null(r0, n) && corr > 0) {
    od_val16 xr[MAXN];
    int gain_bound;
    gain_bound = OD_SHR(cg - gain_offset, OD_CGAIN_SHIFT);
    /* Perform theta search only if prediction is useful. */
    theta = OD_ROUND32(OD_THETA_SCALE*acos(corr));
    m = od_compute_householder(r16, n, gr, &s, rshift);
    od_apply_householder(xr, x16, r16, n);
    for (i = m; i < n - 1; i++) xr[i] = xr[i + 1];
    /* Search for the best gain within a reasonable range. */
    for (i = OD_MAXI(1, gain_bound - 1); i <= gain_bound + 1; i++) {
      int j;
      od_val32 qcg;
      int ts;
      /* Quantized companded gain */
      qcg = OD_SHL(i, OD_CGAIN_SHIFT) + gain_offset;
      /* Set angular resolution (in ra) to match the encoded gain */
      ts = od_pvq_compute_max_theta(qcg, beta);
      /* Search for the best angle within a reasonable range. */
      for (j = OD_MAXI(0, (int)floor(.5 + theta*OD_THETA_SCALE_1*2/M_PI*ts)
       - 2); j <= OD_MINI(ts - 1, (int)ceil(theta*OD_THETA_SCALE_1*2/M_PI*ts));
       j++) {
        double cos_dist;
        double cost;
        double dist_theta;
        double sin_prod;
        od_val32 qtheta;
        qtheta = od_pvq_compute_theta(j, ts);
        k = od_pvq_compute_k(qcg, j, qtheta, 0, n, beta, robust || is_keyframe);
        sin_prod = od_pvq_sin(theta)*OD_TRIG_SCALE_1*od_pvq_sin(qtheta)*
         OD_TRIG_SCALE_1;
        /* PVQ search, using a gain of qcg*cg*sin(theta)*sin(qtheta) since
           that's the factor by which cos_dist is multiplied to get the
           distortion metric. */
        cos_dist = pvq_search_rdo_double(xr, n - 1, k, y_tmp,
         qcg*(double)cg*sin_prod*OD_CGAIN_SCALE_2);
        /* See Jmspeex' Journal of Dubious Theoretical Results. */
        dist_theta = 2 - 2.*od_pvq_cos(theta - qtheta)*OD_TRIG_SCALE_1
         + sin_prod*(2 - 2*cos_dist);
        dist = gain_weight*(qcg - cg)*(qcg - cg) + qcg*(double)cg*dist_theta;
        dist *= OD_CGAIN_SCALE_2;
        /* Do approximate RDO. */
        cost = dist + lambda*od_pvq_rate(i, icgr, j, ts, adapt, y_tmp, k, n,
         is_keyframe, pli);
        if (cost < best_cost) {
          best_cost = cost;
          best_dist = dist;
          qg = i;
          best_k = k;
          best_qtheta = qtheta;
          *itheta = j;
          *max_theta = ts;
          noref = 0;
          OD_COPY(y, y_tmp, n - 1);
        }
      }
    }
  }
  /* Don't bother with no-reference version if there's a reasonable
     correlation. The only exception is luma on a keyframe because
     H/V prediction is unreliable. */
  if (n <= OD_MAX_PVQ_SIZE &&
   ((is_keyframe && pli == 0) || corr < .5
   || cg < (od_val32)(OD_SHL(2, OD_CGAIN_SHIFT)))) {
    int gain_bound;
    gain_bound = OD_SHR(cg, OD_CGAIN_SHIFT);
    /* Search for the best gain (haven't determined reasonable range yet). */
    for (i = OD_MAXI(1, gain_bound); i <= gain_bound + 1; i++) {
      double cos_dist;
      double cost;
      od_val32 qcg;
      qcg = OD_SHL(i, OD_CGAIN_SHIFT);
      k = od_pvq_compute_k(qcg, -1, -1, 1, n, beta, robust || is_keyframe);
      cos_dist = pvq_search_rdo_double(x16, n, k, y_tmp,
       qcg*(double)cg*OD_CGAIN_SCALE_2);
      /* See Jmspeex' Journal of Dubious Theoretical Results. */
      dist = gain_weight*(qcg - cg)*(qcg - cg)
       + qcg*(double)cg*(2 - 2*cos_dist);
      dist *= OD_CGAIN_SCALE_2;
      /* Do approximate RDO. */
      cost = dist + lambda*od_pvq_rate(i, 0, -1, 0, adapt, y_tmp, k, n,
       is_keyframe, pli);
      if (cost <= best_cost) {
        best_cost = cost;
        best_dist = dist;
        qg = i;
        noref = 1;
        best_k = k;
        *itheta = -1;
        *max_theta = 0;
        OD_COPY(y, y_tmp, n);
      }
    }
  }
  k = best_k;
  theta = best_qtheta;
  skip = 0;
  if (noref) {
    if (qg == 0) skip = OD_PVQ_SKIP_ZERO;
  }
  else {
    if (!is_keyframe && qg == 0) {
      skip = (icgr ? OD_PVQ_SKIP_ZERO : OD_PVQ_SKIP_COPY);
    }
    if (qg == icgr && *itheta == 0 && !cfl_enabled) skip = OD_PVQ_SKIP_COPY;
  }
  /* Synthesize like the decoder would. */
  if (skip) {
    if (skip == OD_PVQ_SKIP_COPY) OD_COPY(out, r0, n);
    else OD_CLEAR(out, n);
  }
  else {
    if (noref) gain_offset = 0;
    g = od_gain_expand(OD_SHL(qg, OD_CGAIN_SHIFT) + gain_offset, q0, beta);
    od_pvq_synthesis_partial(out, y, r16, n, noref, g, theta, m, s,
     qm_inv);
  }
  *vk = k;
  *skip_diff += skip_dist - best_dist;
  /* Encode gain differently depending on whether we use prediction or not.
     Special encoding on inter frames where qg=0 is allowed for noref=0
     but not noref=1.*/
  if (is_keyframe) return noref ? qg : neg_interleave(qg, icgr);
  else return noref ? qg - 1 : neg_interleave(qg + 1, icgr + 1);
}
Beispiel #22
0
/* Indexing for the packed quantization matrices. */
int od_qm_get_index(int bs, int band) {
  /* The -band/3 term is due to the fact that we force corresponding horizontal
     and vertical bands to have the same quantization. */
  OD_ASSERT(bs >= 0 && bs < OD_NBSIZES);
  return bs*(bs + 1) + band - band/3;
}
Beispiel #23
0
int daala_encode_flush_header(daala_enc_ctx *_enc, daala_comment *_dc,
 ogg_packet *_op) {
  daala_info *info = &_enc->state.info;
  if (_enc == NULL || _op == NULL) return OD_EFAULT;
  switch (_enc->packet_state) {
    case OD_PACKET_INFO_HDR:
    {
      int pli;
      oggbyte_reset(&_enc->obb);
      oggbyte_write1(&_enc->obb, 0x80);
      oggbyte_writecopy(&_enc->obb, "daala", 5);
      oggbyte_write1(&_enc->obb, info->version_major);
      oggbyte_write1(&_enc->obb, info->version_minor);
      oggbyte_write1(&_enc->obb, info->version_sub);
      OD_ASSERT(info->pic_width > 0);
      oggbyte_write4(&_enc->obb, info->pic_width);
      OD_ASSERT(info->pic_height > 0);
      oggbyte_write4(&_enc->obb, info->pic_height);
      oggbyte_write4(&_enc->obb, info->pixel_aspect_numerator);
      oggbyte_write4(&_enc->obb, info->pixel_aspect_denominator);
      oggbyte_write4(&_enc->obb, info->timebase_numerator);
      oggbyte_write4(&_enc->obb, info->timebase_denominator);
      oggbyte_write4(&_enc->obb, info->frame_duration);
      OD_ASSERT(info->keyframe_granule_shift < 32);
      oggbyte_write1(&_enc->obb, info->keyframe_granule_shift);
      OD_ASSERT((info->nplanes >= 1) && (info->nplanes <= OD_NPLANES_MAX));
      oggbyte_write1(&_enc->obb, info->nplanes);
      for (pli = 0; pli < info->nplanes; ++pli) {
        oggbyte_write1(&_enc->obb, info->plane_info[pli].xdec);
        oggbyte_write1(&_enc->obb, info->plane_info[pli].ydec);
      }
      _op->b_o_s = 1;
    }
    break;
    case OD_PACKET_COMMENT_HDR:
    {
      const char *vendor;
      uint32_t  vendor_len;
      int           ci;
      oggbyte_reset(&_enc->obb);
      oggbyte_write1(&_enc->obb, 0x81);
      oggbyte_writecopy(&_enc->obb, "daala", 5);
      vendor = daala_version_string();
      vendor_len = strlen(vendor);
      oggbyte_write4(&_enc->obb, vendor_len);
      oggbyte_writecopy(&_enc->obb, vendor, vendor_len);
      oggbyte_write4(&_enc->obb, _dc->comments);
      for (ci = 0; ci < _dc->comments; ci++) {
        if (_dc->user_comments[ci] != NULL) {
          oggbyte_write4(&_enc->obb, _dc->comment_lengths[ci]);
          oggbyte_writecopy(&_enc->obb, _dc->user_comments[ci],
           _dc->comment_lengths[ci]);
        }
        else oggbyte_write4(&_enc->obb, 0);
      }
      _op->b_o_s = 0;
    }
    break;
    case OD_PACKET_SETUP_HDR:
    {
      /*TODO: Produce header contents.*/
      oggbyte_reset(&_enc->obb);
      oggbyte_write1(&_enc->obb, 0x82);
      oggbyte_writecopy(&_enc->obb, "daala", 5);
      _op->b_o_s = 0;
    }
    break;
    /*No more headers to emit.*/
    default: return 0;
  }
  /*This is kind of fugly: we hand the user a buffer which they do not own.
    We will overwrite it when the next packet is output, so the user better be
     done with it by then.
    Vorbis is little better: it hands back buffers that it will free the next
     time the headers are requested, or when the encoder is cleared.
    Hopefully libogg2 will make this much cleaner.*/
  _op->packet = oggbyte_get_buffer(&_enc->obb);
  _op->bytes = oggbyte_bytes(&_enc->obb);
  _op->e_o_s = 0;
  _op->granulepos = 0;
  /*Is this smart? Vorbis does not even set this field.*/
  _op->packetno = 0;
  return ++_enc->packet_state+3;
}
Beispiel #24
0
void oggbyte_writetrunc(oggbyte_buffer *_b, ptrdiff_t _bytes) {
  OD_ASSERT(_bytes >= 0);
  _b->ptr = _b->buf + _bytes;
}