예제 #1
0
/*420jpeg chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  420mpeg2 chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  BR      |       BR      |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  BR      |       BR      |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a resampling filter to shift the site locations one quarter pixel (at
   the chroma plane's resolution) to the right.
  The 4:2:2 modes look exactly the same, except there are twice as many chroma
   lines, and they are vertically co-sited with the luma samples in both the
   mpeg2 and jpeg cases (thus requiring no vertical resampling).*/
static void y4m_42xmpeg2_42xjpeg_helper(unsigned char *_dst,
 const unsigned char *_src,int _c_w,int _c_h){
  int y;
  int x;
  for(y=0;y<_c_h;y++){
    /*Filter: [4 -17 114 35 -9 1]/128, derived from a 6-tap Lanczos
       window.*/
    for(x=0;x<OC_MINI(_c_w,2);x++){
      _dst[x]=(unsigned char)OC_CLAMPI(0,(4*_src[0]-17*_src[OC_MAXI(x-1,0)]+
       114*_src[x]+35*_src[OC_MINI(x+1,_c_w-1)]-9*_src[OC_MINI(x+2,_c_w-1)]+
       _src[OC_MINI(x+3,_c_w-1)]+64)>>7,255);
    }
    for(;x<_c_w-3;x++){
      _dst[x]=(unsigned char)OC_CLAMPI(0,(4*_src[x-2]-17*_src[x-1]+
       114*_src[x]+35*_src[x+1]-9*_src[x+2]+_src[x+3]+64)>>7,255);
    }
    for(;x<_c_w;x++){
      _dst[x]=(unsigned char)OC_CLAMPI(0,(4*_src[x-2]-17*_src[x-1]+
       114*_src[x]+35*_src[OC_MINI(x+1,_c_w-1)]-9*_src[OC_MINI(x+2,_c_w-1)]+
       _src[_c_w-1]+64)>>7,255);
    }
    _dst+=_c_w;
    _src+=_c_w;
  }
}
예제 #2
0
static size_t oc_huff_tree_collapse_size(oc_huff_node *_binode,int _depth){
  size_t size;
  int    mindepth;
  int    depth;
  int    loccupancy;
  int    occupancy;
  if(_binode->nbits!=0&&_depth>0){
    return oc_huff_tree_collapse_size(_binode->nodes[0],_depth-1)+
     oc_huff_tree_collapse_size(_binode->nodes[1],_depth-1);
  }
  depth=mindepth=oc_huff_tree_mindepth(_binode);
  occupancy=1<<mindepth;
  do{
    loccupancy=occupancy;
    occupancy=oc_huff_tree_occupancy(_binode,++depth);
  }
  while(occupancy>loccupancy&&occupancy>=1<<OC_MAXI(depth-OC_HUFF_SLUSH,0));
  depth--;
  size=oc_huff_node_size(depth);
  if(depth>0){
    size+=oc_huff_tree_collapse_size(_binode->nodes[0],depth-1);
    size+=oc_huff_tree_collapse_size(_binode->nodes[1],depth-1);
  }
  return size;
}
예제 #3
0
파일: rate.c 프로젝트: Asvarox/Unvanquished
void oc_enc_calc_lambda(oc_enc_ctx *_enc,int _qti){
  ogg_int64_t lq;
  int         qi;
  int         qi1;
  int         nqis;
  /*For now, lambda is fixed depending on the qi value and frame type:
      lambda=qscale*(qavg[qti][qi]**2),
     where qscale=0.2125.
    This was derived by exhaustively searching for the optimal quantizer for
     the AC coefficients in each block from a number of test sequences for a
     number of fixed lambda values and fitting the peaks of the resulting
     histograms (on the log(qavg) scale).
    The same model applies to both inter and intra frames.
    A more adaptive scheme might perform better.*/
  qi=_enc->state.qis[0];
  /*If rate control is active, use the lambda for the _target_ quantizer.
    This allows us to scale to rates slightly lower than we'd normally be able
     to reach, and give the rate control a semblance of "fractional qi"
     precision.
    TODO: Add API for changing QI, and allow extra precision.*/
  if(_enc->state.info.target_bitrate>0)lq=_enc->rc.log_qtarget;
  else lq=_enc->log_qavg[_qti][qi];
  /*The resulting lambda value is less than 0x500000.*/
  _enc->lambda=(int)oc_bexp64(2*lq-0x4780BD468D6B62BLL);
  /*Select additional quantizers.
    The R-D optimal block AC quantizer statistics suggest that the distribution
     is roughly Gaussian-like with a slight positive skew.
    K-means clustering on log_qavg to select 3 quantizers produces cluster
     centers of {log_qavg-0.6,log_qavg,log_qavg+0.7}.
    Experiments confirm these are relatively good choices.

    Although we do greedy R-D optimization of the qii flags to avoid switching
     too frequently, this becomes ineffective at low rates, either because we
     do a poor job of predicting the actual R-D cost, or the greedy
     optimization is not sufficient.
    Therefore adaptive quantization is disabled above an (experimentally
     suggested) threshold of log_qavg=7.00 (e.g., below INTRA qi=12 or
     INTER qi=20 with current matrices).
    This may need to be revised if the R-D cost estimation or qii flag
     optimization strategies change.*/
  nqis=1;
  if(lq<(OC_Q57(56)>>3)&&!_enc->vp3_compatible&&
   _enc->sp_level<OC_SP_LEVEL_FAST_ANALYSIS){
    qi1=oc_enc_find_qi_for_target(_enc,_qti,OC_MAXI(qi-1,0),0,
     lq+(OC_Q57(7)+5)/10);
    if(qi1!=qi)_enc->state.qis[nqis++]=qi1;
    qi1=oc_enc_find_qi_for_target(_enc,_qti,OC_MINI(qi+1,63),0,
     lq-(OC_Q57(6)+5)/10);
    if(qi1!=qi&&qi1!=_enc->state.qis[nqis-1])_enc->state.qis[nqis++]=qi1;
  }
  _enc->state.nqis=nqis;
}
예제 #4
0
파일: rate.c 프로젝트: Asvarox/Unvanquished
/*Re-initialize the Bessel filter coefficients with the specified delay.
  This does not alter the x/y state, but changes the reaction time of the
   filter.
  Altering the time constant of a reactive filter without alterning internal
   state is something that has to be done carefuly, but our design operates at
   high enough delays and with small enough time constant changes to make it
   safe.*/
static void oc_iir_filter_reinit(oc_iir_filter *_f,int _delay){
  int         alpha;
  ogg_int64_t one48;
  ogg_int64_t warp;
  ogg_int64_t k1;
  ogg_int64_t k2;
  ogg_int64_t d;
  ogg_int64_t a;
  ogg_int64_t ik2;
  ogg_int64_t b1;
  ogg_int64_t b2;
  /*This borrows some code from an unreleased version of Postfish.
    See the recipe at http://unicorn.us.com/alex/2polefilters.html for details
     on deriving the filter coefficients.*/
  /*alpha is Q24*/
  alpha=(1<<24)/_delay;
  one48=(ogg_int64_t)1<<48;
  /*warp is 7.12*/
  warp=OC_MAXI(oc_warp_alpha(alpha),1);
  /*k1 is 9.12*/
  k1=3*warp;
  /*k2 is 16.24.*/
  k2=k1*warp;
  /*d is 16.15.*/
  d=((1<<12)+k1<<12)+k2+256>>9;
  /*a is 0.32, since d is larger than both 1.0 and k2.*/
  a=(k2<<23)/d;
  /*ik2 is 25.24.*/
  ik2=one48/k2;
  /*b1 is Q56; in practice, the integer ranges between -2 and 2.*/
  b1=2*a*(ik2-(1<<24));
  /*b2 is Q56; in practice, the integer ranges between -2 and 2.*/
  b2=(one48<<8)-(4*a<<24)-b1;
  /*All of the filter parameters are Q24.*/
  _f->c[0]=(ogg_int32_t)(b1+((ogg_int64_t)1<<31)>>32);
  _f->c[1]=(ogg_int32_t)(b2+((ogg_int64_t)1<<31)>>32);
  _f->g=(ogg_int32_t)(a+128>>8);
}
예제 #5
0
/*Finds the largest complete sub-tree rooted at the current node and collapses
   it into a single node.
  This procedure is then applied recursively to all the children of that node.
  _binode: The root of the sub-tree to collapse.
           _binode->nbits must be 0 or 1.
  Return: The new root of the collapsed sub-tree.*/
static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode,
 char **_storage){
  oc_huff_node *root;
  size_t        size;
  int           mindepth;
  int           depth;
  int           loccupancy;
  int           occupancy;
  depth=mindepth=oc_huff_tree_mindepth(_binode);
  occupancy=1<<mindepth;
  do{
    loccupancy=occupancy;
    occupancy=oc_huff_tree_occupancy(_binode,++depth);
  }
  while(occupancy>loccupancy&&occupancy>=1<<OC_MAXI(depth-OC_HUFF_SLUSH,0));
  depth--;
  if(depth<=1)return oc_huff_tree_copy(_binode,_storage);
  size=oc_huff_node_size(depth);
  root=oc_huff_node_init(_storage,size,depth);
  root->depth=_binode->depth;
  oc_huff_node_fill(root->nodes,_binode,depth,depth,_storage);
  return root;
}
예제 #6
0
/*Encodes a description of the given Huffman tables.
  Although the codes are stored in the encoder as flat arrays, in the bit
   stream and in the decoder they are structured as a tree.
  This function recovers the tree structure from the flat array and then
   writes it out.
  Note that the codes MUST form a Huffman code, and not merely a prefix-free
   code, since the binary tree is assumed to be full.
  _opb:   The buffer to store the tree in.
  _codes: The Huffman tables to pack.
  Return: 0 on success, or a negative value if one of the given Huffman tables
   does not form a full, prefix-free code.*/
int oc_huff_codes_pack(oggpack_buffer *_opb,
 const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]){
  int i;
  for(i=0;i<TH_NHUFFMAN_TABLES;i++){
    oc_huff_entry entries[TH_NDCT_TOKENS];
    int           bpos;
    int           maxlen;
    int           mask;
    int           j;
    /*First, find the maximum code length so we can align all the bit
       patterns.*/
    maxlen=_codes[i][0].nbits;
    for(j=1;j<TH_NDCT_TOKENS;j++)maxlen=OC_MAXI(_codes[i][j].nbits,maxlen);
    /*It's improbable that a code with more than 32 bits could pass the
       validation below, but abort early in any case.*/
    if(maxlen>32)return TH_EINVAL;
    mask=(1<<(maxlen>>1)<<(maxlen+1>>1))-1;
    /*Copy over the codes into our temporary workspace.
      The bit patterns are aligned, and the original entry each code is from
       is stored as well.*/
    for(j=0;j<TH_NDCT_TOKENS;j++){
      entries[j].shift=maxlen-_codes[i][j].nbits;
      entries[j].pattern=_codes[i][j].pattern<<entries[j].shift&mask;
      entries[j].token=j;
    }
    /*Sort the codes into ascending order.
      This is the order the leaves of the tree will be traversed.*/
    qsort(entries,TH_NDCT_TOKENS,sizeof(entries[0]),huff_entry_cmp);
    /*For each leaf of the tree:*/
    bpos=maxlen;
    for(j=0;j<TH_NDCT_TOKENS;j++){
      ogg_uint32_t bit;
      /*Fail if this code has no bits at all.
        Technically a codebook with a single 0-bit entry is legal, but the
         encoder currently does not support codebooks which do not contain all
         the tokens.*/
      if(entries[j].shift>=maxlen)return TH_EINVAL;
      /*Descend into the tree, writing a bit for each branch.*/
      for(;bpos>entries[j].shift;bpos--)oggpackB_write(_opb,0,1);
      /*Mark this as a leaf node, and write its value.*/
      oggpackB_write(_opb,1,1);
      oggpackB_write(_opb,entries[j].token,5);
      /*For each 1 branch we've descended, back up the tree until we reach a
         0 branch.*/
      bit=(ogg_uint32_t)1<<bpos;
      for(;entries[j].pattern&bit;bpos++)bit<<=1;
      /*Validate the code.*/
      if(j+1<TH_NDCT_TOKENS){
        mask=~(bit-1)<<1;
        /*The next entry should have a 1 bit where we had a 0, and should
           match our code above that bit.
          This verifies both fullness and prefix-freeness simultaneously.*/
        if(!(entries[j+1].pattern&bit)||
         (entries[j].pattern&mask)!=(entries[j+1].pattern&mask)){
          return TH_EINVAL;
        }
      }
      /*If there are no more codes, we should have ascended back to the top
         of the tree.*/
      else if(bpos<maxlen)return TH_EINVAL;
    }
  }
  return 0;
}
예제 #7
0
/*Perform a motion vector search for this macro block against a single
   reference frame.
  As a bonus, individual block motion vectors are computed as well, as much of
   the work can be shared.
  The actual motion vector is stored in the appropriate place in the
   oc_mb_enc_info structure.
  _accum:      Drop frame/golden MV accumulators.
  _mbi:        The macro block index.
  _frame:      The frame to use for SATD calculations and refinement,
                either OC_FRAME_PREV or OC_FRAME_GOLD.
  _frame_full: The frame to perform the 1px search on, one of OC_FRAME_PREV,
                OC_FRAME_GOLD, OC_FRAME_PREV_ORIG, or OC_FRAME_GOLD_ORIG.*/
void oc_mcenc_search_frame(oc_enc_ctx *_enc,oc_mv _accum,int _mbi,int _frame,
 int _frame_full){
  /*Note: Traditionally this search is done using a rate-distortion objective
     function of the form D+lambda*R.
    However, xiphmont tested this and found it produced a small degredation,
     while requiring extra computation.
    This is most likely due to Theora's peculiar MV encoding scheme: MVs are
     not coded relative to a predictor, and the only truly cheap way to use a
     MV is in the LAST or LAST2 MB modes, which are not being considered here.
    Therefore if we use the MV found here, it's only because both LAST and
     LAST2 performed poorly, and therefore the MB is not likely to be uniform
     or suffer from the aperture problem.
    Furthermore we would like to re-use the MV found here for as many MBs as
     possible, so picking a slightly sub-optimal vector to save a bit or two
     may cause increased degredation in many blocks to come.
    We could artificially reduce lambda to compensate, but it's faster to just
     disable it entirely, and use D (the distortion) as the sole criterion.*/
  oc_mcenc_ctx         mcenc;
  const ptrdiff_t     *frag_buf_offs;
  const ptrdiff_t     *fragis;
  const unsigned char *src;
  const unsigned char *ref;
  const unsigned char *satd_ref;
  int                  ystride;
  oc_mb_enc_info      *embs;
  ogg_int32_t          hit_cache[31];
  ogg_int32_t          hitbit;
  unsigned             best_block_err[4];
  unsigned             block_err[4];
  unsigned             best_err;
  int                  best_vec[2];
  int                  best_block_vec[4][2];
  int                  candx;
  int                  candy;
  int                  bi;
  embs=_enc->mb_info;
  /*Find some candidate motion vectors.*/
  oc_mcenc_find_candidates(_enc,&mcenc,_accum,_mbi,_frame);
  /*Clear the cache of locations we've examined.*/
  memset(hit_cache,0,sizeof(hit_cache));
  /*Start with the median predictor.*/
  candx=mcenc.candidates[0][0];
  candy=mcenc.candidates[0][1];
  hit_cache[candy+15]|=(ogg_int32_t)1<<candx+15;
  frag_buf_offs=_enc->state.frag_buf_offs;
  fragis=_enc->state.mb_maps[_mbi][0];
  src=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_IO]];
  ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_frame_full]];
  satd_ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_frame]];
  ystride=_enc->state.ref_ystride[0];
  /*TODO: customize error function for speed/(quality+size) tradeoff.*/
  best_err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
   frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
  best_vec[0]=candx;
  best_vec[1]=candy;
  if(_frame==OC_FRAME_PREV){
    for(bi=0;bi<4;bi++){
      best_block_err[bi]=block_err[bi];
      best_block_vec[bi][0]=candx;
      best_block_vec[bi][1]=candy;
    }
  }
  /*If this predictor fails, move on to set A.*/
  if(best_err>OC_YSAD_THRESH1){
    unsigned err;
    unsigned t2;
    int      ncs;
    int      ci;
    /*Compute the early termination threshold for set A.*/
    t2=embs[_mbi].error[_frame];
    ncs=OC_MINI(3,embs[_mbi].ncneighbors);
    for(ci=0;ci<ncs;ci++){
      t2=OC_MAXI(t2,embs[embs[_mbi].cneighbors[ci]].error[_frame]);
    }
    t2+=(t2>>OC_YSAD_THRESH2_SCALE_BITS)+OC_YSAD_THRESH2_OFFSET;
    /*Examine the candidates in set A.*/
    for(ci=1;ci<mcenc.setb0;ci++){
      candx=mcenc.candidates[ci][0];
      candy=mcenc.candidates[ci][1];
      /*If we've already examined this vector, then we would be using it if it
         was better than what we are using.*/
      hitbit=(ogg_int32_t)1<<candx+15;
      if(hit_cache[candy+15]&hitbit)continue;
      hit_cache[candy+15]|=hitbit;
      err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
       frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
      if(err<best_err){
        best_err=err;
        best_vec[0]=candx;
        best_vec[1]=candy;
      }
      if(_frame==OC_FRAME_PREV){
        for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
          best_block_err[bi]=block_err[bi];
          best_block_vec[bi][0]=candx;
          best_block_vec[bi][1]=candy;
        }
      }
    }
    if(best_err>t2){
      /*Examine the candidates in set B.*/
      for(;ci<mcenc.ncandidates;ci++){
        candx=mcenc.candidates[ci][0];
        candy=mcenc.candidates[ci][1];
        hitbit=(ogg_int32_t)1<<candx+15;
        if(hit_cache[candy+15]&hitbit)continue;
        hit_cache[candy+15]|=hitbit;
        err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
         frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
        if(err<best_err){
          best_err=err;
          best_vec[0]=candx;
          best_vec[1]=candy;
        }
        if(_frame==OC_FRAME_PREV){
          for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
            best_block_err[bi]=block_err[bi];
            best_block_vec[bi][0]=candx;
            best_block_vec[bi][1]=candy;
          }
        }
      }
      /*Use the same threshold for set B as in set A.*/
      if(best_err>t2){
        int best_site;
        int nsites;
        int sitei;
        int site;
        int b;
        /*Square pattern search.*/
        for(;;){
          best_site=4;
          /*Compose the bit flags for boundary conditions.*/
          b=OC_DIV16(-best_vec[0]+1)|OC_DIV16(best_vec[0]+1)<<1|
           OC_DIV16(-best_vec[1]+1)<<2|OC_DIV16(best_vec[1]+1)<<3;
          nsites=OC_SQUARE_NSITES[b];
          for(sitei=0;sitei<nsites;sitei++){
            site=OC_SQUARE_SITES[b][sitei];
            candx=best_vec[0]+OC_SQUARE_DX[site];
            candy=best_vec[1]+OC_SQUARE_DY[site];
            hitbit=(ogg_int32_t)1<<candx+15;
            if(hit_cache[candy+15]&hitbit)continue;
            hit_cache[candy+15]|=hitbit;
            err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
             frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
            if(err<best_err){
              best_err=err;
              best_site=site;
            }
            if(_frame==OC_FRAME_PREV){
              for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
                best_block_err[bi]=block_err[bi];
                best_block_vec[bi][0]=candx;
                best_block_vec[bi][1]=candy;
              }
            }
          }
          if(best_site==4)break;
          best_vec[0]+=OC_SQUARE_DX[best_site];
          best_vec[1]+=OC_SQUARE_DY[best_site];
        }
        /*Final 4-MV search.*/
        /*Simply use 1/4 of the macro block set A and B threshold as the
           individual block threshold.*/
        if(_frame==OC_FRAME_PREV){
          t2>>=2;
          for(bi=0;bi<4;bi++){
            if(best_block_err[bi]>t2){
              /*Square pattern search.
                We do this in a slightly interesting manner.
                We continue to check the SAD of all four blocks in the
                 macro block.
                This gives us two things:
                 1) We can continue to use the hit_cache to avoid duplicate
                     checks.
                    Otherwise we could continue to read it, but not write to it
                     without saving and restoring it for each block.
                    Note that we could still eliminate a large number of
                     duplicate checks by taking into account the site we came
                     from when choosing the site list.
                    We can still do that to avoid extra hit_cache queries, and
                     it might even be a speed win.
                 2) It gives us a slightly better chance of escaping local
                     minima.
                    We would not be here if we weren't doing a fairly bad job
                     in finding a good vector, and checking these vectors can
                     save us from 100 to several thousand points off our SAD 1
                     in 15 times.
                TODO: Is this a good idea?
                Who knows.
                It needs more testing.*/
              for(;;){
                int bestx;
                int besty;
                int bj;
                bestx=best_block_vec[bi][0];
                besty=best_block_vec[bi][1];
                /*Compose the bit flags for boundary conditions.*/
                b=OC_DIV16(-bestx+1)|OC_DIV16(bestx+1)<<1|
                 OC_DIV16(-besty+1)<<2|OC_DIV16(besty+1)<<3;
                nsites=OC_SQUARE_NSITES[b];
                for(sitei=0;sitei<nsites;sitei++){
                  site=OC_SQUARE_SITES[b][sitei];
                  candx=bestx+OC_SQUARE_DX[site];
                  candy=besty+OC_SQUARE_DY[site];
                  hitbit=(ogg_int32_t)1<<candx+15;
                  if(hit_cache[candy+15]&hitbit)continue;
                  hit_cache[candy+15]|=hitbit;
                  err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
                   frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
                  if(err<best_err){
                    best_err=err;
                    best_vec[0]=candx;
                    best_vec[1]=candy;
                  }
                  for(bj=0;bj<4;bj++)if(block_err[bj]<best_block_err[bj]){
                    best_block_err[bj]=block_err[bj];
                    best_block_vec[bj][0]=candx;
                    best_block_vec[bj][1]=candy;
                  }
                }
                if(best_block_vec[bi][0]==bestx&&best_block_vec[bi][1]==besty){
                  break;
                }
              }
            }
          }
        }
      }
예제 #8
0
void oc_quant_params_pack(oggpack_buffer *_opb,const th_quant_info *_qinfo){
  const th_quant_ranges *qranges;
  const th_quant_base   *base_mats[2*3*64];
  int                    indices[2][3][64];
  int                    nbase_mats;
  int                    nbits;
  int                    ci;
  int                    qi;
  int                    qri;
  int                    qti;
  int                    pli;
  int                    qtj;
  int                    plj;
  int                    bmi;
  int                    i;
  i=_qinfo->loop_filter_limits[0];
  for(qi=1;qi<64;qi++)i=OC_MAXI(i,_qinfo->loop_filter_limits[qi]);
  nbits=OC_ILOG_32(i);
  oggpackB_write(_opb,nbits,3);
  for(qi=0;qi<64;qi++){
    oggpackB_write(_opb,_qinfo->loop_filter_limits[qi],nbits);
  }
  /*580 bits for VP3.*/
  i=1;
  for(qi=0;qi<64;qi++)i=OC_MAXI(_qinfo->ac_scale[qi],i);
  nbits=OC_ILOGNZ_32(i);
  oggpackB_write(_opb,nbits-1,4);
  for(qi=0;qi<64;qi++)oggpackB_write(_opb,_qinfo->ac_scale[qi],nbits);
  /*516 bits for VP3.*/
  i=1;
  for(qi=0;qi<64;qi++)i=OC_MAXI(_qinfo->dc_scale[qi],i);
  nbits=OC_ILOGNZ_32(i);
  oggpackB_write(_opb,nbits-1,4);
  for(qi=0;qi<64;qi++)oggpackB_write(_opb,_qinfo->dc_scale[qi],nbits);
  /*Consolidate any duplicate base matrices.*/
  nbase_mats=0;
  for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
    qranges=_qinfo->qi_ranges[qti]+pli;
    for(qri=0;qri<=qranges->nranges;qri++){
      for(bmi=0;;bmi++){
        if(bmi>=nbase_mats){
          base_mats[bmi]=qranges->base_matrices+qri;
          indices[qti][pli][qri]=nbase_mats++;
          break;
        }
        else if(memcmp(base_mats[bmi][0],qranges->base_matrices[qri],
         sizeof(base_mats[bmi][0]))==0){
          indices[qti][pli][qri]=bmi;
          break;
        }
      }
    }
  }
  /*Write out the list of unique base matrices.
    1545 bits for VP3 matrices.*/
  oggpackB_write(_opb,nbase_mats-1,9);
  for(bmi=0;bmi<nbase_mats;bmi++){
    for(ci=0;ci<64;ci++)oggpackB_write(_opb,base_mats[bmi][0][ci],8);
  }
  /*Now store quant ranges and their associated indices into the base matrix
     list.
    46 bits for VP3 matrices.*/
  nbits=OC_ILOG_32(nbase_mats-1);
  for(i=0;i<6;i++){
    qti=i/3;
    pli=i%3;
    qranges=_qinfo->qi_ranges[qti]+pli;
    if(i>0){
      if(qti>0){
        if(qranges->nranges==_qinfo->qi_ranges[qti-1][pli].nranges&&
         memcmp(qranges->sizes,_qinfo->qi_ranges[qti-1][pli].sizes,
         qranges->nranges*sizeof(qranges->sizes[0]))==0&&
         memcmp(indices[qti][pli],indices[qti-1][pli],
         (qranges->nranges+1)*sizeof(indices[qti][pli][0]))==0){
          oggpackB_write(_opb,1,2);
          continue;
        }
      }
      qtj=(i-1)/3;
      plj=(i-1)%3;
      if(qranges->nranges==_qinfo->qi_ranges[qtj][plj].nranges&&
       memcmp(qranges->sizes,_qinfo->qi_ranges[qtj][plj].sizes,
       qranges->nranges*sizeof(qranges->sizes[0]))==0&&
       memcmp(indices[qti][pli],indices[qtj][plj],
       (qranges->nranges+1)*sizeof(indices[qti][pli][0]))==0){
        oggpackB_write(_opb,0,1+(qti>0));
        continue;
      }
      oggpackB_write(_opb,1,1);
    }
    oggpackB_write(_opb,indices[qti][pli][0],nbits);
    for(qi=qri=0;qi<63;qri++){
      oggpackB_write(_opb,qranges->sizes[qri]-1,OC_ILOG_32(62-qi));
      qi+=qranges->sizes[qri];
      oggpackB_write(_opb,indices[qti][pli][qri+1],nbits);
    }
  }
}
예제 #9
0
파일: rate.c 프로젝트: Asvarox/Unvanquished
void oc_enc_rc_resize(oc_enc_ctx *_enc){
  /*If encoding has not yet begun, reset the buffer state.*/
  if(_enc->state.curframe_num<0)oc_enc_rc_reset(_enc);
  else{
    int idt;
    /*Otherwise, update the bounds on the buffer, but not the current
       fullness.*/
    _enc->rc.bits_per_frame=(_enc->state.info.target_bitrate*
     (ogg_int64_t)_enc->state.info.fps_denominator)/
     _enc->state.info.fps_numerator;
    /*Insane framerates or frame sizes mean insane bitrates.
      Let's not get carried away.*/
    if(_enc->rc.bits_per_frame>0x400000000000LL){
      _enc->rc.bits_per_frame=(ogg_int64_t)0x400000000000LL;
    }
    else if(_enc->rc.bits_per_frame<32)_enc->rc.bits_per_frame=32;
    _enc->rc.buf_delay=OC_MAXI(_enc->rc.buf_delay,12);
    _enc->rc.max=_enc->rc.bits_per_frame*_enc->rc.buf_delay;
    _enc->rc.target=(_enc->rc.max+1>>1)+(_enc->rc.bits_per_frame+2>>2)*
     OC_MINI(_enc->keyframe_frequency_force,_enc->rc.buf_delay);
    /*Update the INTER-frame scale filter delay.
      We jump to it immediately if we've already seen enough frames; otherwise
       it is simply set as the new target.*/
    _enc->rc.inter_delay_target=idt=OC_MAXI(_enc->rc.buf_delay>>1,10);
    if(idt<OC_MINI(_enc->rc.inter_delay,_enc->rc.inter_count)){
      oc_iir_filter_init(&_enc->rc.scalefilter[1],idt,
       _enc->rc.scalefilter[1].y[0]);
      _enc->rc.inter_delay=idt;
    }
  }
  /*If we're in pass-2 mode, make sure the frame metrics array is big enough
     to hold frame statistics for the full buffer.*/
  if(_enc->rc.twopass==2){
    int cfm;
    int buf_delay;
    int reset_window;
    buf_delay=_enc->rc.buf_delay;
    reset_window=_enc->rc.frame_metrics==NULL&&(_enc->rc.frames_total[0]==0||
     buf_delay<_enc->rc.frames_total[0]+_enc->rc.frames_total[1]
     +_enc->rc.frames_total[2]);
    cfm=_enc->rc.cframe_metrics;
    /*Only try to resize the frame metrics buffer if a) it's too small and
       b) we were using a finite buffer, or are about to start.*/
    if(cfm<buf_delay&&(_enc->rc.frame_metrics!=NULL||reset_window)){
      oc_frame_metrics *fm;
      int               nfm;
      int               fmh;
      fm=(oc_frame_metrics *)_ogg_realloc(_enc->rc.frame_metrics,
       buf_delay*sizeof(*_enc->rc.frame_metrics));
      if(fm==NULL){
        /*We failed to allocate a finite buffer.*/
        /*If we don't have a valid 2-pass header yet, just return; we'll reset
           the buffer size when we read the header.*/
        if(_enc->rc.frames_total[0]==0)return;
        /*Otherwise revert to the largest finite buffer previously set, or to
           whole-file buffering if we were still using that.*/
        _enc->rc.buf_delay=_enc->rc.frame_metrics!=NULL?
         cfm:_enc->rc.frames_total[0]+_enc->rc.frames_total[1]
         +_enc->rc.frames_total[2];
        oc_enc_rc_resize(_enc);
        return;
      }
      _enc->rc.frame_metrics=fm;
      _enc->rc.cframe_metrics=buf_delay;
      /*Re-organize the circular buffer.*/
      fmh=_enc->rc.frame_metrics_head;
      nfm=_enc->rc.nframe_metrics;
      if(fmh+nfm>cfm){
        int shift;
        shift=OC_MINI(fmh+nfm-cfm,buf_delay-cfm);
        memcpy(fm+cfm,fm,OC_MINI(fmh+nfm-cfm,buf_delay-cfm)*sizeof(*fm));
        if(fmh+nfm>buf_delay)memmove(fm,fm+shift,fmh+nfm-buf_delay);
      }
    }
    /*We were using whole-file buffering; now we're not.*/
    if(reset_window){
      _enc->rc.nframes[0]=_enc->rc.nframes[1]=_enc->rc.nframes[2]=0;
      _enc->rc.scale_sum[0]=_enc->rc.scale_sum[1]=0;
      _enc->rc.scale_window_end=_enc->rc.scale_window0=
       _enc->state.curframe_num+_enc->prev_dup_count+1;
      if(_enc->rc.twopass_buffer_bytes){
        int qti;
        /*We already read the metrics for the first frame in the window.*/
        *(_enc->rc.frame_metrics)=*&_enc->rc.cur_metrics;
        _enc->rc.nframe_metrics++;
        qti=_enc->rc.cur_metrics.frame_type;
        _enc->rc.nframes[qti]++;
        _enc->rc.nframes[2]+=_enc->rc.cur_metrics.dup_count;
        _enc->rc.scale_sum[qti]+=oc_bexp_q24(_enc->rc.cur_metrics.log_scale);
        _enc->rc.scale_window_end+=_enc->rc.cur_metrics.dup_count+1;
        if(_enc->rc.scale_window_end-_enc->rc.scale_window0<buf_delay){
          /*We need more frame data.*/
          _enc->rc.twopass_buffer_bytes=0;
        }
      }
    }
    /*Otherwise, we could shrink the size of the current window, if necessary,
       but leaving it like it is lets us adapt to the new buffer size more
       gracefully.*/
  }
}
예제 #10
0
파일: rate.c 프로젝트: Asvarox/Unvanquished
static void oc_enc_rc_reset(oc_enc_ctx *_enc){
  ogg_int64_t npixels;
  ogg_int64_t ibpp;
  int         inter_delay;
  /*TODO: These parameters should be exposed in a th_encode_ctl() API.*/
  _enc->rc.bits_per_frame=(_enc->state.info.target_bitrate*
   (ogg_int64_t)_enc->state.info.fps_denominator)/
   _enc->state.info.fps_numerator;
  /*Insane framerates or frame sizes mean insane bitrates.
    Let's not get carried away.*/
  if(_enc->rc.bits_per_frame>0x400000000000LL){
    _enc->rc.bits_per_frame=(ogg_int64_t)0x400000000000LL;
  }
  else if(_enc->rc.bits_per_frame<32)_enc->rc.bits_per_frame=32;
  _enc->rc.buf_delay=OC_MAXI(_enc->rc.buf_delay,12);
  _enc->rc.max=_enc->rc.bits_per_frame*_enc->rc.buf_delay;
  /*Start with a buffer fullness of 50% plus 25% of the amount we plan to spend
     on a single keyframe interval.
    We can require fully half the bits in an interval for a keyframe, so this
     initial level gives us maximum flexibility for over/under-shooting in
     subsequent frames.*/
  _enc->rc.target=(_enc->rc.max+1>>1)+(_enc->rc.bits_per_frame+2>>2)*
   OC_MINI(_enc->keyframe_frequency_force,_enc->rc.buf_delay);
  _enc->rc.fullness=_enc->rc.target;
  /*Pick exponents and initial scales for quantizer selection.*/
  npixels=_enc->state.info.frame_width*
   (ogg_int64_t)_enc->state.info.frame_height;
  _enc->rc.log_npixels=oc_blog64(npixels);
  ibpp=npixels/_enc->rc.bits_per_frame;
  if(ibpp<1){
    _enc->rc.exp[0]=59;
    _enc->rc.log_scale[0]=oc_blog64(1997)-OC_Q57(8);
  }
  else if(ibpp<2){
    _enc->rc.exp[0]=55;
    _enc->rc.log_scale[0]=oc_blog64(1604)-OC_Q57(8);
  }
  else{
    _enc->rc.exp[0]=48;
    _enc->rc.log_scale[0]=oc_blog64(834)-OC_Q57(8);
  }
  if(ibpp<4){
    _enc->rc.exp[1]=100;
    _enc->rc.log_scale[1]=oc_blog64(2249)-OC_Q57(8);
  }
  else if(ibpp<8){
    _enc->rc.exp[1]=95;
    _enc->rc.log_scale[1]=oc_blog64(1751)-OC_Q57(8);
  }
  else{
    _enc->rc.exp[1]=73;
    _enc->rc.log_scale[1]=oc_blog64(1260)-OC_Q57(8);
  }
  _enc->rc.prev_drop_count=0;
  _enc->rc.log_drop_scale=OC_Q57(0);
  /*Set up second order followers, initialized according to corresponding
     time constants.*/
  oc_iir_filter_init(&_enc->rc.scalefilter[0],4,
   oc_q57_to_q24(_enc->rc.log_scale[0]));
  inter_delay=(_enc->rc.twopass?
   OC_MAXI(_enc->keyframe_frequency_force,12):_enc->rc.buf_delay)>>1;
  _enc->rc.inter_count=0;
  /*We clamp the actual inter_delay to a minimum of 10 to work within the range
     of values where later incrementing the delay works as designed.
    10 is not an exact choice, but rather a good working trade-off.*/
  _enc->rc.inter_delay=10;
  _enc->rc.inter_delay_target=inter_delay;
  oc_iir_filter_init(&_enc->rc.scalefilter[1],_enc->rc.inter_delay,
   oc_q57_to_q24(_enc->rc.log_scale[1]));
  oc_iir_filter_init(&_enc->rc.vfrfilter,4,
   oc_bexp64_q24(_enc->rc.log_drop_scale));
}
예제 #11
0
/*This format is only used for interlaced content, but is included for
   completeness.

  420jpeg chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  420paldv chroma samples are sited like:
  YR------Y-------YR------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YB------Y-------YB------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YR------Y-------YR------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YB------Y-------YB------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a resampling filter to shift the site locations one quarter pixel (at
   the chroma plane's resolution) to the right.
  Then we use another filter to move the C_r location down one quarter pixel,
   and the C_b location up one quarter pixel.*/
static void y4m_convert_42xpaldv_42xjpeg(y4m_input *_y4m,unsigned char *_dst,
 unsigned char *_aux){
  unsigned char *tmp;
  int            c_w;
  int            c_h;
  int            c_sz;
  int            pli;
  int            y;
  int            x;
  /*Skip past the luma data.*/
  _dst+=_y4m->pic_w*_y4m->pic_h;
  /*Compute the size of each chroma plane.*/
  c_w=(_y4m->pic_w+1)/2;
  c_h=(_y4m->pic_h+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
  c_sz=c_w*c_h;
  tmp=_aux+2*c_sz;
  for(pli=1;pli<3;pli++){
    /*First do the horizontal re-sampling.
      This is the same as the mpeg2 case, except that after the horizontal
       case, we need to apply a second vertical filter.*/
    y4m_42xmpeg2_42xjpeg_helper(tmp,_aux,c_w,c_h);
    _aux+=c_sz;
    switch(pli){
      case 1:{
        /*Slide C_b up a quarter-pel.
          This is the same filter used above, but in the other order.*/
        for(x=0;x<c_w;x++){
          for(y=0;y<OC_MINI(c_h,3);y++){
            _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(tmp[0]
             -9*tmp[OC_MAXI(y-2,0)*c_w]+35*tmp[OC_MAXI(y-1,0)*c_w]
             +114*tmp[y*c_w]-17*tmp[OC_MINI(y+1,c_h-1)*c_w]
             +4*tmp[OC_MINI(y+2,c_h-1)*c_w]+64)>>7,255);
          }
          for(;y<c_h-2;y++){
            _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(tmp[(y-3)*c_w]
             -9*tmp[(y-2)*c_w]+35*tmp[(y-1)*c_w]+114*tmp[y*c_w]
             -17*tmp[(y+1)*c_w]+4*tmp[(y+2)*c_w]+64)>>7,255);
          }
          for(;y<c_h;y++){
            _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(tmp[(y-3)*c_w]
             -9*tmp[(y-2)*c_w]+35*tmp[(y-1)*c_w]+114*tmp[y*c_w]
             -17*tmp[OC_MINI(y+1,c_h-1)*c_w]+4*tmp[(c_h-1)*c_w]+64)>>7,255);
          }
          _dst++;
          tmp++;
        }
        _dst+=c_sz-c_w;
        tmp-=c_w;
      }break;
      case 2:{
        /*Slide C_r down a quarter-pel.
          This is the same as the horizontal filter.*/
        for(x=0;x<c_w;x++){
          for(y=0;y<OC_MINI(c_h,2);y++){
            _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(4*tmp[0]
             -17*tmp[OC_MAXI(y-1,0)*c_w]+114*tmp[y*c_w]
             +35*tmp[OC_MINI(y+1,c_h-1)*c_w]-9*tmp[OC_MINI(y+2,c_h-1)*c_w]
             +tmp[OC_MINI(y+3,c_h-1)*c_w]+64)>>7,255);
          }
          for(;y<c_h-3;y++){
            _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(4*tmp[(y-2)*c_w]
             -17*tmp[(y-1)*c_w]+114*tmp[y*c_w]+35*tmp[(y+1)*c_w]
             -9*tmp[(y+2)*c_w]+tmp[(y+3)*c_w]+64)>>7,255);
          }
          for(;y<c_h;y++){
            _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(4*tmp[(y-2)*c_w]
             -17*tmp[(y-1)*c_w]+114*tmp[y*c_w]+35*tmp[OC_MINI(y+1,c_h-1)*c_w]
             -9*tmp[OC_MINI(y+2,c_h-1)*c_w]+tmp[(c_h-1)*c_w]+64)>>7,255);
          }
          _dst++;
          tmp++;
        }
      }break;
    }
    /*For actual interlaced material, this would have to be done separately on
       each field, and the shift amounts would be different.
      C_r moves down 1/8, C_b up 3/8 in the top field, and C_r moves down 3/8,
       C_b up 1/8 in the bottom field.
      The corresponding filters would be:
       Down 1/8 (reverse order for up): [3 -11 125 15 -4 0]/128
       Down 3/8 (reverse order for up): [4 -19 98 56 -13 2]/128*/
  }
}