예제 #1
0
void oc_theora_info2th_info(th_info *_info,const theora_info *_ci){
  _info->version_major=_ci->version_major;
  _info->version_minor=_ci->version_minor;
  _info->version_subminor=_ci->version_subminor;
  _info->frame_width=_ci->width;
  _info->frame_height=_ci->height;
  _info->pic_width=_ci->frame_width;
  _info->pic_height=_ci->frame_height;
  _info->pic_x=_ci->offset_x;
  _info->pic_y=_ci->offset_y;
  _info->fps_numerator=_ci->fps_numerator;
  _info->fps_denominator=_ci->fps_denominator;
  _info->aspect_numerator=_ci->aspect_numerator;
  _info->aspect_denominator=_ci->aspect_denominator;
  switch(_ci->colorspace){
    case OC_CS_ITU_REC_470M:_info->colorspace=TH_CS_ITU_REC_470M;break;
    case OC_CS_ITU_REC_470BG:_info->colorspace=TH_CS_ITU_REC_470BG;break;
    default:_info->colorspace=TH_CS_UNSPECIFIED;break;
  }
  switch(_ci->pixelformat){
    case OC_PF_420:_info->pixel_fmt=TH_PF_420;break;
    case OC_PF_422:_info->pixel_fmt=TH_PF_422;break;
    case OC_PF_444:_info->pixel_fmt=TH_PF_444;break;
    default:_info->pixel_fmt=TH_PF_RSVD;
  }
  _info->target_bitrate=_ci->target_bitrate;
  _info->quality=_ci->quality;
  _info->keyframe_granule_shift=_ci->keyframe_frequency_force>0?
   OC_MINI(31,oc_ilog(_ci->keyframe_frequency_force-1)):0;
}
예제 #2
0
/*420jpeg chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  420mpeg2 chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  BR      |       BR      |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  BR      |       BR      |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a resampling filter to shift the site locations one quarter pixel (at
   the chroma plane's resolution) to the right.
  The 4:2:2 modes look exactly the same, except there are twice as many chroma
   lines, and they are vertically co-sited with the luma samples in both the
   mpeg2 and jpeg cases (thus requiring no vertical resampling).*/
static void y4m_convert_42xmpeg2_42xjpeg(y4m_input *_y4m,unsigned char *_dst,
 unsigned char *_aux){
  int c_w;
  int c_h;
  int pli;
  int y;
  int x;
  /*Skip past the luma data.*/
  _dst+=_y4m->pic_w*_y4m->pic_h;
  /*Compute the size of each chroma plane.*/
  c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
  c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
  for(pli=1;pli<3;pli++){
    for(y=0;y<c_h;y++){
      /*Filter: [4 -17 114 35 -9 1]/128, derived from a 6-tap Lanczos
         window.*/
      for(x=0;x<OC_MINI(c_w,2);x++){
        _dst[x]=(unsigned char)OC_CLAMPI(0,4*_aux[0]-17*_aux[OC_MAXI(x-1,0)]+
         114*_aux[x]+35*_aux[OC_MINI(x+1,c_w-1)]-9*_aux[OC_MINI(x+2,c_w-1)]+
         _aux[OC_MINI(x+3,c_w-1)]+64>>7,255);
      }
      for(;x<c_w-3;x++){
        _dst[x]=(unsigned char)OC_CLAMPI(0,4*_aux[x-2]-17*_aux[x-1]+
         114*_aux[x]+35*_aux[x+1]-9*_aux[x+2]+_aux[x+3]+64>>7,255);
      }
      for(;x<c_w;x++){
        _dst[x]=(unsigned char)OC_CLAMPI(0,4*_aux[x-2]-17*_aux[x-1]+
         114*_aux[x]+35*_aux[OC_MINI(x+1,c_w-1)]-9*_aux[OC_MINI(x+2,c_w-1)]+
         _aux[c_w-1]+64>>7,255);
      }
      _dst+=c_w;
      _aux+=c_w;
    }
  }
}
예제 #3
0
/*Finds the depth of shortest branch of the given sub-tree.
  The tree must be binary.
  _binode: The root of the given sub-tree.
           _binode->nbits must be 0 or 1.
  Return: The smallest depth of a leaf node in this sub-tree.
          0 indicates this sub-tree is a leaf node.*/
static int oc_huff_tree_mindepth(oc_huff_node *_binode){
  int depth0;
  int depth1;
  if(_binode->nbits==0)return 0;
  depth0=oc_huff_tree_mindepth(_binode->nodes[0]);
  depth1=oc_huff_tree_mindepth(_binode->nodes[1]);
  return OC_MINI(depth0,depth1)+1;
}
예제 #4
0
/*420jpeg chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  411 chroma samples are sited like:
  YBR-----Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a filter to resample at site locations one eighth pixel (at the source
   chroma plane's horizontal resolution) and five eighths of a pixel to the
   right.
  Then we use another filter to decimate the planes by 2 in the vertical
   direction.*/
static void y4m_convert_411_420jpeg(y4m_input *_y4m,unsigned char *_dst,
 unsigned char *_aux){
  unsigned char *tmp;
  int            c_w;
  int            c_h;
  int            c_sz;
  int            dst_c_w;
  int            dst_c_h;
  int            dst_c_sz;
  int            tmp_sz;
  int            pli;
  int            y;
  int            x;
  /*Skip past the luma data.*/
  _dst+=_y4m->pic_w*_y4m->pic_h;
  /*Compute the size of each chroma plane.*/
  c_w=(_y4m->pic_w+_y4m->src_c_dec_h-1)/_y4m->src_c_dec_h;
  c_h=_y4m->pic_h;
  dst_c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
  dst_c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
  c_sz=c_w*c_h;
  dst_c_sz=dst_c_w*dst_c_h;
  tmp_sz=dst_c_w*c_h;
  tmp=_aux+2*c_sz;
  for(pli=1;pli<3;pli++){
    /*In reality, the horizontal and vertical steps could be pipelined, for
       less memory consumption and better cache performance, but we do them
       separately for simplicity.*/
    /*First do horizontal filtering (convert to 422jpeg)*/
    for(y=0;y<c_h;y++){
      /*Filters: [1 110 18 -1]/128 and [-3 50 86 -5]/128, both derived from a
         4-tap Mitchell window.*/
      for(x=0;x<OC_MINI(c_w,1);x++){
        tmp[x<<1]=(unsigned char)OC_CLAMPI(0,(111*_aux[0]
         +18*_aux[OC_MINI(1,c_w-1)]-_aux[OC_MINI(2,c_w-1)]+64)>>7,255);
        tmp[x<<1|1]=(unsigned char)OC_CLAMPI(0,(47*_aux[0]
         +86*_aux[OC_MINI(1,c_w-1)]-5*_aux[OC_MINI(2,c_w-1)]+64)>>7,255);
      }
      for(;x<c_w-2;x++){
        tmp[x<<1]=(unsigned char)OC_CLAMPI(0,(_aux[x-1]+110*_aux[x]
         +18*_aux[x+1]-_aux[x+2]+64)>>7,255);
        tmp[x<<1|1]=(unsigned char)OC_CLAMPI(0,(-3*_aux[x-1]+50*_aux[x]
         +86*_aux[x+1]-5*_aux[x+2]+64)>>7,255);
      }
      for(;x<c_w;x++){
        tmp[x<<1]=(unsigned char)OC_CLAMPI(0,(_aux[x-1]+110*_aux[x]
         +18*_aux[OC_MINI(x+1,c_w-1)]-_aux[c_w-1]+64)>>7,255);
        if((x<<1|1)<dst_c_w){
          tmp[x<<1|1]=(unsigned char)OC_CLAMPI(0,(-3*_aux[x-1]+50*_aux[x]
           +86*_aux[OC_MINI(x+1,c_w-1)]-5*_aux[c_w-1]+64)>>7,255);
        }
      }
      tmp+=dst_c_w;
      _aux+=c_w;
    }
    tmp-=tmp_sz;
    /*Now do the vertical filtering.*/
    y4m_422jpeg_420jpeg_helper(_dst,tmp,dst_c_w,c_h);
    _dst+=dst_c_sz;
  }
예제 #5
0
/*Perform vertical filtering to reduce a single plane from 4:2:2 to 4:2:0.
  This is used as a helper by several converation routines.*/
static void y4m_422jpeg_420jpeg_helper(unsigned char *_dst,
 const unsigned char *_src,int _c_w,int _c_h){
  int y;
  int x;
  /*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/
  for(x=0;x<_c_w;x++){
    for(y=0;y<OC_MINI(_c_h,2);y+=2){
      _dst[(y>>1)*_c_w]=OC_CLAMPI(0,(64*_src[0]
       +78*_src[OC_MINI(1,_c_h-1)*_c_w]
       -17*_src[OC_MINI(2,_c_h-1)*_c_w]
       +3*_src[OC_MINI(3,_c_h-1)*_c_w]+64)>>7,255);
    }
    for(;y<_c_h-3;y+=2){
      _dst[(y>>1)*_c_w]=OC_CLAMPI(0,(3*(_src[(y-2)*_c_w]+_src[(y+3)*_c_w])
       -17*(_src[(y-1)*_c_w]+_src[(y+2)*_c_w])
       +78*(_src[y*_c_w]+_src[(y+1)*_c_w])+64)>>7,255);
    }
    for(;y<_c_h;y+=2){
      _dst[(y>>1)*_c_w]=OC_CLAMPI(0,(3*(_src[(y-2)*_c_w]
       +_src[(_c_h-1)*_c_w])-17*(_src[(y-1)*_c_w]
       +_src[OC_MINI(y+2,_c_h-1)*_c_w])
       +78*(_src[y*_c_w]+_src[OC_MINI(y+1,_c_h-1)*_c_w])+64)>>7,255);
    }
    _src++;
    _dst++;
  }
}
예제 #6
0
/*420jpeg chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  420mpeg2 chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  BR      |       BR      |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  BR      |       BR      |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a resampling filter to shift the site locations one quarter pixel (at
   the chroma plane's resolution) to the right.
  The 4:2:2 modes look exactly the same, except there are twice as many chroma
   lines, and they are vertically co-sited with the luma samples in both the
   mpeg2 and jpeg cases (thus requiring no vertical resampling).*/
static void y4m_42xmpeg2_42xjpeg_helper(unsigned char *_dst,
 const unsigned char *_src,int _c_w,int _c_h){
  int y;
  int x;
  for(y=0;y<_c_h;y++){
    /*Filter: [4 -17 114 35 -9 1]/128, derived from a 6-tap Lanczos
       window.*/
    for(x=0;x<OC_MINI(_c_w,2);x++){
      _dst[x]=(unsigned char)OC_CLAMPI(0,(4*_src[0]-17*_src[OC_MAXI(x-1,0)]+
       114*_src[x]+35*_src[OC_MINI(x+1,_c_w-1)]-9*_src[OC_MINI(x+2,_c_w-1)]+
       _src[OC_MINI(x+3,_c_w-1)]+64)>>7,255);
    }
    for(;x<_c_w-3;x++){
      _dst[x]=(unsigned char)OC_CLAMPI(0,(4*_src[x-2]-17*_src[x-1]+
       114*_src[x]+35*_src[x+1]-9*_src[x+2]+_src[x+3]+64)>>7,255);
    }
    for(;x<_c_w;x++){
      _dst[x]=(unsigned char)OC_CLAMPI(0,(4*_src[x-2]-17*_src[x-1]+
       114*_src[x]+35*_src[OC_MINI(x+1,_c_w-1)]-9*_src[OC_MINI(x+2,_c_w-1)]+
       _src[_c_w-1]+64)>>7,255);
    }
    _dst+=_c_w;
    _src+=_c_w;
  }
}
예제 #7
0
파일: rate.c 프로젝트: Asvarox/Unvanquished
void oc_enc_calc_lambda(oc_enc_ctx *_enc,int _qti){
  ogg_int64_t lq;
  int         qi;
  int         qi1;
  int         nqis;
  /*For now, lambda is fixed depending on the qi value and frame type:
      lambda=qscale*(qavg[qti][qi]**2),
     where qscale=0.2125.
    This was derived by exhaustively searching for the optimal quantizer for
     the AC coefficients in each block from a number of test sequences for a
     number of fixed lambda values and fitting the peaks of the resulting
     histograms (on the log(qavg) scale).
    The same model applies to both inter and intra frames.
    A more adaptive scheme might perform better.*/
  qi=_enc->state.qis[0];
  /*If rate control is active, use the lambda for the _target_ quantizer.
    This allows us to scale to rates slightly lower than we'd normally be able
     to reach, and give the rate control a semblance of "fractional qi"
     precision.
    TODO: Add API for changing QI, and allow extra precision.*/
  if(_enc->state.info.target_bitrate>0)lq=_enc->rc.log_qtarget;
  else lq=_enc->log_qavg[_qti][qi];
  /*The resulting lambda value is less than 0x500000.*/
  _enc->lambda=(int)oc_bexp64(2*lq-0x4780BD468D6B62BLL);
  /*Select additional quantizers.
    The R-D optimal block AC quantizer statistics suggest that the distribution
     is roughly Gaussian-like with a slight positive skew.
    K-means clustering on log_qavg to select 3 quantizers produces cluster
     centers of {log_qavg-0.6,log_qavg,log_qavg+0.7}.
    Experiments confirm these are relatively good choices.

    Although we do greedy R-D optimization of the qii flags to avoid switching
     too frequently, this becomes ineffective at low rates, either because we
     do a poor job of predicting the actual R-D cost, or the greedy
     optimization is not sufficient.
    Therefore adaptive quantization is disabled above an (experimentally
     suggested) threshold of log_qavg=7.00 (e.g., below INTRA qi=12 or
     INTER qi=20 with current matrices).
    This may need to be revised if the R-D cost estimation or qii flag
     optimization strategies change.*/
  nqis=1;
  if(lq<(OC_Q57(56)>>3)&&!_enc->vp3_compatible&&
   _enc->sp_level<OC_SP_LEVEL_FAST_ANALYSIS){
    qi1=oc_enc_find_qi_for_target(_enc,_qti,OC_MAXI(qi-1,0),0,
     lq+(OC_Q57(7)+5)/10);
    if(qi1!=qi)_enc->state.qis[nqis++]=qi1;
    qi1=oc_enc_find_qi_for_target(_enc,_qti,OC_MINI(qi+1,63),0,
     lq-(OC_Q57(6)+5)/10);
    if(qi1!=qi&&qi1!=_enc->state.qis[nqis-1])_enc->state.qis[nqis++]=qi1;
  }
  _enc->state.nqis=nqis;
}
예제 #8
0
/*422jpeg chroma samples are sited like:
  Y---BR--Y-------Y---BR--Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y---BR--Y-------Y---BR--Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y---BR--Y-------Y---BR--Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y---BR--Y-------Y---BR--Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  411 chroma samples are sited like:
  YBR-----Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YBR-----Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a filter to resample at site locations one eighth pixel (at the source
   chroma plane's horizontal resolution) and five eighths of a pixel to the
   right.*/
static void y4m_convert_411_422jpeg(y4m_input *_y4m,unsigned char *_dst,
 unsigned char *_aux){
  int c_w;
  int dst_c_w;
  int c_h;
  int pli;
  int y;
  int x;
  /*Skip past the luma data.*/
  _dst+=_y4m->pic_w*_y4m->pic_h;
  /*Compute the size of each chroma plane.*/
  c_w=(_y4m->pic_w+_y4m->src_c_dec_h-1)/_y4m->src_c_dec_h;
  dst_c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
  c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
  for(pli=1;pli<3;pli++){
    for(y=0;y<c_h;y++){
      /*Filters: [1 110 18 -1]/128 and [-3 50 86 -5]/128, both derived from a
         4-tap Mitchell window.*/
      for(x=0;x<OC_MINI(c_w,1);x++){
        _dst[x<<1]=(unsigned char)OC_CLAMPI(0,111*_aux[0]+
         18*_aux[OC_MINI(1,c_w-1)]-_aux[OC_MINI(2,c_w-1)]+64>>7,255);
        _dst[x<<1|1]=(unsigned char)OC_CLAMPI(0,47*_aux[0]+
         86*_aux[OC_MINI(1,c_w-1)]-5*_aux[OC_MINI(2,c_w-1)]+64>>7,255);
      }
      for(;x<c_w-2;x++){
        _dst[x<<1]=(unsigned char)OC_CLAMPI(0,_aux[x-1]+110*_aux[x]+
         18*_aux[x+1]-_aux[x+2]+64>>7,255);
        _dst[x<<1|1]=(unsigned char)OC_CLAMPI(0,-3*_aux[x-1]+50*_aux[x]+
         86*_aux[x+1]-5*_aux[x+2]+64>>7,255);
      }
      for(;x<c_w;x++){
        _dst[x<<1]=(unsigned char)OC_CLAMPI(0,_aux[x-1]+110*_aux[x]+
         18*_aux[OC_MINI(x+1,c_w-1)]-_aux[c_w-1]+64>>7,255);
        if((x<<1|1)<dst_c_w){
          _dst[x<<1|1]=(unsigned char)OC_CLAMPI(0,-3*_aux[x-1]+50*_aux[x]+
           86*_aux[OC_MINI(x+1,c_w-1)]-5*_aux[c_w-1]+64>>7,255);
        }
      }
      _dst+=dst_c_w;
      _aux+=c_w;
    }
  }
예제 #9
0
/*This format is only used for interlaced content, but is included for
   completeness.

  420jpeg chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  420paldv chroma samples are sited like:
  YR------Y-------YR------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YB------Y-------YB------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YR------Y-------YR------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  YB------Y-------YB------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a resampling filter to shift the site locations one quarter pixel (at
   the chroma plane's resolution) to the right.
  Then we use another filter to move the C_r location down one quarter pixel,
   and the C_b location up one quarter pixel.*/
static void y4m_convert_42xpaldv_42xjpeg(y4m_input *_y4m,unsigned char *_dst,
 unsigned char *_aux){
  unsigned char *tmp;
  int            c_w;
  int            c_h;
  int            c_sz;
  int            pli;
  int            y;
  int            x;
  /*Skip past the luma data.*/
  _dst+=_y4m->pic_w*_y4m->pic_h;
  /*Compute the size of each chroma plane.*/
  c_w=(_y4m->pic_w+1)/2;
  c_h=(_y4m->pic_h+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
  c_sz=c_w*c_h;
  /*First do the horizontal re-sampling.
    This is the same as the mpeg2 case, except that after the horizontal case,
     we need to apply a second vertical filter.*/
  tmp=_aux+2*c_sz;
  for(pli=1;pli<3;pli++){
    for(y=0;y<c_h;y++){
      /*Filter: [4 -17 114 35 -9 1]/128, derived from a 6-tap Lanczos
         window.*/
      for(x=0;x<OC_MINI(c_w,2);x++){
        tmp[x]=(unsigned char)OC_CLAMPI(0,4*_aux[0]-17*_aux[OC_MAXI(x-1,0)]+
         114*_aux[x]+35*_aux[OC_MINI(x+1,c_w-1)]-9*_aux[OC_MINI(x+2,c_w-1)]+
         _aux[OC_MINI(x+3,c_w-1)]+64>>7,255);
      }
      for(;x<c_w-3;x++){
        tmp[x]=(unsigned char)OC_CLAMPI(0,4*_aux[x-2]-17*_aux[x-1]+
         114*_aux[x]+35*_aux[x+1]-9*_aux[x+2]+_aux[x+3]+64>>7,255);
      }
      for(;x<c_w;x++){
        tmp[x]=(unsigned char)OC_CLAMPI(0,4*_aux[x-2]-17*_aux[x-1]+
         114*_aux[x]+35*_aux[OC_MINI(x+1,c_w-1)]-9*_aux[OC_MINI(x+2,c_w-1)]+
         _aux[c_w-1]+64>>7,255);
      }
      tmp+=c_w;
      _aux+=c_w;
    }
    switch(pli){
      case 1:{
        tmp-=c_sz;
        /*Slide C_b up a quarter-pel.
          This is the same filter used above, but in the other order.*/
        for(x=0;x<c_w;x++){
          for(y=0;y<OC_MINI(c_h,3);y++){
            _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,tmp[0]-
             9*tmp[OC_MAXI(y-2,0)*c_w]+35*tmp[OC_MAXI(y-1,0)*c_w]+
             114*tmp[y*c_w]-17*tmp[OC_MINI(y+1,c_h-1)*c_w]+
             4*tmp[OC_MINI(y+2,c_h-1)*c_w]+64>>7,255);
          }
          for(;y<c_h-2;y++){
            _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,tmp[(y-3)*c_w]-
             9*tmp[(y-2)*c_w]+35*tmp[(y-1)*c_w]+114*tmp[y*c_w]-
             17*tmp[(y+1)*c_w]+4*tmp[(y+2)*c_w]+64>>7,255);
          }
          for(;y<c_h;y++){
            _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,tmp[(y-3)*c_w]-
             9*tmp[(y-2)*c_w]+35*tmp[(y-1)*c_w]+114*tmp[y*c_w]-
             17*tmp[OC_MINI(y+1,c_h-1)*c_w]+4*tmp[(c_h-1)*c_w]+64>>7,255);
          }
          _dst++;
          tmp++;
        }
        _dst+=c_sz-c_w;
        tmp-=c_w;
      }break;
      case 2:{
        tmp-=c_sz;
        /*Slide C_r down a quarter-pel.
          This is the same as the horizontal filter.*/
        for(x=0;x<c_w;x++){
          for(y=0;y<OC_MINI(c_h,2);y++){
            _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,4*tmp[0]-
             17*tmp[OC_MAXI(y-1,0)*c_w]+114*tmp[y*c_w]+
             35*tmp[OC_MINI(y+1,c_h-1)*c_w]-9*tmp[OC_MINI(y+2,c_h-1)*c_w]+
             tmp[OC_MINI(y+3,c_h-1)*c_w]+64>>7,255);
          }
          for(;y<c_h-3;y++){
            _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,4*tmp[(y-2)*c_w]-
             17*tmp[(y-1)*c_w]+114*tmp[y*c_w]+35*tmp[(y+1)*c_w]-
             9*tmp[(y+2)*c_w]+tmp[(y+3)*c_w]+64>>7,255);
          }
          for(;y<c_h;y++){
            _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,4*tmp[(y-2)*c_w]-
             17*tmp[(y-1)*c_w]+114*tmp[y*c_w]+35*tmp[OC_MINI(y+1,c_h-1)*c_w]-
             9*tmp[OC_MINI(y+2,c_h-1)*c_w]+tmp[(c_h-1)*c_w]+64>>7,255);
          }
          _dst++;
          tmp++;
        }
      }break;
    }
    /*For actual interlaced material, this would have to be done separately on
       each field, and the shift amounts would be different.
      C_r moves down 1/8, C_b up 3/8 in the top field, and C_r moves down 3/8,
       C_b up 1/8 in the bottom field.
      The corresponding filters would be:
       Down 1/8 (reverse order for up): [3 -11 125 15 -4 0]/128
       Down 3/8 (reverse order for up): [4 -19 98 56 -13 2]/128*/
  }
}
예제 #10
0
/*Perform a motion vector search for this macro block against a single
   reference frame.
  As a bonus, individual block motion vectors are computed as well, as much of
   the work can be shared.
  The actual motion vector is stored in the appropriate place in the
   oc_mb_enc_info structure.
  _accum:      Drop frame/golden MV accumulators.
  _mbi:        The macro block index.
  _frame:      The frame to use for SATD calculations and refinement,
                either OC_FRAME_PREV or OC_FRAME_GOLD.
  _frame_full: The frame to perform the 1px search on, one of OC_FRAME_PREV,
                OC_FRAME_GOLD, OC_FRAME_PREV_ORIG, or OC_FRAME_GOLD_ORIG.*/
void oc_mcenc_search_frame(oc_enc_ctx *_enc,oc_mv _accum,int _mbi,int _frame,
 int _frame_full){
  /*Note: Traditionally this search is done using a rate-distortion objective
     function of the form D+lambda*R.
    However, xiphmont tested this and found it produced a small degredation,
     while requiring extra computation.
    This is most likely due to Theora's peculiar MV encoding scheme: MVs are
     not coded relative to a predictor, and the only truly cheap way to use a
     MV is in the LAST or LAST2 MB modes, which are not being considered here.
    Therefore if we use the MV found here, it's only because both LAST and
     LAST2 performed poorly, and therefore the MB is not likely to be uniform
     or suffer from the aperture problem.
    Furthermore we would like to re-use the MV found here for as many MBs as
     possible, so picking a slightly sub-optimal vector to save a bit or two
     may cause increased degredation in many blocks to come.
    We could artificially reduce lambda to compensate, but it's faster to just
     disable it entirely, and use D (the distortion) as the sole criterion.*/
  oc_mcenc_ctx         mcenc;
  const ptrdiff_t     *frag_buf_offs;
  const ptrdiff_t     *fragis;
  const unsigned char *src;
  const unsigned char *ref;
  const unsigned char *satd_ref;
  int                  ystride;
  oc_mb_enc_info      *embs;
  ogg_int32_t          hit_cache[31];
  ogg_int32_t          hitbit;
  unsigned             best_block_err[4];
  unsigned             block_err[4];
  unsigned             best_err;
  int                  best_vec[2];
  int                  best_block_vec[4][2];
  int                  candx;
  int                  candy;
  int                  bi;
  embs=_enc->mb_info;
  /*Find some candidate motion vectors.*/
  oc_mcenc_find_candidates(_enc,&mcenc,_accum,_mbi,_frame);
  /*Clear the cache of locations we've examined.*/
  memset(hit_cache,0,sizeof(hit_cache));
  /*Start with the median predictor.*/
  candx=mcenc.candidates[0][0];
  candy=mcenc.candidates[0][1];
  hit_cache[candy+15]|=(ogg_int32_t)1<<candx+15;
  frag_buf_offs=_enc->state.frag_buf_offs;
  fragis=_enc->state.mb_maps[_mbi][0];
  src=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_IO]];
  ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_frame_full]];
  satd_ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[_frame]];
  ystride=_enc->state.ref_ystride[0];
  /*TODO: customize error function for speed/(quality+size) tradeoff.*/
  best_err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
   frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
  best_vec[0]=candx;
  best_vec[1]=candy;
  if(_frame==OC_FRAME_PREV){
    for(bi=0;bi<4;bi++){
      best_block_err[bi]=block_err[bi];
      best_block_vec[bi][0]=candx;
      best_block_vec[bi][1]=candy;
    }
  }
  /*If this predictor fails, move on to set A.*/
  if(best_err>OC_YSAD_THRESH1){
    unsigned err;
    unsigned t2;
    int      ncs;
    int      ci;
    /*Compute the early termination threshold for set A.*/
    t2=embs[_mbi].error[_frame];
    ncs=OC_MINI(3,embs[_mbi].ncneighbors);
    for(ci=0;ci<ncs;ci++){
      t2=OC_MAXI(t2,embs[embs[_mbi].cneighbors[ci]].error[_frame]);
    }
    t2+=(t2>>OC_YSAD_THRESH2_SCALE_BITS)+OC_YSAD_THRESH2_OFFSET;
    /*Examine the candidates in set A.*/
    for(ci=1;ci<mcenc.setb0;ci++){
      candx=mcenc.candidates[ci][0];
      candy=mcenc.candidates[ci][1];
      /*If we've already examined this vector, then we would be using it if it
         was better than what we are using.*/
      hitbit=(ogg_int32_t)1<<candx+15;
      if(hit_cache[candy+15]&hitbit)continue;
      hit_cache[candy+15]|=hitbit;
      err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
       frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
      if(err<best_err){
        best_err=err;
        best_vec[0]=candx;
        best_vec[1]=candy;
      }
      if(_frame==OC_FRAME_PREV){
        for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
          best_block_err[bi]=block_err[bi];
          best_block_vec[bi][0]=candx;
          best_block_vec[bi][1]=candy;
        }
      }
    }
    if(best_err>t2){
      /*Examine the candidates in set B.*/
      for(;ci<mcenc.ncandidates;ci++){
        candx=mcenc.candidates[ci][0];
        candy=mcenc.candidates[ci][1];
        hitbit=(ogg_int32_t)1<<candx+15;
        if(hit_cache[candy+15]&hitbit)continue;
        hit_cache[candy+15]|=hitbit;
        err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
         frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
        if(err<best_err){
          best_err=err;
          best_vec[0]=candx;
          best_vec[1]=candy;
        }
        if(_frame==OC_FRAME_PREV){
          for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
            best_block_err[bi]=block_err[bi];
            best_block_vec[bi][0]=candx;
            best_block_vec[bi][1]=candy;
          }
        }
      }
      /*Use the same threshold for set B as in set A.*/
      if(best_err>t2){
        int best_site;
        int nsites;
        int sitei;
        int site;
        int b;
        /*Square pattern search.*/
        for(;;){
          best_site=4;
          /*Compose the bit flags for boundary conditions.*/
          b=OC_DIV16(-best_vec[0]+1)|OC_DIV16(best_vec[0]+1)<<1|
           OC_DIV16(-best_vec[1]+1)<<2|OC_DIV16(best_vec[1]+1)<<3;
          nsites=OC_SQUARE_NSITES[b];
          for(sitei=0;sitei<nsites;sitei++){
            site=OC_SQUARE_SITES[b][sitei];
            candx=best_vec[0]+OC_SQUARE_DX[site];
            candy=best_vec[1]+OC_SQUARE_DY[site];
            hitbit=(ogg_int32_t)1<<candx+15;
            if(hit_cache[candy+15]&hitbit)continue;
            hit_cache[candy+15]|=hitbit;
            err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
             frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
            if(err<best_err){
              best_err=err;
              best_site=site;
            }
            if(_frame==OC_FRAME_PREV){
              for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
                best_block_err[bi]=block_err[bi];
                best_block_vec[bi][0]=candx;
                best_block_vec[bi][1]=candy;
              }
            }
          }
          if(best_site==4)break;
          best_vec[0]+=OC_SQUARE_DX[best_site];
          best_vec[1]+=OC_SQUARE_DY[best_site];
        }
        /*Final 4-MV search.*/
        /*Simply use 1/4 of the macro block set A and B threshold as the
           individual block threshold.*/
        if(_frame==OC_FRAME_PREV){
          t2>>=2;
          for(bi=0;bi<4;bi++){
            if(best_block_err[bi]>t2){
              /*Square pattern search.
                We do this in a slightly interesting manner.
                We continue to check the SAD of all four blocks in the
                 macro block.
                This gives us two things:
                 1) We can continue to use the hit_cache to avoid duplicate
                     checks.
                    Otherwise we could continue to read it, but not write to it
                     without saving and restoring it for each block.
                    Note that we could still eliminate a large number of
                     duplicate checks by taking into account the site we came
                     from when choosing the site list.
                    We can still do that to avoid extra hit_cache queries, and
                     it might even be a speed win.
                 2) It gives us a slightly better chance of escaping local
                     minima.
                    We would not be here if we weren't doing a fairly bad job
                     in finding a good vector, and checking these vectors can
                     save us from 100 to several thousand points off our SAD 1
                     in 15 times.
                TODO: Is this a good idea?
                Who knows.
                It needs more testing.*/
              for(;;){
                int bestx;
                int besty;
                int bj;
                bestx=best_block_vec[bi][0];
                besty=best_block_vec[bi][1];
                /*Compose the bit flags for boundary conditions.*/
                b=OC_DIV16(-bestx+1)|OC_DIV16(bestx+1)<<1|
                 OC_DIV16(-besty+1)<<2|OC_DIV16(besty+1)<<3;
                nsites=OC_SQUARE_NSITES[b];
                for(sitei=0;sitei<nsites;sitei++){
                  site=OC_SQUARE_SITES[b][sitei];
                  candx=bestx+OC_SQUARE_DX[site];
                  candy=besty+OC_SQUARE_DY[site];
                  hitbit=(ogg_int32_t)1<<candx+15;
                  if(hit_cache[candy+15]&hitbit)continue;
                  hit_cache[candy+15]|=hitbit;
                  err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
                   frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
                  if(err<best_err){
                    best_err=err;
                    best_vec[0]=candx;
                    best_vec[1]=candy;
                  }
                  for(bj=0;bj<4;bj++)if(block_err[bj]<best_block_err[bj]){
                    best_block_err[bj]=block_err[bj];
                    best_block_vec[bj][0]=candx;
                    best_block_vec[bj][1]=candy;
                  }
                }
                if(best_block_vec[bi][0]==bestx&&best_block_vec[bi][1]==besty){
                  break;
                }
              }
            }
          }
        }
      }
예제 #11
0
파일: rate.c 프로젝트: Asvarox/Unvanquished
void oc_enc_rc_resize(oc_enc_ctx *_enc){
  /*If encoding has not yet begun, reset the buffer state.*/
  if(_enc->state.curframe_num<0)oc_enc_rc_reset(_enc);
  else{
    int idt;
    /*Otherwise, update the bounds on the buffer, but not the current
       fullness.*/
    _enc->rc.bits_per_frame=(_enc->state.info.target_bitrate*
     (ogg_int64_t)_enc->state.info.fps_denominator)/
     _enc->state.info.fps_numerator;
    /*Insane framerates or frame sizes mean insane bitrates.
      Let's not get carried away.*/
    if(_enc->rc.bits_per_frame>0x400000000000LL){
      _enc->rc.bits_per_frame=(ogg_int64_t)0x400000000000LL;
    }
    else if(_enc->rc.bits_per_frame<32)_enc->rc.bits_per_frame=32;
    _enc->rc.buf_delay=OC_MAXI(_enc->rc.buf_delay,12);
    _enc->rc.max=_enc->rc.bits_per_frame*_enc->rc.buf_delay;
    _enc->rc.target=(_enc->rc.max+1>>1)+(_enc->rc.bits_per_frame+2>>2)*
     OC_MINI(_enc->keyframe_frequency_force,_enc->rc.buf_delay);
    /*Update the INTER-frame scale filter delay.
      We jump to it immediately if we've already seen enough frames; otherwise
       it is simply set as the new target.*/
    _enc->rc.inter_delay_target=idt=OC_MAXI(_enc->rc.buf_delay>>1,10);
    if(idt<OC_MINI(_enc->rc.inter_delay,_enc->rc.inter_count)){
      oc_iir_filter_init(&_enc->rc.scalefilter[1],idt,
       _enc->rc.scalefilter[1].y[0]);
      _enc->rc.inter_delay=idt;
    }
  }
  /*If we're in pass-2 mode, make sure the frame metrics array is big enough
     to hold frame statistics for the full buffer.*/
  if(_enc->rc.twopass==2){
    int cfm;
    int buf_delay;
    int reset_window;
    buf_delay=_enc->rc.buf_delay;
    reset_window=_enc->rc.frame_metrics==NULL&&(_enc->rc.frames_total[0]==0||
     buf_delay<_enc->rc.frames_total[0]+_enc->rc.frames_total[1]
     +_enc->rc.frames_total[2]);
    cfm=_enc->rc.cframe_metrics;
    /*Only try to resize the frame metrics buffer if a) it's too small and
       b) we were using a finite buffer, or are about to start.*/
    if(cfm<buf_delay&&(_enc->rc.frame_metrics!=NULL||reset_window)){
      oc_frame_metrics *fm;
      int               nfm;
      int               fmh;
      fm=(oc_frame_metrics *)_ogg_realloc(_enc->rc.frame_metrics,
       buf_delay*sizeof(*_enc->rc.frame_metrics));
      if(fm==NULL){
        /*We failed to allocate a finite buffer.*/
        /*If we don't have a valid 2-pass header yet, just return; we'll reset
           the buffer size when we read the header.*/
        if(_enc->rc.frames_total[0]==0)return;
        /*Otherwise revert to the largest finite buffer previously set, or to
           whole-file buffering if we were still using that.*/
        _enc->rc.buf_delay=_enc->rc.frame_metrics!=NULL?
         cfm:_enc->rc.frames_total[0]+_enc->rc.frames_total[1]
         +_enc->rc.frames_total[2];
        oc_enc_rc_resize(_enc);
        return;
      }
      _enc->rc.frame_metrics=fm;
      _enc->rc.cframe_metrics=buf_delay;
      /*Re-organize the circular buffer.*/
      fmh=_enc->rc.frame_metrics_head;
      nfm=_enc->rc.nframe_metrics;
      if(fmh+nfm>cfm){
        int shift;
        shift=OC_MINI(fmh+nfm-cfm,buf_delay-cfm);
        memcpy(fm+cfm,fm,OC_MINI(fmh+nfm-cfm,buf_delay-cfm)*sizeof(*fm));
        if(fmh+nfm>buf_delay)memmove(fm,fm+shift,fmh+nfm-buf_delay);
      }
    }
    /*We were using whole-file buffering; now we're not.*/
    if(reset_window){
      _enc->rc.nframes[0]=_enc->rc.nframes[1]=_enc->rc.nframes[2]=0;
      _enc->rc.scale_sum[0]=_enc->rc.scale_sum[1]=0;
      _enc->rc.scale_window_end=_enc->rc.scale_window0=
       _enc->state.curframe_num+_enc->prev_dup_count+1;
      if(_enc->rc.twopass_buffer_bytes){
        int qti;
        /*We already read the metrics for the first frame in the window.*/
        *(_enc->rc.frame_metrics)=*&_enc->rc.cur_metrics;
        _enc->rc.nframe_metrics++;
        qti=_enc->rc.cur_metrics.frame_type;
        _enc->rc.nframes[qti]++;
        _enc->rc.nframes[2]+=_enc->rc.cur_metrics.dup_count;
        _enc->rc.scale_sum[qti]+=oc_bexp_q24(_enc->rc.cur_metrics.log_scale);
        _enc->rc.scale_window_end+=_enc->rc.cur_metrics.dup_count+1;
        if(_enc->rc.scale_window_end-_enc->rc.scale_window0<buf_delay){
          /*We need more frame data.*/
          _enc->rc.twopass_buffer_bytes=0;
        }
      }
    }
    /*Otherwise, we could shrink the size of the current window, if necessary,
       but leaving it like it is lets us adapt to the new buffer size more
       gracefully.*/
  }
}
예제 #12
0
파일: rate.c 프로젝트: Asvarox/Unvanquished
static void oc_enc_rc_reset(oc_enc_ctx *_enc){
  ogg_int64_t npixels;
  ogg_int64_t ibpp;
  int         inter_delay;
  /*TODO: These parameters should be exposed in a th_encode_ctl() API.*/
  _enc->rc.bits_per_frame=(_enc->state.info.target_bitrate*
   (ogg_int64_t)_enc->state.info.fps_denominator)/
   _enc->state.info.fps_numerator;
  /*Insane framerates or frame sizes mean insane bitrates.
    Let's not get carried away.*/
  if(_enc->rc.bits_per_frame>0x400000000000LL){
    _enc->rc.bits_per_frame=(ogg_int64_t)0x400000000000LL;
  }
  else if(_enc->rc.bits_per_frame<32)_enc->rc.bits_per_frame=32;
  _enc->rc.buf_delay=OC_MAXI(_enc->rc.buf_delay,12);
  _enc->rc.max=_enc->rc.bits_per_frame*_enc->rc.buf_delay;
  /*Start with a buffer fullness of 50% plus 25% of the amount we plan to spend
     on a single keyframe interval.
    We can require fully half the bits in an interval for a keyframe, so this
     initial level gives us maximum flexibility for over/under-shooting in
     subsequent frames.*/
  _enc->rc.target=(_enc->rc.max+1>>1)+(_enc->rc.bits_per_frame+2>>2)*
   OC_MINI(_enc->keyframe_frequency_force,_enc->rc.buf_delay);
  _enc->rc.fullness=_enc->rc.target;
  /*Pick exponents and initial scales for quantizer selection.*/
  npixels=_enc->state.info.frame_width*
   (ogg_int64_t)_enc->state.info.frame_height;
  _enc->rc.log_npixels=oc_blog64(npixels);
  ibpp=npixels/_enc->rc.bits_per_frame;
  if(ibpp<1){
    _enc->rc.exp[0]=59;
    _enc->rc.log_scale[0]=oc_blog64(1997)-OC_Q57(8);
  }
  else if(ibpp<2){
    _enc->rc.exp[0]=55;
    _enc->rc.log_scale[0]=oc_blog64(1604)-OC_Q57(8);
  }
  else{
    _enc->rc.exp[0]=48;
    _enc->rc.log_scale[0]=oc_blog64(834)-OC_Q57(8);
  }
  if(ibpp<4){
    _enc->rc.exp[1]=100;
    _enc->rc.log_scale[1]=oc_blog64(2249)-OC_Q57(8);
  }
  else if(ibpp<8){
    _enc->rc.exp[1]=95;
    _enc->rc.log_scale[1]=oc_blog64(1751)-OC_Q57(8);
  }
  else{
    _enc->rc.exp[1]=73;
    _enc->rc.log_scale[1]=oc_blog64(1260)-OC_Q57(8);
  }
  _enc->rc.prev_drop_count=0;
  _enc->rc.log_drop_scale=OC_Q57(0);
  /*Set up second order followers, initialized according to corresponding
     time constants.*/
  oc_iir_filter_init(&_enc->rc.scalefilter[0],4,
   oc_q57_to_q24(_enc->rc.log_scale[0]));
  inter_delay=(_enc->rc.twopass?
   OC_MAXI(_enc->keyframe_frequency_force,12):_enc->rc.buf_delay)>>1;
  _enc->rc.inter_count=0;
  /*We clamp the actual inter_delay to a minimum of 10 to work within the range
     of values where later incrementing the delay works as designed.
    10 is not an exact choice, but rather a good working trade-off.*/
  _enc->rc.inter_delay=10;
  _enc->rc.inter_delay_target=inter_delay;
  oc_iir_filter_init(&_enc->rc.scalefilter[1],_enc->rc.inter_delay,
   oc_q57_to_q24(_enc->rc.log_scale[1]));
  oc_iir_filter_init(&_enc->rc.vfrfilter,4,
   oc_bexp64_q24(_enc->rc.log_drop_scale));
}