示例#1
0
/*Installs the MMX implementations of the block copy and reconstruction
   routines into a DSP function table.
  funcs: The function table whose copy8x8 and recon_* entries are overwritten.*/
void dsp_mmx_recon_init(DspFunctions *funcs)
{
  DspFunctions *const table=funcs;
  TH_DEBUG("enabling accelerated x86_32 mmx recon functions.\n");
  /*The four assignments are independent of one another.*/
  table->recon_inter8x8_half=recon_inter8x8_half__mmx;
  table->recon_inter8x8=recon_inter8x8__mmx;
  table->recon_intra8x8=recon_intra8x8__mmx;
  table->copy8x8=copy8x8__mmx;
}
示例#2
0
/*Fills in the dequantization tables for every coding mode and color plane
   from a set of quantizer info.
  On entry, each _dequant[qti][pli] is expected to point at the storage
   reserved for it in the oc_theora_state (resp. oc_enc_ctx) structure; the
   enquantizer tables elsewhere follow the same convention.
  When the tables computed for one mode/plane are identical to those of an
   earlier one, the pointer is redirected to that earlier copy instead; the
   storage that is no longer referenced is not freed.
  If the memory footprint is a concern, the obvious remedy is to move the
   storage out of its fixed place in the structures and allocate it on demand.
  A much, much better option, however, is to store only the quantization
   matrices used by the current frame and to recompute them as the qi values
   change between frames (this is what VP3 did).
  _dequant:     The table pointers, indexed by coding mode and then plane.
  _pp_dc_scale: If non-NULL, receives one post-processing DC scale per qi.
  _qinfo:       The quantizer info to build the tables from.*/
void oc_dequant_tables_init(oc_quant_table *_dequant[2][3],
 int _pp_dc_scale[64],const th_quant_info *_qinfo){
  /*qti selects the coding mode (intra or inter), pli the plane (Y, U or V).*/
  int qti;
  int pli;
  for(qti=0;qti<2;qti++){
    for(pli=0;pli<3;pli++){
      oc_quant_tables      stage;
      const th_quant_base *range_mats;
      const int           *range_sizes;
      int                  nranges;
      int                  nearlier;
      int                  match;
      /*qi is the quality index, qri the index of the current range.*/
      int                  qi;
      int                  qri;
      range_mats=_qinfo->qi_ranges[qti][pli].base_matrices;
      range_sizes=_qinfo->qi_ranges[qti][pli].sizes;
      nranges=_qinfo->qi_ranges[qti][pli].nranges;
      qi=0;
      /*One pass per range, plus a final pass (qri==nranges) that emits the
         single table for the last base matrix.*/
      for(qri=0;qri<=nranges;qri++){
        th_quant_base base;
        ogg_uint32_t  q;
        int           qi_start;
        int           qi_end;
        int           ci;
        memcpy(base,range_mats[qri],sizeof(base));
        qi_start=qi;
        qi_end=qri==nranges?qi+1:qi+range_sizes[qri];
        /*Walk the quality indices covered by this range.
          The body always runs at least once.*/
        do{
          ogg_uint32_t dc_prod;
          ogg_uint32_t ac_scale;
          /*In the original VP3.2 code, a "sharpness" parameter controlled
             both the rounding offset and the size of the dead zone around 0.
            Here the dead zone is governed by the per-coefficient quality
             thresholds returned by the HVS module: we round down from a more
             accurate value whenever that saves bits without letting the
             reconstruction quality fall below the threshold.
            That VP3.2 code is therefore gone, and what remained of its
             floating point math is done in integers with exact precision.*/
          dc_prod=(ogg_uint32_t)_qinfo->dc_scale[qi]*base[0];
          ac_scale=(ogg_uint32_t)_qinfo->ac_scale[qi];
          /*This value is for the post-processor, not for dequantization.*/
          if(_pp_dc_scale!=NULL)_pp_dc_scale[qi]=(int)(dc_prod/160);
          /*Scale the DC coefficient.*/
          q=(dc_prod/100)<<2;
          q=OC_CLAMPI(OC_DC_QUANT_MIN[qti],q,OC_QUANT_MAX);
          stage[qi][0]=(ogg_uint16_t)q;
          /*Scale the AC coefficients.*/
          for(ci=1;ci<64;ci++){
            q=(ac_scale*base[ci]/100)<<2;
            q=OC_CLAMPI(OC_AC_QUANT_MIN[qti],q,OC_QUANT_MAX);
            stage[qi][ci]=(ogg_uint16_t)q;
          }
          if(++qi<qi_end){
            /*Interpolate the base matrix for the next quality index, rounding
               to nearest.*/
            int rsize;
            int w_lo;
            int w_hi;
            rsize=range_sizes[qri];
            w_lo=qi_end-qi;
            w_hi=qi-qi_start;
            for(ci=0;ci<64;ci++){
              base[ci]=(unsigned char)
               ((2*(w_lo*range_mats[qri][ci]+w_hi*range_mats[qri+1][ci])
               +rsize)/(2*rsize));
            }
          }
        }
        while(qi<qi_end);
      }
      /*The staged tables are complete.
        Commit them to memory only if no earlier mode/plane already holds an
         identical set; sharing one copy helps cache coherency later.
        Earlier entries are visited in (qti,pli) order via a flat index.*/
      nearlier=3*qti+pli;
      for(match=0;match<nearlier;match++){
        if(memcmp(stage,_dequant[match/3][match%3],sizeof(stage))==0)break;
      }
      if(match<nearlier)_dequant[qti][pli]=_dequant[match/3][match%3];
      else memcpy(_dequant[qti][pli],stage,sizeof(stage));
    }
  }

#ifdef _TH_DEBUG_
  /*Dump the calculated quantizer tables.*/
  {
    const char *mode_names[2]={"intra","inter"};
    const char *plane_names[3]={"Y","U","V"};
    int         qi;
    int         ci;
    for(qti=0;qti<2;qti++){
      for(pli=0;pli<3;pli++){
        for(qi=0;qi<64;qi++){
          TH_DEBUG("quantizer table [%s][%s][Q%d] = {",
           mode_names[qti],plane_names[pli],qi);
          for(ci=0;ci<64;ci++){
            if((ci&7)==0)
              TH_DEBUG("\n   ");
            TH_DEBUG("%4d ",_dequant[qti][pli][qi][ci]);
          }
          TH_DEBUG("}\n");
        }
      }
    }
  }
#endif

}
示例#3
0
/*Unpacks the quantization parameters from the bit packer _opb into _qinfo.
  (This description assumes theorapackB_read(_opb,n,&val) stores the next
   n-bit field in val and theorapackB_read1() does the same for a single bit;
   neither helper is visible here -- TODO confirm.)
  Fields are consumed in this order:
   - a 3-bit width, then 64 loop filter limits of that width;
   - a 4-bit (width-1), then 64 AC scale values;
   - a 4-bit (width-1), then 64 DC scale values;
   - a 9-bit (count-1), then that many base matrices of 64 8-bit entries;
   - for each of the 6 (qti,pli) combinations, either a reference to a
      previously decoded set of ranges or an explicit list of range sizes and
      base matrix indices.
  _opb:   The bit packer to read from.
  _qinfo: Receives the decoded parameters.
          The sizes and base_matrices arrays of each explicitly coded range
           set are newly allocated with _ogg_malloc(); a range set decoded by
           reference is a plain struct copy and so shares those pointers with
           its source.
  Return: 0 on success, or TH_EBADHEADER if the range sizes overshoot qi 63 or
           a base matrix index is out of range.
          On failure, whatever was already stored in _qinfo is left for the
           caller to clean up.
  NOTE(review): the results of the three _ogg_malloc() calls are used without
   a NULL check, and the return values of the theorapackB_read*() calls are
   ignored -- confirm whether that is intended.*/
int oc_quant_params_unpack(oggpack_buffer *_opb,
 th_quant_info *_qinfo){
  th_quant_base *base_mats;
  long           val;
  int            nbase_mats;
  /*Scratch copies of the range sizes and base matrix indices for the range
     set currently being decoded.*/
  int            sizes[64];
  int            indices[64];
  int            nbits;
  int            bmi;
  int            ci;
  int            qti;
  int            pli;
  int            qri;
  int            qi;
  int            i;
  /*Loop filter limits: a 3-bit field gives the width of each entry.*/
  theorapackB_read(_opb,3,&val);
  nbits=(int)val;
  for(qi=0;qi<64;qi++){
    theorapackB_read(_opb,nbits,&val);
    _qinfo->loop_filter_limits[qi]=(unsigned char)val;
  }
  /*AC scale values: a 4-bit field gives the entry width minus one.*/
  theorapackB_read(_opb,4,&val);
  nbits=(int)val+1;
  for(qi=0;qi<64;qi++){
    theorapackB_read(_opb,nbits,&val);
    _qinfo->ac_scale[qi]=(ogg_uint16_t)val;
  }
  /*DC scale values: same layout as the AC scale values.*/
  theorapackB_read(_opb,4,&val);
  nbits=(int)val+1;
  for(qi=0;qi<64;qi++){
    theorapackB_read(_opb,nbits,&val);
    _qinfo->dc_scale[qi]=(ogg_uint16_t)val;
  }
  /*Base matrices: a 9-bit field gives the matrix count minus one.
    The matrices live in a temporary array that is freed before returning.*/
  theorapackB_read(_opb,9,&val);
  nbase_mats=(int)val+1;
  base_mats=_ogg_malloc(nbase_mats*sizeof(base_mats[0]));
  for(bmi=0;bmi<nbase_mats;bmi++){
    for(ci=0;ci<64;ci++){
      theorapackB_read(_opb,8,&val);
      base_mats[bmi][ci]=(unsigned char)val;
    }
  }
  /*Width of a base matrix index field (presumably oc_ilog() yields the number
     of bits needed to represent its argument -- TODO confirm).*/
  nbits=oc_ilog(nbase_mats-1);
  /*Decode one set of ranges for each (qti,pli) pair, in the order i=3*qti+pli.*/
  for(i=0;i<6;i++){
    th_quant_ranges *qranges;
    th_quant_base   *qrbms;
    int             *qrsizes;
    qti=i/3;
    pli=i%3;
    qranges=_qinfo->qi_ranges[qti]+pli;
    /*Every set after the first begins with a one-bit flag; a zero flag means
       the set is copied from an earlier one instead of being coded.*/
    if(i>0){
      theorapackB_read1(_opb,&val);
      if(!val){
        int qtj;
        int plj;
        if(qti>0){
          /*For qti>0 a second bit picks the source: the same plane of the
             previous qti when set, otherwise the immediately preceding set.*/
          theorapackB_read1(_opb,&val);
          if(val){
            qtj=qti-1;
            plj=pli;
          }
          else{
            qtj=(i-1)/3;
            plj=(i-1)%3;
          }
        }
        else{
          qtj=(i-1)/3;
          plj=(i-1)%3;
        }
        /*Struct copy: the sizes and base_matrices pointers are shared with
           the source set.*/
        *qranges=*(_qinfo->qi_ranges[qtj]+plj);
        continue;
      }
    }
    /*Explicitly coded set: a first base matrix index, then (size, index)
       pairs until the ranges reach qi 63.*/
    theorapackB_read(_opb,nbits,&val);
    indices[0]=(int)val;
    for(qi=qri=0;qi<63;){
      theorapackB_read(_opb,oc_ilog(62-qi),&val);
      sizes[qri]=(int)val+1;
      qi+=(int)val+1;
      theorapackB_read(_opb,nbits,&val);
      indices[++qri]=(int)val;
    }
    /*Note: The caller is responsible for cleaning up any partially
       constructed qinfo.*/
    if(qi>63){
      _ogg_free(base_mats);
      return TH_EBADHEADER;
    }
    /*qri is now the number of ranges; there is one more base matrix than
       there are ranges.*/
    qranges->nranges=qri;
    qranges->sizes=qrsizes=(int *)_ogg_malloc(qri*sizeof(qrsizes[0]));
    memcpy(qrsizes,sizes,qri*sizeof(qrsizes[0]));
    qrbms=(th_quant_base *)_ogg_malloc((qri+1)*sizeof(qrbms[0]));
    qranges->base_matrices=(const th_quant_base *)qrbms;
    /*Copy the referenced base matrices, from the last index down to the
       first, validating each index on the way.*/
    do{
      bmi=indices[qri];
      /*Note: The caller is responsible for cleaning up any partially
         constructed qinfo.*/
      if(bmi>=nbase_mats){
        _ogg_free(base_mats);
        return TH_EBADHEADER;
      }
      memcpy(qrbms[qri],base_mats[bmi],sizeof(qrbms[qri]));
    }
    while(qri-->0);
  }

#ifdef _TH_DEBUG_
  /* dump the tables */
  {
    /*These shadow the function-level i; the braces keep them local to the
       dump.*/
    int i, j, k, l, m;
    TH_DEBUG("loop filter limits = {");
    /*Each of the three scalar tables is printed 16 values per row; the inner
       loop is what advances i.*/
    for(i=0;i<64;){
      TH_DEBUG("\n        ");
      for(j=0;j<16;i++,j++)
	TH_DEBUG("%3d ",_qinfo->loop_filter_limits[i]);
    }
    TH_DEBUG("\n}\n\n");

    TH_DEBUG("ac scale = {");
    for(i=0;i<64;){
      TH_DEBUG("\n        ");
      for(j=0;j<16;i++,j++)
	TH_DEBUG("%3d ",_qinfo->ac_scale[i]);
    }
    TH_DEBUG("\n}\n\n");

    TH_DEBUG("dc scale = {");
    for(i=0;i<64;){
      TH_DEBUG("\n        ");
      for(j=0;j<16;i++,j++)
	TH_DEBUG("%3d ",_qinfo->dc_scale[i]);
    }
    TH_DEBUG("\n}\n\n");

    /*Dump the range sizes and base matrices of every (qti,pli) set.*/
    for(k=0;k<2;k++)
      for(l=0;l<3;l++){
	char *name[2][3]={
	  {"intra Y bases","intra U bases", "intra V bases"},
	  {"inter Y bases","inter U bases", "inter V bases"}
	};

	th_quant_ranges *r = &_qinfo->qi_ranges[k][l];
	TH_DEBUG("%s = {\n",name[k][l]);
	TH_DEBUG("        ranges = %d\n",r->nranges);
	TH_DEBUG("        intervals = { ");
	for(i=0;i<r->nranges;i++)
	  TH_DEBUG("%3d ",r->sizes[i]);
	TH_DEBUG("}\n");
	TH_DEBUG("\n        matricies = { ");
	/*nranges+1 base matrices, printed 8 values per row.*/
	for(m=0;m<r->nranges+1;m++){
	  TH_DEBUG("\n          { ");
	  for(i=0;i<64;){
	    TH_DEBUG("\n            ");
	    for(j=0;j<8;i++,j++)
	      TH_DEBUG("%3d ",r->base_matrices[m][i]);
	  }
	  TH_DEBUG("\n          }");
	}
	TH_DEBUG("\n        }\n");
      }
  }
    
#endif

  /*The selected matrices were copied into each range set above, so the
     temporary array is no longer needed.*/
  _ogg_free(base_mats);
  return 0;
}
示例#4
0
/*Queries the processor with cpuid and reports which vector instruction sets
   it supports.
  Only the vendor IDs "GenuineIntel", "AuthenticAMD" and "Geode by NSC" are
   recognized; any other vendor yields 0.
  Return: A bitwise OR of OC_CPU_X86_* flags, or 0 if cpuid is unavailable,
           the vendor is unrecognized, or the MMX feature bit is clear.*/
ogg_uint32_t oc_cpu_flags_get(void){
  ogg_uint32_t flags=0;
  ogg_uint32_t eax;
  ogg_uint32_t ebx;
  ogg_uint32_t ecx;
  ogg_uint32_t edx;
  /*Set when the feature bits should be taken from cpuid function 1.*/
  int          use_std_features=0;

# if !defined(_MSC_VER) && !defined(__amd64__) && !defined(__x86_64__)
  /*Check for cpuid: flip bit 21 (0x200000) of EFLAGS and read the register
     back.
    eax receives the value read back, ebx the original value.*/
  __asm__ __volatile__(
   "pushfl\n\t"
   "pushfl\n\t"
   "popl          %0\n\t"
   "movl          %0,%1\n\t"
   "xorl   $0x200000,%0\n\t"
   "pushl         %0\n\t"
   "popfl\n\t"
   "pushfl\n\t"
   "popl          %0\n\t"
   "popfl\n\t"
   :"=r" (eax),
    "=r" (ebx)
   :
   :"cc"
  );
  /*The bit did not change, so there is no cpuid.*/
  if(eax==ebx)return 0;
# endif /* GCC, x86_32 */

  /*Function 0 returns the vendor ID string in ebx, edx and ecx.*/
  cpuid(0,eax,ebx,ecx,edx);
  if(ebx==0x756e6547&&edx==0x49656e69&&ecx==0x6c65746e){
    /*Intel ("GenuineIntel").*/
    use_std_features=1;
  }
  else if((ebx==0x68747541&&edx==0x69746e65&&ecx==0x444d4163)||
   (ebx==0x646f6547&&edx==0x79622065&&ecx==0x43534e20)){
    /*AMD ("AuthenticAMD") or Geode ("Geode by NSC").*/
    cpuid(0x80000000,eax,ebx,ecx,edx);
    if(eax<0x80000001){
      /*Function 0x80000001 is not available: use the same test as Intel.*/
      use_std_features=1;
    }
    else{
      cpuid(0x80000001,eax,ebx,ecx,edx);
      /*Without the MMX bit nothing is reported at all.*/
      if((edx&0x00800000)==0)return 0;
      flags=OC_CPU_X86_MMX;
      if(edx&0x00400000)flags|=OC_CPU_X86_MMXEXT;
      if(edx&0x40000000)flags|=OC_CPU_X86_3DNOWEXT;
      if(edx&0x80000000)flags|=OC_CPU_X86_3DNOW;
    }
  }
  else{
    /*Implement me.*/
    flags=0;
  }
  if(use_std_features){
    cpuid(1,eax,ebx,ecx,edx);
    /*Without the MMX bit nothing is reported at all.*/
    if((edx&0x00800000)==0)return 0;
    flags=OC_CPU_X86_MMX;
    if(edx&0x02000000)flags|=OC_CPU_X86_MMXEXT|OC_CPU_X86_SSE;
    if(edx&0x04000000)flags|=OC_CPU_X86_SSE2;
  }

# ifdef DEBUG
  /*Report the detected instruction sets.*/
  if(flags){
    TH_DEBUG("vectorized instruction sets supported:");
    if(flags&OC_CPU_X86_MMX)TH_DEBUG(" mmx");
    if(flags&OC_CPU_X86_MMXEXT)TH_DEBUG(" mmxext");
    if(flags&OC_CPU_X86_SSE)TH_DEBUG(" sse");
    if(flags&OC_CPU_X86_SSE2)TH_DEBUG(" sse2");
    if(flags&OC_CPU_X86_3DNOW)TH_DEBUG(" 3dnow");
    if(flags&OC_CPU_X86_3DNOWEXT)TH_DEBUG(" 3dnowext");
    TH_DEBUG("\n");
  }
# endif

  return flags;
}