C++ (Cpp) S32MUL Exemples

Exemple #1

0

Afficher le fichier

Fichier : dequant_mpeg.c Projet : DanielGit/Intrisit201202

/*
 * Dequantise one block of MPEG intra coefficients in place.
 *
 * The routine is written with MXU register macros (S32xxx / D16xxx / D32xxx
 * operating on xr0..xr15) that are defined outside this file; the per-macro
 * remarks below are the reviewer's reading of the mnemonics and of the
 * original comments -- confirm against the Ingenic MXU manual.
 *
 * data                 coefficients, read and overwritten.  The loop runs 32
 *                      times and advances by 4 bytes (two int16_t) each time,
 *                      so 64 coefficients are processed.
 * quant                quantiser applied to the AC coefficients.
 * dcscalar             multiplier applied to data[0].
 * mpeg_quant_matrices  used directly as the intra matrix; read two uint16_t
 *                      entries per iteration.
 *
 * Always returns 0.
 */
uint32_t
dequant_mpeg_intra_mxu(int16_t * data,
		       // const int16_t * coeff,
					 const uint32_t quant,
					 const uint32_t dcscalar,
					 const uint16_t * mpeg_quant_matrices)
{
	const uint16_t *intra_matrix = mpeg_quant_matrices;
	int32_t i = 0;
	/* Compute the DC term (data[0] * dcscalar) now and park it in xr6; it is
	 * written back only after the loop, which overwrites data[0] on the way. */
	
	/* xr3 / xr4: lower / upper clamp bounds; xr5: quant. */
	S32I2M(xr3,-2048);
	S32I2M(xr4,2047);
    	S32I2M(xr5,quant);

	S32MUL(xr0,xr6,(int32_t)data[0],dcscalar);
	/* xr9 presumably holds the constant 1 after this -- TODO confirm the
	 * S32LUI pattern argument. */
	S32LUI(xr9,1,0);
	D16MUL_WW(xr0,xr6,xr9,xr6);
	/* Clamp the DC term to [-2048, 2047]. */
	S32MIN(xr6,xr6,xr4);
	S32MAX(xr6,xr6,xr3);

	/* Step both pointers back by two 16-bit elements (4 bytes); S32LDI below
	 * presumably pre-increments by 4 bytes before loading, so the first
	 * iteration reads elements 0 and 1. */
       	data-=2;
	intra_matrix-=2;
	

	for (i = 0; i < 32; i++) {
	    /* Load two coefficients and the two matching matrix entries. */
	    S32LDI(xr1,data,4);
	    S32LDI(xr2,intra_matrix,4);

	    /* Keep signed copies of both coefficients (in xr13 / xr14) for
	     * restoring the sign later, then take absolute values in xr1. */
	    D16MUL_LW(xr13,xr9,xr1,xr14); // signed copies of data[i] and data[i+1]
	    D16CPS(xr1,xr1,xr1);         

	    /* (abs(level) * (intra_matrix[i] * quant)) >> 3 */
	    D16MUL_LW(xr7,xr5,xr2,xr8);
	    S32SFL(xr15,xr7,xr8,xr2,3);
	    D16MUL_WW(xr7,xr1,xr2,xr8);
	    D32SLR(xr7,xr7,xr8,xr8,3); 
	   
	    /* Restore the sign of data[i+1] and clamp to [-2048, 2047]. */
	    S32CPS(xr7,xr7,xr13);
	    S32MAX(xr10,xr7,xr3);
	    S32MIN(xr10,xr10,xr4);

            /* Restore the sign of data[i] and clamp to [-2048, 2047]. */
	    S32CPS(xr8,xr8,xr14);
	    S32MAX(xr11,xr8,xr3);
	    S32MIN(xr11,xr11,xr4);

            /* Pack the two 16-bit results into xr12 and store them back over
             * the pair that was just loaded. */
            S32SFL(xr0,xr10,xr11,xr12,3);
	    S32STD(xr12,data,0);	    
        } 
 
	/* Assuming the 4-byte pre-increment above, data now sits 62 elements past
	 * the start of the block, so byte offset -62*2 addresses data[0]. */
	S16STD(xr6,data,-62*2,0);//xr6 to data[0]
	return(0);
}

Exemple #2

0

Afficher le fichier

Fichier : mdct_fix.c Projet : DanielGit/Intrisit8000

/*
 * Fixed-point half IMDCT: pre-rotation, FFT, post-rotation.
 *
 * s       MDCT context; supplies nbits, the twiddle tables tcos / tsin, the
 *         bit-reversal table fft.revtab and the fft.fft_calc callback.
 * output  destination buffer, reinterpreted as an array of FFTComplex_fix.
 * input   source coefficients, read from both ends towards the middle.
 *
 * The active pre-rotation path uses MXU register macros (S32MUL, S32MADD,
 * S32MSUB, D32SLL, S32M2I on xrN) defined outside this file; remarks on them
 * are the reviewer's reading of the mnemonics -- confirm against the MXU
 * manual.
 */
void imdct_half_fix_c(MDCTContext_fix *s, FFTSample_fix *output,
                      const FFTSample_fix *input)
{
    //PMON_ON(qmf);
    int k, n8, n4, n2, n, j,j1;
    const FFTSample_fix *in1, *in2;
    const unsigned short *revtab = s->fft.revtab;
    const FFTSample_fix *tcos = s->tcos;
    const FFTSample_fix *tsin = s->tsin;
    FFTComplex_fix *z = (FFTComplex_fix *)output;

    n = 1 << s->nbits;/* transform size, e.g. 64 when nbits == 6 */
    n2 = n >> 1;/* e.g. 32 */
    n4 = n >> 2;/* e.g. 16 */
    n8 = n >> 3;/* e.g. 8 */
    /* pre rotation */
    in1 = input; /* walks forward from the first input sample */
    in2 = input + n2 - 1;/* walks backward from sample n2-1 */
    for(k = 0; k < n8; k++) {
#if 0
        /* Disabled reference form: one complex product per iteration. */
        j=revtab[k];
        FFT_CMUL_fix(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]);
        in1 += 2;
        in2 -= 2;
#else
        /*
         * Active form: two complex products per iteration, written to
         * z[revtab[k]] and z[revtab[n4-k-1]].  Each product is accumulated
         * with a multiply followed by a multiply-subtract / multiply-add,
         * then shifted left by one and copied out of the xr registers.
         *
         * NOTE(review): the second product uses the neighbouring samples
         * in2[-1] / in1[+1] together with the same tcos[k] / tsin[k] as the
         * first one, whereas the disabled reference form above produces only
         * one output per iteration -- confirm this pairing is intended.
         */
        FFTSample_fix _are,_bre,_aim,_bim,are,aim;
        _are = *in2;
        _bre = tcos[k];
        _aim = *in1;
        _bim = tsin[k];
        j=revtab[k];
        /* n is reused as a scratch index here; it no longer holds the
         * transform size after the first iteration (n is not read again
         * below, only n8 is). */
        n=n4-k-1;
        j1=revtab[n];
        /* First product: _are*_bre and _are*_bim. */
        S32MUL(xr1,xr2, _are, _bre);
        S32MUL(xr3, xr4, _are, _bim);
        in2--;
        are = *in2;
        /* Second product: are*_bre and are*_bim. */
        S32MUL(xr7,xr8, are, _bre);
        S32MUL(xr9, xr10, are, _bim);
        /* Finish the first product: subtract _aim*_bim, add _aim*_bre. */
        S32MSUB(xr1, xr2, _aim, _bim);
        S32MADD(xr3, xr4, _aim, _bre);        ;
        in1++;
        aim = *in1;
        D32SLL(xr5,xr1,xr3,xr6,1);
        /* Finish the second product: subtract aim*_bim, add aim*_bre. */
        S32MSUB(xr7, xr8, aim, _bim);
        S32MADD(xr9, xr10, aim, _bre);
        z[j].re=S32M2I(xr5);
        D32SLL(xr11,xr7,xr9,xr12,1);
        z[j].im=S32M2I(xr6);
        /* Net effect per iteration: in1 advances by 2, in2 retreats by 2. */
        in1++;
        in2--;
        z[j1].re=S32M2I(xr11);
        z[j1].im=S32M2I(xr12);
#endif
    }

    /* In-place FFT over z via the context's callback. */
    s->fft.fft_calc(&s->fft, z);

    /* post rotation + reordering */
    /* XXX: optimize */
    /* Works outward from the middle: entries n8-k-1 and n8+k are rotated
     * together and their imaginary parts are swapped (i0 goes to the lower
     * entry, i1 to the upper one). */
    for(k = 0; k < n8; k++) {
        FFTSample_fix r0, i0, r1, i1;
        FFT_CMUL_fix(r0, i1, z[n8-k-1].im, z[n8-k-1].re, tsin[n8-k-1], tcos[n8-k-1]);
        FFT_CMUL_fix(r1, i0, z[n8+k  ].im, z[n8+k  ].re, tsin[n8+k  ], tcos[n8+k  ]);
        z[n8-k-1].re = r0;
        z[n8-k-1].im = i0;
        z[n8+k  ].re = r1;
        z[n8+k  ].im = i1;
    }
    //PMON_OFF(qmf);
}

Exemple #3

0

Afficher le fichier

/*
 * In-place fixed-point FFT butterfly passes over z (inverse variant, per the
 * function name).
 *
 * s  FFT context; supplies nbits (log2 of the number of points) and the
 *    twiddle table exptab.
 * z  array of 1 << s->nbits complex values, read and overwritten.
 *
 * The active paths use MXU register macros (S32LDD / S32STD / D32ADD_xx /
 * S32MUL / S32MADD / S32MSUB / D32SLL on xrN) defined outside this file.  The
 * byte offsets 0, 4, 8, ... passed to S32LDD / S32STD assume FFTComplex_fix
 * is two consecutive 32-bit fields (re, im) -- TODO confirm.  The disabled
 * "#else" branches show the portable form each MXU sequence replaces.
 *
 * NOTE(review): "pass 1" runs do { ... } while (--j) with j = np >> 2, so
 * nbits < 2 would start with j == 0 and iterate far beyond the buffer; the
 * caller is presumably expected to guarantee nbits >= 2.
 */
void fft_calc_fix_inverse(FFTContext_fix *s, FFTComplex_fix *z)
{
    int ln = s->nbits;
    int j, np, np2;
    int nblocks, nloops;
    register FFTComplex_fix *p, *q;
    FFTComplex_fix *exptab = s->exptab;
    int l;
    /* tmp_re / tmp_im are only referenced by the disabled "#else" branch. */
    FFTSample_fix tmp_re, tmp_im;
    np = 1 << ln;
      /* Butterfly passes; the original author's note refers to 4 stages for N = 16. */
    /* pass 0 */
#if 0
    /* Disabled: pass 0 is folded into the 4-point block of "pass 1" below. */
    p=&z[0];
    j=(np >> 1);
    do {
      /*
	 X(k) = G(k)+H(k)*W  (= e j*0)
      */
        FFT_BF_fix(p[0].re, p[0].im, p[1].re, p[1].im,
           p[0].re, p[0].im, p[1].re, p[1].im);

        p+=2;
    } while (--j);
#endif

    /* pass 1 */
    /* Handles four complex values (p[0]..p[3]) per iteration, np/4 times. */
    p=&z[0];
    j=np >> 2;
    do {
#if 1
      /* Load p[0..3] as eight 32-bit words: xr1/xr2 = p[0].re/.im,
       * xr3/xr4 = p[1], xr5/xr6 = p[2], xr7/xr8 = p[3]. */
      S32LDD(xr1,p,0);
      S32LDD(xr2,p,4);
      S32LDD(xr3,p,8);
      S32LDD(xr4,p,12);
      S32LDD(xr5,p,16);
      S32LDD(xr6,p,20);
      S32LDD(xr7,p,24);
      S32LDD(xr8,p,28);
      /* Butterflies on (p[0], p[1]) and (p[2], p[3]); presumably each
       * D32ADD_AS yields a sum in its first and a difference in its last
       * operand. */
      D32ADD_AS(xr1,xr1,xr3,xr3);
      D32ADD_AS(xr2,xr2,xr4,xr4);
      D32ADD_AS(xr5,xr5,xr7,xr7);
      D32ADD_AS(xr6,xr6,xr8,xr8);
      /* Butterfly on (p[0], p[2]). */
      D32ADD_AS(xr1,xr1,xr5,xr5);
      D32ADD_AS(xr2,xr2,xr6,xr6);
      /* Butterfly on (p[1], p[3]) with the p[3] operand swapped / negated,
       * matching the (-p[3].im, p[3].re) arguments in the "#else" form. */
      D32ADD_SA(xr3,xr3,xr8,xr9);
      D32ADD_AS(xr4,xr4,xr7,xr8);
      /* Store back; note p[3].re comes from xr9 and p[3].im from xr8. */
      S32STD(xr1,p,0);
      S32STD(xr2,p,4);
      S32STD(xr3,p,8);
      S32STD(xr4,p,12);
      S32STD(xr5,p,16);
      S32STD(xr6,p,20);
      S32STD(xr9,p,24);
      S32STD(xr8,p,28);
#else
      FFT_BF_fix(p[0].re, p[0].im, p[1].re, p[1].im,
		 p[0].re, p[0].im, p[1].re, p[1].im);
      FFT_BF_fix(p[2].re, p[2].im, p[3].re, p[3].im,
		 p[2].re, p[2].im, p[3].re, p[3].im);

      FFT_BF_fix(p[0].re, p[0].im, p[2].re, p[2].im,
		 p[0].re, p[0].im, p[2].re, p[2].im);
      FFT_BF_fix(p[1].re, p[1].im, p[3].re, p[3].im,
		 p[1].re, p[1].im, -p[3].im, p[3].re);
#endif
      p+=4;
    } while (--j);

    /* pass 2 .. ln-1 */
    /* Each outer iteration halves nblocks and doubles nloops (the distance
     * between butterfly partners p and q); the loop ends once nblocks
     * reaches 0. */
    nblocks = np >> 3;
    nloops = 1 << 2;
    np2 = np >> 1;
    do {
        p = z;
        q = z + nloops;
        for (j = 0; j < nblocks; ++j) {
            /* First butterfly of the block needs no twiddle multiply. */
#if 1
	  S32LDD(xr1,p,0);
	  S32LDD(xr2,p,4);
	  S32LDD(xr3,q,0);
	  S32LDD(xr4,q,4);
	  D32ADD_AS(xr1,xr1,xr3,xr3);
	  D32ADD_AS(xr2,xr2,xr4,xr4);
	  S32STD(xr1,p,0);
	  S32STD(xr2,p,4);
	  S32STD(xr3,q,0);
	  S32STD(xr4,q,4);
#else
	  FFT_BF_fix(p->re, p->im, q->re, q->im,
		     p->re, p->im, q->re, q->im);
#endif

	  p++;
	  q++;
	  /* Remaining butterflies of the block: l steps through exptab in
	   * strides of nblocks. */
	  for(l = nblocks; l < np2; l += nblocks) {
	    /* Multiply *q by the twiddle factor exptab[l], then butterfly the
	       product with *p.  The original note describes the product as

	    (-j 2*PI/N *km)
	    H(i) * E
	    */
#if 1
	    FFTSample_fix _are = exptab[l].re;
	    FFTSample_fix _bre = q->re;
	    FFTSample_fix _aim = exptab[l].im;
	    FFTSample_fix _bim = q->im;

	    /* Real part accumulates _are*_bre - _aim*_bim in xr1:xr2,
	     * imaginary part _are*_bim + _aim*_bre in xr5:xr6; the loads of
	     * *p are interleaved between the multiplies. */
	    S32MUL(xr1, xr2, _are, _bre);	    
            S32MUL(xr5, xr6, _are, _bim);
	    S32LDD(xr7,p,0);	    
            S32MSUB(xr1, xr2, _aim, _bim);	
	    S32MADD(xr5, xr6, _aim, _bre);	
	    S32LDD(xr8,p,4);
	    D32SLL(xr1, xr1, xr5, xr5, 1);	

	    /* Butterfly: results go back to *p (xr7, xr8) and *q (xr1, xr5). */
	    D32ADD_AS(xr7,xr7,xr1,xr1);
	    D32ADD_AS(xr8,xr8,xr5,xr5);
	    S32STD(xr7,p,0);
	    S32STD(xr8,p,4);
	    S32STD(xr1,q,0);
	    S32STD(xr5,q,4);

#else
	    FFT_CMUL_fix(tmp_re, tmp_im, exptab[l].re, exptab[l].im, q->re, q->im);
	    FFT_BF_fix(p->re, p->im, q->re, q->im,
		       p->re, p->im, tmp_re, tmp_im);
#endif
	    p++;
	    q++;
	  }
	  /* Skip over the partner half to reach the next block. */
	  p += nloops;
	  q += nloops;
        }
        nblocks = nblocks >> 1;
        nloops = nloops << 1;
    } while (nblocks);
}

Exemple #4

0

Afficher le fichier

Fichier : dequant_h263.c Projet : DanielGit/Intrisit201202

/*
 * Dequantise H.263-style intra coefficients in place.
 *
 * The routine is written with MXU register macros (S32xxx / D16xxx / D32xxx
 * operating on xr0..xr15) that are defined outside this file; the per-macro
 * remarks below are the reviewer's reading of the mnemonics and of the
 * original comments -- confirm against the Ingenic MXU manual.
 *
 * data                 coefficients, read and overwritten two at a time.
 * yuv_len              coefficient count supplied by the caller; converted
 *                      below into a number of two-coefficient iterations.
 * quant                quantiser; the AC terms use 2*quant and an additive
 *                      term derived from quant.
 * dcscalar             multiplier applied to data[0].
 * mpeg_quant_matrices  not referenced by this function.
 *
 * Always returns 0.
 */
uint32_t
dequant_h263_intra_mxu(int16_t * data,	uint8_t yuv_len,				
					 const uint32_t quant,
					 const uint32_t dcscalar,
					 const uint16_t * mpeg_quant_matrices)
{     
 	uint32_t i = 0; 
	
	/* xr9 presumably holds the constant 1 after this -- TODO confirm the
	 * S32LUI pattern argument. */
	S32LUI(xr9,1,0);
	S32I2M(xr1,quant);
	
	D32SLL(xr5,xr1,xr0,xr0,1);/* xr5 = quant_m_2 (quant shifted left by 1) */

	/* quant_add in xr2: appears to select quant when quant is odd and
	 * quant - 1 when it is even (xr15 = quant & 1 drives the two
	 * conditional moves) -- TODO confirm MOVN/MOVZ semantics. */
	S32AND(xr15,xr1,xr9);
	S32MOVN(xr2,xr15,xr1);
	D32ADD_SS(xr1,xr1,xr9,xr3);
	S32MOVZ(xr2,xr15,xr1);

	/* xr3 / xr4: -2048 / 2047.  The later S32I2M overwrites whatever
	 * D32ADD_SS left in xr3. */
	S32I2M(xr3,-2048);
	S32I2M(xr4,2047);

	/* part1: DC term = data[0] * dcscalar, clamped to [-2048, 2047] and kept
	 * in xr6 until after the loop. */
	//S32MUL(xr4,xr6,*data,dcscalar);
	S32MUL(xr0,xr6,(int32_t)data[0],dcscalar);
	D16MUL_WW(xr0,xr6,xr9,xr6);

	S32MIN(xr6,xr6,xr4);
	S32MAX(xr6,xr6,xr3);  

	/* part2: AC terms, two coefficients per iteration. */
	/* Convert the coefficient count into an iteration count; the result is
	 * stored back into the uint8_t parameter. */
	yuv_len = ((yuv_len&~1)+3)>>1;
	/* Step back two 16-bit elements; S32LDI presumably pre-increments by
	 * 4 bytes before loading. */
	data-=2;    
	for (i = 0; i < yuv_len; i++) {
	    S32LDI(xr1,data,4);
      
	    /* Keep signed copies of both coefficients in xr13 / xr14, then take
	     * absolute values in xr1. */
	    D16MUL_LW(xr13,xr9,xr1,xr14);// signed copies of data[i] and data[i+1] 	    
	    D16CPS(xr1,xr1,xr1); 

	    /*  quant_m_2 * acLevel + quant_add */
	    D16MUL_LW(xr7,xr5,xr1,xr8);
            D32ADD_AA(xr7,xr7,xr2,xr0);
	    D32ADD_AA(xr8,xr8,xr2,xr0);

#if 0	   
	    /* Disabled variant: restore sign, saturate to [-2048, 2047], and
	     * force the result to 0 when the input coefficient was 0. */
	    /* -2048 < data[i+1] <2047  */
            S32CPS(xr7,xr7,xr13);
	    S32MAX(xr10,xr7,xr3);
	    S32MIN(xr10,xr10,xr4);
	    S32MOVZ(xr10,xr13,xr13);
	    
	    /* -2048 < data[i] <2047  */
	    S32CPS(xr8,xr8,xr14);
	    S32MAX(xr11,xr8,xr3);
	    S32MIN(xr11,xr11,xr4);
	    S32MOVZ(xr11,xr14,xr14);
#else

	    /* Active variant: AND the magnitude with xr4 (2047), restore the
	     * sign, and force the result to 0 when the input coefficient was 0.
	     * NOTE(review): this looks like a bit mask rather than a saturating
	     * clamp, so magnitudes above 2047 would wrap instead of clipping --
	     * confirm that is acceptable. */
	    /* data[i+1] */
	    S32AND(xr7,xr7,xr4);
            S32CPS(xr10,xr7,xr13);
	    S32MOVZ(xr10,xr13,xr13);
	    
	    /* data[i] */
	    S32AND(xr8,xr8,xr4);
	    S32CPS(xr11,xr8,xr14);
	    S32MOVZ(xr11,xr14,xr14);

#endif
	   
            /* Pack the two 16-bit results into xr12 and store them back over
             * the pair that was just loaded. */
            S32SFL(xr0,xr10,xr11,xr12,3);
	   
	       S32STD(xr12,data,0);
        }  
	/* Assuming the 4-byte pre-increment above, data has advanced by
	 * yuv_len*2 - 2 elements from the start of the block, so this address
	 * is data[0]; write the DC term there. */
	S16STD(xr6,data-(yuv_len*2-2),0,0);// data[0]

	return(0);
}

Exemple #5

0

Afficher le fichier

Fichier : layer12.c Projet : knone1/hardware-ingenic-xb4780

/*
 * NAME:	layer->II()
 * DESCRIPTION:	decode a single Layer II frame
 *
 * Reads, in this order, the bit allocations, the scalefactor selection
 * info, the scalefactors and the samples from stream->ptr, and fills
 * frame->sbsample[ch][0..35][0..31] for every channel (subbands at or above
 * sblimit are zeroed).
 *
 * Returns 0 on success.  Returns -1 with stream->error set to
 * MAD_ERROR_BADMODE, MAD_ERROR_BADCRC or, only when OPT_STRICT is defined,
 * MAD_ERROR_BADSCALEFACTOR.
 */
int mad_layer_II(struct mad_stream *stream, struct mad_frame *frame)
{
  struct mad_header *header = &frame->header;
  struct mad_bitptr start;
  unsigned int index, sblimit, nbal, nch, bound, gr, ch, s, sb;
  unsigned char const *offsets;
  unsigned char allocation[2][32], scfsi[2][32], scalefactor[2][32][3];
  mad_fixed_t samples[3];

  nch = MAD_NCHANNELS(header);

  /* Choose the row of sbquant_table (sblimit + per-subband offsets). */
  if (header->flags & MAD_FLAG_LSF_EXT)
    index = 4;
  else if (header->flags & MAD_FLAG_FREEFORMAT)
    goto freeformat;  /* jumps into the last else-branch below */
  else {
    unsigned long bitrate_per_channel;

    bitrate_per_channel = header->bitrate;
    if (nch == 2) {
      bitrate_per_channel /= 2;

# if defined(OPT_STRICT)
      /*
       * ISO/IEC 11172-3 allows only single channel mode for 32, 48, 56, and
       * 80 kbps bitrates in Layer II, but some encoders ignore this
       * restriction. We enforce it if OPT_STRICT is defined.
       */
      if (bitrate_per_channel <= 28000 || bitrate_per_channel == 40000) {
	stream->error = MAD_ERROR_BADMODE;
	return -1;
      }
# endif
    }
    else {  /* nch == 1 */
      if (bitrate_per_channel > 192000 && bitrate_per_channel != 320000) {
	/*
	 * ISO/IEC 11172-3 does not allow single channel mode for 224, 256,
	 * 320, or 384 kbps bitrates in Layer II.
	 */
	/* NOTE(review): the condition exempts 320000 although the comment
	 * above lists 320 kbps as disallowed -- confirm which is intended. */
	stream->error = MAD_ERROR_BADMODE;
	return -1;
      }
    }

    if (bitrate_per_channel <= 48000)
      index = (header->samplerate == 32000) ? 3 : 2;
    else if (bitrate_per_channel <= 80000)
      index = 0;
    else {
    freeformat:
      index = (header->samplerate == 48000) ? 0 : 1;
    }
  }

  sblimit = sbquant_table[index].sblimit;
  offsets = sbquant_table[index].offsets;

  /* bound: first subband whose allocation and samples are shared by both
   * channels; 32 means no sharing.  It is lowered for joint stereo and then
   * capped at sblimit. */
  bound = 32;
  if (header->mode == MAD_MODE_JOINT_STEREO) {
    header->flags |= MAD_FLAG_I_STEREO;
    bound = 4 + header->mode_extension * 4;
  }

  if (bound > sblimit)
    bound = sblimit;

  /* Remember where the CRC-protected region starts. */
  start = stream->ptr;

  /* decode bit allocations */

  /* Below bound: one allocation per channel. */
  for (sb = 0; sb < bound; ++sb) {
    nbal = bitalloc_table[offsets[sb]].nbal;

    for (ch = 0; ch < nch; ++ch)
      allocation[ch][sb] = mad_bit_read(&stream->ptr, nbal);
  }

  /* From bound up to sblimit: a single allocation copied to both channels. */
  for (sb = bound; sb < sblimit; ++sb) {
    nbal = bitalloc_table[offsets[sb]].nbal;

    allocation[0][sb] =
    allocation[1][sb] = mad_bit_read(&stream->ptr, nbal);
  }

  /* decode scalefactor selection info */

  /* scfsi is only read (2 bits) for subbands with a non-zero allocation; the
   * other entries stay uninitialised and are never consulted below. */
  for (sb = 0; sb < sblimit; ++sb) {
    for (ch = 0; ch < nch; ++ch) {
      if (allocation[ch][sb])
	scfsi[ch][sb] = mad_bit_read(&stream->ptr, 2);
    }
  }

  /* check CRC word */

  /* The CRC covers the bits consumed since `start`; a mismatch is fatal
   * unless MAD_OPTION_IGNORECRC is set. */
  if (header->flags & MAD_FLAG_PROTECTION) {
    header->crc_check =
      mad_bit_crc(start, mad_bit_length(&start, &stream->ptr),
		  header->crc_check);

    if (header->crc_check != header->crc_target &&
	!(frame->options & MAD_OPTION_IGNORECRC)) {
      stream->error = MAD_ERROR_BADCRC;
      return -1;
    }
  }

  /* decode scalefactors */

  /*
   * Three scalefactors per (channel, subband).  Index 0 is always read; scfsi
   * decides how the other two are obtained:
   *   0: read [1] and [2]
   *   1: read [2]; [1] = [0]
   *   2: [1] = [2] = [0]
   *   3: read [2]; [1] = [2]
   */
  for (sb = 0; sb < sblimit; ++sb) {
    for (ch = 0; ch < nch; ++ch) {
      if (allocation[ch][sb]) {
	scalefactor[ch][sb][0] = mad_bit_read(&stream->ptr, 6);

	switch (scfsi[ch][sb]) {
	case 2:
	  scalefactor[ch][sb][2] =
	  scalefactor[ch][sb][1] =
	  scalefactor[ch][sb][0];
	  break;

	case 0:
	  scalefactor[ch][sb][1] = mad_bit_read(&stream->ptr, 6);
	  /* fall through */

	case 1:
	case 3:
	  scalefactor[ch][sb][2] = mad_bit_read(&stream->ptr, 6);
	}

	/* scfsi 1 copies [0] into [1]; scfsi 3 copies [2] into [1]. */
	if (scfsi[ch][sb] & 1)
	  scalefactor[ch][sb][1] = scalefactor[ch][sb][scfsi[ch][sb] - 1];

# if defined(OPT_STRICT)
	/*
	 * Scalefactor index 63 does not appear in Table B.1 of
	 * ISO/IEC 11172-3. Nonetheless, other implementations accept it,
	 * so we only reject it if OPT_STRICT is defined.
	 */
	if (scalefactor[ch][sb][0] == 63 ||
	    scalefactor[ch][sb][1] == 63 ||
	    scalefactor[ch][sb][2] == 63) {
	  stream->error = MAD_ERROR_BADSCALEFACTOR;
	  return -1;
	}
# endif
      }
    }
  }

  /* decode samples */

  /* 12 groups of 3 samples each; group gr fills rows 3*gr .. 3*gr+2 and uses
   * scalefactor number gr/4. */
  for (gr = 0; gr < 12; ++gr) {
    /* Subbands below bound: samples are read separately per channel. */
    for (sb = 0; sb < bound; ++sb) {
      for (ch = 0; ch < nch; ++ch) {
	if ((index = allocation[ch][sb])) {
#ifdef JZ4750_OPT
          /*
           * MXU path (macros defined outside this file): multiply each of
           * the three samples by the scalefactor, extract the scaled result
           * and store one value per row.  sb_ptr starts one row before row
           * 3*gr; presumably each S32SDIV advances it by 32 words (one row)
           * before storing -- TODO confirm against the MXU manual.
           * NOTE(review): gr is unsigned, so 3*gr-1 wraps when gr == 0; this
           * relies on 32-bit address wrap-around to land one row before
           * row 0.
           */
          mad_fixed_t sf_val;
          mad_fixed_t *sb_ptr;
          sb_ptr = &(frame->sbsample[ch][3*gr-1][sb]);
          sf_val = sf_table[scalefactor[ch][sb][gr/4]];
	  index = offset_table[bitalloc_table[offsets[sb]].offset][index - 1];

	  II_samples(&stream->ptr, &qc_table[index], samples);
          S32MUL(xr1,xr2, samples[0], sf_val);
          S32MUL(xr3,xr4, samples[1], sf_val);
          S32MUL(xr5,xr6, samples[2], sf_val);
          S32EXTR(xr1,xr2,(32 - MAD_F_SCALEBITS), 31);
          S32EXTR(xr3,xr4,(32 - MAD_F_SCALEBITS), 31);
          S32EXTR(xr5,xr6,(32 - MAD_F_SCALEBITS), 31);
          D32SLL(xr1,xr1,xr3,xr3,1);
          D32SLL(xr5,xr5,xr0,xr0,1);
          S32SDIV(xr1, sb_ptr, 32, 2);
          S32SDIV(xr3, sb_ptr, 32, 2);
          S32SDIV(xr5, sb_ptr, 32, 2);
#else
	  /* Portable path: map the allocation to a quantisation class, read
	   * three samples and scale each by the scalefactor. */
	  index = offset_table[bitalloc_table[offsets[sb]].offset][index - 1];

	  II_samples(&stream->ptr, &qc_table[index], samples);

	  for (s = 0; s < 3; ++s) {
	    frame->sbsample[ch][3 * gr + s][sb] =
	      mad_f_mul(samples[s], sf_table[scalefactor[ch][sb][gr / 4]]);
	  }
#endif
	}
	else {
	  /* Nothing allocated to this subband: output silence. */
	  for (s = 0; s < 3; ++s)
	    frame->sbsample[ch][3 * gr + s][sb] = 0;
	}
      }
    }

    /* Subbands from bound up to sblimit: one set of samples is read and then
     * scaled with each channel's own scalefactor. */
    for (sb = bound; sb < sblimit; ++sb) {
      if ((index = allocation[0][sb])) {
	index = offset_table[bitalloc_table[offsets[sb]].offset][index - 1];

	II_samples(&stream->ptr, &qc_table[index], samples);

	for (ch = 0; ch < nch; ++ch) {
#ifdef JZ4750_OPT
          /* Same MXU scale-and-store sequence as in the per-channel loop
           * above, including the 3*gr-1 starting row. */
          mad_fixed_t sf_val;
          mad_fixed_t *sb_ptr;
          sb_ptr = &(frame->sbsample[ch][3*gr-1][sb]);
          sf_val = sf_table[scalefactor[ch][sb][gr/4]];
          S32MUL(xr1,xr2, samples[0], sf_val);
          S32MUL(xr3,xr4, samples[1], sf_val);
          S32MUL(xr5,xr6, samples[2], sf_val);
          S32EXTR(xr1,xr2,(32 - MAD_F_SCALEBITS), 31);
          S32EXTR(xr3,xr4,(32 - MAD_F_SCALEBITS), 31);
          S32EXTR(xr5,xr6,(32 - MAD_F_SCALEBITS), 31);
          D32SLL(xr1,xr1,xr3,xr3,1);
          D32SLL(xr5,xr5,xr0,xr0,1);
          S32SDIV(xr1, sb_ptr, 32, 2);
          S32SDIV(xr3, sb_ptr, 32, 2);
          S32SDIV(xr5, sb_ptr, 32, 2);
#else
	  for (s = 0; s < 3; ++s) {
	    frame->sbsample[ch][3 * gr + s][sb] =
	      mad_f_mul(samples[s], sf_table[scalefactor[ch][sb][gr / 4]]);
	  }
#endif
	}
      }
      else {
	for (ch = 0; ch < nch; ++ch) {
	  for (s = 0; s < 3; ++s)
	    frame->sbsample[ch][3 * gr + s][sb] = 0;
	}
      }
    }

    /* Subbands at or above sblimit carry no data: zero them. */
    for (ch = 0; ch < nch; ++ch) {
      for (s = 0; s < 3; ++s) {
	for (sb = sblimit; sb < 32; ++sb)
	  frame->sbsample[ch][3 * gr + s][sb] = 0;
      }
    }
  }

  return 0;
}