/*
 * Fixed-point "half" inverse MDCT (as the name suggests).
 *
 * s      - MDCT context; this function reads s->nbits, s->tcos, s->tsin,
 *          s->fft.revtab and calls s->fft.fft_calc.
 * output - destination buffer; it is reinterpreted as an array of
 *          FFTComplex_fix and is also used as the in-place FFT work buffer.
 * input  - source coefficients; read from both ends (input[0] upwards and
 *          input[n2 - 1] downwards).
 *
 * The work is done in three stages: a pre-rotation that writes the rotated
 * input into z[] at positions taken from revtab, an in-place FFT through
 * s->fft.fft_calc, and a post-rotation that also reorders the result.
 *
 * The active pre-rotation path uses the S32MUL / S32MADD / S32MSUB / D32SLL /
 * S32M2I macros, which operate on the implicit registers xr1..xr12 and are
 * defined outside this function. Their statement order is hand-interleaved;
 * do not reorder without checking the macro definitions.
 */
void imdct_half_fix_c(MDCTContext_fix *s, FFTSample_fix *output, const FFTSample_fix *input)
{
    /* Disabled instrumentation hook (presumably a performance monitor). */
    //PMON_ON(qmf);
    int k, n8, n4, n2, n, j,j1;
    const FFTSample_fix *in1, *in2;
    const unsigned short *revtab = s->fft.revtab;
    const FFTSample_fix *tcos = s->tcos;
    const FFTSample_fix *tsin = s->tsin;
    FFTComplex_fix *z = (FFTComplex_fix *)output;

    /* Sizes derived from nbits; the trailing numbers are the values for nbits == 6. */
    n = 1 << s->nbits; /* 64 */
    n2 = n >> 1;       /* 32 */
    n4 = n >> 2;       /* 16 */
    n8 = n >> 3;       /* 8 */

    /* pre rotation */
    in1 = input;          /* head: walks forward through the input */
    in2 = input + n2 - 1; /* tail: walks backward through the input */
    for(k = 0; k < n8; k++) {
#if 0
        /* Disabled plain-C form: one complex multiply per iteration. */
        j=revtab[k];
        FFT_CMUL_fix(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]);
        in1 += 2;
        in2 -= 2;
#else
        /*
         * Active path: each iteration produces two outputs, z[revtab[k]] and
         * z[revtab[n4-k-1]], so the loop only needs n8 iterations.
         * Both in1 and in2 move by two elements per iteration.
         */
        FFTSample_fix _are,_bre,_aim,_bim,are,aim;
        _are = *in2;
        _bre = tcos[k];
        _aim = *in1;
        _bim = tsin[k];
        j=revtab[k];
        /* n is reused as a scratch index here; its original value is not needed again. */
        n=n4-k-1;
        j1=revtab[n];
        /* First product, accumulated in xr1:xr2 and xr3:xr4. */
        S32MUL(xr1,xr2, _are, _bre);
        S32MUL(xr3, xr4, _are, _bim);
        in2--;
        are = *in2;
        /*
         * Second product, accumulated in xr7:xr8 and xr9:xr10.
         * NOTE(review): this second output reuses tcos[k]/tsin[k] (_bre/_bim)
         * rather than tcos[n4-k-1]/tsin[n4-k-1] - confirm this matches the
         * twiddle table layout expected by the caller.
         */
        S32MUL(xr7,xr8, are, _bre);
        S32MUL(xr9, xr10, are, _bim);
        S32MSUB(xr1, xr2, _aim, _bim);
        S32MADD(xr3, xr4, _aim, _bre);
        ;
        in1++;
        aim = *in1;
        /* Presumably shifts both results left by 1 into xr5/xr6 to renormalise the fixed-point product - TODO confirm. */
        D32SLL(xr5,xr1,xr3,xr6,1);
        S32MSUB(xr7, xr8, aim, _bim);
        S32MADD(xr9, xr10, aim, _bre);
        z[j].re=S32M2I(xr5);
        D32SLL(xr11,xr7,xr9,xr12,1);
        z[j].im=S32M2I(xr6);
        in1++;
        in2--;
        z[j1].re=S32M2I(xr11);
        z[j1].im=S32M2I(xr12);
#endif
    }

    /* In-place FFT over z through the context's function pointer. */
    s->fft.fft_calc(&s->fft, z);

    /* post rotation + reordering */
    /* XXX: optimize */
    for(k = 0; k < n8; k++) {
        FFTSample_fix r0, i0, r1, i1;
        /*
         * Rotate the two entries mirrored around n8. The imaginary results
         * are crossed over: i1 comes from entry n8-k-1 but is stored in
         * entry n8+k, and i0 comes from entry n8+k but is stored in n8-k-1.
         */
        FFT_CMUL_fix(r0, i1, z[n8-k-1].im, z[n8-k-1].re, tsin[n8-k-1], tcos[n8-k-1]);
        FFT_CMUL_fix(r1, i0, z[n8+k ].im, z[n8+k ].re, tsin[n8+k ], tcos[n8+k ]);
        z[n8-k-1].re = r0;
        z[n8-k-1].im = i0;
        z[n8+k ].re = r1;
        z[n8+k ].im = i1;
    }
    //PMON_OFF(qmf);
}
/*
 * In-place fixed-point FFT over z (inverse direction, going by the name).
 *
 * s - FFT context; this function reads s->nbits and s->exptab.
 * z - array of 1 << s->nbits complex samples, transformed in place.
 *
 * The first two butterfly passes are fused into "pass 1", which handles
 * four complex values per iteration; the remaining passes run in the final
 * do/while loop. The active code paths use the S32LDD / S32STD / D32ADD_AS /
 * D32ADD_SA / S32MUL / S32MADD / S32MSUB / D32SLL macros on the implicit
 * registers xr1..xr9; the disabled #else branches show the plain
 * FFT_BF_fix / FFT_CMUL_fix formulation they presumably replace.
 *
 * NOTE: the "do { ... } while (--j)" loop in pass 1 runs its body before
 * testing j, so it does not terminate correctly when np < 4. Callers
 * presumably guarantee s->nbits >= 2 - verify.
 */
void fft_calc_fix_inverse(FFTContext_fix *s, FFTComplex_fix *z)
{
    int ln = s->nbits;
    int j, np, np2;
    int nblocks, nloops;
    register FFTComplex_fix *p, *q;
    FFTComplex_fix *exptab = s->exptab;
    int l;
    /* Only referenced by the disabled #else branch in the last pass. */
    FFTSample_fix tmp_re, tmp_im;

    np = 1 << ln;
    /* Butterfly passes; the original note mentions 4 steps for N=16. */
    /* pass 0 (disabled: folded into pass 1 below) */
#if 0
    p=&z[0];
    j=(np >> 1);
    do {
        /* X(k) = G(k)+H(k)*W (= e j*0) */
        FFT_BF_fix(p[0].re, p[0].im, p[1].re, p[1].im, p[0].re, p[0].im, p[1].re, p[1].im);
        p+=2;
    } while (--j);
#endif
    /* pass 1: processes p[0..3] per iteration, np/4 iterations */
    p=&z[0];
    j=np >> 2;
    do {
#if 1
        /*
         * Load eight values at byte offsets 0..28 from p; presumably the
         * re/im pairs of p[0]..p[3], assuming 4-byte samples - TODO confirm.
         */
        S32LDD(xr1,p,0);
        S32LDD(xr2,p,4);
        S32LDD(xr3,p,8);
        S32LDD(xr4,p,12);
        S32LDD(xr5,p,16);
        S32LDD(xr6,p,20);
        S32LDD(xr7,p,24);
        S32LDD(xr8,p,28);
        /* Sum/difference steps; see the #else branch for the reference butterflies. */
        D32ADD_AS(xr1,xr1,xr3,xr3);
        D32ADD_AS(xr2,xr2,xr4,xr4);
        D32ADD_AS(xr5,xr5,xr7,xr7);
        D32ADD_AS(xr6,xr6,xr8,xr8);
        D32ADD_AS(xr1,xr1,xr5,xr5);
        D32ADD_AS(xr2,xr2,xr6,xr6);
        /* These two write their second result to xr9 and xr8, which are what get stored at offsets 24 and 28. */
        D32ADD_SA(xr3,xr3,xr8,xr9);
        D32ADD_AS(xr4,xr4,xr7,xr8);
        S32STD(xr1,p,0);
        S32STD(xr2,p,4);
        S32STD(xr3,p,8);
        S32STD(xr4,p,12);
        S32STD(xr5,p,16);
        S32STD(xr6,p,20);
        S32STD(xr9,p,24);
        S32STD(xr8,p,28);
#else
        /* Reference form: pass 0 butterflies on (p0,p1) and (p2,p3), then pass 1 on (p0,p2) and (p1,p3). */
        FFT_BF_fix(p[0].re, p[0].im, p[1].re, p[1].im, p[0].re, p[0].im, p[1].re, p[1].im);
        FFT_BF_fix(p[2].re, p[2].im, p[3].re, p[3].im, p[2].re, p[2].im, p[3].re, p[3].im);
        FFT_BF_fix(p[0].re, p[0].im, p[2].re, p[2].im, p[0].re, p[0].im, p[2].re, p[2].im);
        FFT_BF_fix(p[1].re, p[1].im, p[3].re, p[3].im, p[1].re, p[1].im, -p[3].im, p[3].re);
#endif
        p+=4;
    } while (--j);

    /* pass 2 .. ln-1 */
    nblocks = np >> 3;
    nloops = 1 << 2;
    np2 = np >> 1;
    /* Each pass halves nblocks and doubles nloops; the loop ends once nblocks reaches 0. */
    do {
        p = z;
        q = z + nloops; /* q runs nloops elements ahead of p */
        for (j = 0; j < nblocks; ++j) {
            /* First pair of the block: butterfly with no exptab multiply. */
#if 1
            S32LDD(xr1,p,0);
            S32LDD(xr2,p,4);
            S32LDD(xr3,q,0);
            S32LDD(xr4,q,4);
            D32ADD_AS(xr1,xr1,xr3,xr3);
            D32ADD_AS(xr2,xr2,xr4,xr4);
            S32STD(xr1,p,0);
            S32STD(xr2,p,4);
            S32STD(xr3,q,0);
            S32STD(xr4,q,4);
#else
            FFT_BF_fix(p->re, p->im, q->re, q->im, p->re, p->im, q->re, q->im);
#endif
            p++;
            q++;
            /* Remaining pairs: exptab is indexed with stride nblocks. */
            for(l = nblocks; l < np2; l += nblocks) {
                /* FFT_CMUL_fix( ) function is : (-j 2*PI/N *km) H(i) * E */
#if 1
                FFTSample_fix _are = exptab[l].re;
                FFTSample_fix _bre = q->re;
                FFTSample_fix _aim = exptab[l].im;
                FFTSample_fix _bim = q->im;
                /* Complex product of exptab[l] and *q, built up in xr1:xr2 and xr5:xr6. */
                S32MUL(xr1, xr2, _are, _bre);
                S32MUL(xr5, xr6, _are, _bim);
                S32LDD(xr7,p,0);
                S32MSUB(xr1, xr2, _aim, _bim);
                S32MADD(xr5, xr6, _aim, _bre);
                S32LDD(xr8,p,4);
                /* Presumably renormalises the fixed-point product by a left shift of 1 - TODO confirm. */
                D32SLL(xr1, xr1, xr5, xr5, 1);
                /* Butterfly of *p with the product; results stored back to *p and *q. */
                D32ADD_AS(xr7,xr7,xr1,xr1);
                D32ADD_AS(xr8,xr8,xr5,xr5);
                S32STD(xr7,p,0);
                S32STD(xr8,p,4);
                S32STD(xr1,q,0);
                S32STD(xr5,q,4);
#else
                FFT_CMUL_fix(tmp_re, tmp_im, exptab[l].re, exptab[l].im, q->re, q->im);
                FFT_BF_fix(p->re, p->im, q->re, q->im, p->re, p->im, tmp_re, tmp_im);
#endif
                p++;
                q++;
            }
            /* Skip the half of the block that q just covered. */
            p += nloops;
            q += nloops;
        }
        nblocks = nblocks >> 1;
        nloops = nloops << 1;
    } while (nblocks);
}