static inline void ifft4(buf) { uint32_t tm4=(uint32_t)(buf); S32LDI(xr1, tm4, 0); S32LDI(xr2, tm4, 8); S32LDI(xr3, tm4, 8); S32LDI(xr4, tm4, 8); D32ADD_AS(xr5, xr1, xr2, xr6); D32ADD_AS(xr7, xr4, xr3, xr8); D32ADD_AS(xr9, xr5, xr7, xr10); S32SDI(xr10,tm4,-8); S32SDI(xr9,tm4,-16); S32LDI(xr1, tm4, 4); S32LDI(xr2, tm4, 8); S32LDI(xr3, tm4, 8); S32LDI(xr4, tm4, 8); D32ADD_AS(xr5, xr1, xr2, xr9); D32ADD_AS(xr7, xr3, xr4, xr10); D32ADD_AS(xr1,xr5,xr7,xr2); D32ADD_AS(xr11,xr6,xr10,xr12); D32ADD_AS(xr13,xr9,xr8,xr14); S32SDI(xr14,tm4,0); S32SDI(xr12,tm4,-4); S32SDI(xr2, tm4,-4); S32SDI(xr13,tm4,-8); S32SDI(xr11,tm4,-4); S32SDI(xr1, tm4,-4); }
/**
 * In-place 2-point butterfly over two complex values at buf.
 *
 * The parameter was previously declared without a type (K&R implicit int),
 * which is not valid C99 or later; it is now an untyped data pointer.
 *
 * Byte offsets used below imply the layout re0, im0, re1, im1 as 32-bit
 * words (offsets 0, 4, 8, 12).  NOTE(review): confirm against the complex
 * sample type used by the callers.
 *
 * NOTE(review): assumes D32ADD_AS(a, b, c, d) computes a = b + c and
 * d = b - c, so the result is
 *   (re0, im0) <- (re0 + re1, im0 + im1)
 *   (re1, im1) <- (re0 - re1, im0 - im1)
 *
 * Clobbers MXU registers xr1..xr8.
 *
 * @param buf  buffer holding the two complex values; overwritten.
 */
static inline void ifft2(void *buf)
{
    /* Load both real parts, then both imaginary parts. */
    S32LDD(xr1, buf, 0);
    S32LDD(xr3, buf, 8);
    S32LDD(xr2, buf, 4);
    S32LDD(xr4, buf, 12);

    D32ADD_AS(xr5, xr1, xr3, xr7);      /* real: sum, difference */
    D32ADD_AS(xr6, xr2, xr4, xr8);      /* imag: sum, difference */

    S32STD(xr5, buf, 0);
    S32STD(xr7, buf, 8);
    S32STD(xr6, buf, 4);
    S32STD(xr8, buf, 12);
}
/**
 * In-place inverse FFT butterfly passes on z, using MXU SIMD macros.
 *
 * The transform length is 1 << s->nbits.  The function performs only the
 * radix-2 butterfly passes: no input reordering and no output scaling is
 * done here.  NOTE(review): callers presumably supply z already permuted
 * (bit-reversed) -- confirm at the call sites.
 *
 * Byte offsets passed to S32LDD/S32STD treat each complex element as two
 * 32-bit words, re at +0 and im at +4 (8 bytes per element).
 *
 * Changes relative to the previous revision:
 *  - nbits < 2 no longer enters the pass-1 loop with a zero trip count
 *    (the old `do { } while (--j)` would wrap and run far past the buffer);
 *    a 1-point transform is a no-op and a 2-point one is a single butterfly.
 *  - the preprocessor-disabled reference branches and the locals only they
 *    used (tmp_re, tmp_im) are gone; the arithmetic they described is kept
 *    in the comments below.
 *
 * Clobbers MXU registers xr1..xr9.
 *
 * @param s  FFT context; reads s->nbits and s->exptab (twiddle table).
 * @param z  1 << s->nbits complex samples, overwritten with the result.
 */
void fft_calc_fix_inverse(FFTContext_fix *s, FFTComplex_fix *z)
{
    int ln = s->nbits;
    FFTComplex_fix *exptab = s->exptab;
    FFTComplex_fix *p, *q;
    int np, np2;
    int nblocks, nloops;
    int j, l;

    if (ln < 2) {
        if (ln == 1) {
            /* 2-point transform: z0 <- z0 + z1, z1 <- z0 - z1. */
            S32LDD(xr1, z, 0);
            S32LDD(xr2, z, 4);
            S32LDD(xr3, z, 8);
            S32LDD(xr4, z, 12);
            D32ADD_AS(xr1, xr1, xr3, xr3);
            D32ADD_AS(xr2, xr2, xr4, xr4);
            S32STD(xr1, z, 0);
            S32STD(xr2, z, 4);
            S32STD(xr3, z, 8);
            S32STD(xr4, z, 12);
        }
        return;
    }

    np = 1 << ln;

    /*
     * Passes 0 and 1 fused: one radix-2^2 step per group of four elements.
     * With a = p0 + p1, b = p0 - p1, c = p2 + p3, d = p2 - p3:
     *   p0 <- a + c
     *   p2 <- a - c
     *   p1 <- (b.re - d.im, b.im + d.re)
     *   p3 <- (b.re + d.im, b.im - d.re)
     */
    p = &z[0];
    for (j = np >> 2; j > 0; --j) {
        S32LDD(xr1, p, 0);
        S32LDD(xr2, p, 4);
        S32LDD(xr3, p, 8);
        S32LDD(xr4, p, 12);
        S32LDD(xr5, p, 16);
        S32LDD(xr6, p, 20);
        S32LDD(xr7, p, 24);
        S32LDD(xr8, p, 28);

        D32ADD_AS(xr1, xr1, xr3, xr3);  /* a.re, b.re */
        D32ADD_AS(xr2, xr2, xr4, xr4);  /* a.im, b.im */
        D32ADD_AS(xr5, xr5, xr7, xr7);  /* c.re, d.re */
        D32ADD_AS(xr6, xr6, xr8, xr8);  /* c.im, d.im */

        D32ADD_AS(xr1, xr1, xr5, xr5);  /* p0.re, p2.re */
        D32ADD_AS(xr2, xr2, xr6, xr6);  /* p0.im, p2.im */
        D32ADD_SA(xr3, xr3, xr8, xr9);  /* p1.re = b.re - d.im, p3.re = b.re + d.im */
        D32ADD_AS(xr4, xr4, xr7, xr8);  /* p1.im = b.im + d.re, p3.im = b.im - d.re */

        S32STD(xr1, p, 0);
        S32STD(xr2, p, 4);
        S32STD(xr3, p, 8);
        S32STD(xr4, p, 12);
        S32STD(xr5, p, 16);
        S32STD(xr6, p, 20);
        S32STD(xr9, p, 24);
        S32STD(xr8, p, 28);

        p += 4;
    }

    /*
     * Passes 2 .. ln-1.  Each pass handles nblocks blocks; within a block
     * the two halves (p and q, nloops elements apart) are combined.  The
     * twiddle index advances by nblocks, so the table is walked with a
     * coarser stride in early passes.  For np == 4 nblocks starts at 0 and
     * no further pass runs.
     */
    np2 = np >> 1;
    nloops = 1 << 2;
    for (nblocks = np >> 3; nblocks != 0; nblocks >>= 1, nloops <<= 1) {
        p = z;
        q = z + nloops;

        for (j = 0; j < nblocks; ++j) {
            /* First pair of the block has no twiddle multiply: p +/- q. */
            S32LDD(xr1, p, 0);
            S32LDD(xr2, p, 4);
            S32LDD(xr3, q, 0);
            S32LDD(xr4, q, 4);
            D32ADD_AS(xr1, xr1, xr3, xr3);
            D32ADD_AS(xr2, xr2, xr4, xr4);
            S32STD(xr1, p, 0);
            S32STD(xr2, p, 4);
            S32STD(xr3, q, 0);
            S32STD(xr4, q, 4);
            p++;
            q++;

            for (l = nblocks; l < np2; l += nblocks) {
                /*
                 * t = exptab[l] * q (complex multiply), then
                 * p <- p + t, q <- p - t.
                 * The products are accumulated in register pairs and the
                 * first register of each pair is shifted left by one
                 * before the butterfly -- presumably the high word of a
                 * Q31 product; confirm against the MXU manual.
                 * The loads of p are interleaved with the multiplies as in
                 * the original instruction schedule.
                 */
                FFTSample_fix w_re = exptab[l].re;
                FFTSample_fix q_re = q->re;
                FFTSample_fix w_im = exptab[l].im;
                FFTSample_fix q_im = q->im;

                S32MUL(xr1, xr2, w_re, q_re);
                S32MUL(xr5, xr6, w_re, q_im);
                S32LDD(xr7, p, 0);
                S32MSUB(xr1, xr2, w_im, q_im);  /* t.re: w.re*q.re - w.im*q.im */
                S32MADD(xr5, xr6, w_im, q_re);  /* t.im: w.re*q.im + w.im*q.re */
                S32LDD(xr8, p, 4);
                D32SLL(xr1, xr1, xr5, xr5, 1);
                D32ADD_AS(xr7, xr7, xr1, xr1);
                D32ADD_AS(xr8, xr8, xr5, xr5);
                S32STD(xr7, p, 0);
                S32STD(xr8, p, 4);
                S32STD(xr1, q, 0);
                S32STD(xr5, q, 4);

                p++;
                q++;
            }

            /* Skip the half just written through q (and its mirror for q). */
            p += nloops;
            q += nloops;
        }
    }
}