/*
 * One radix-4 butterfly stage of the split-radix FFT: twiddles the two
 * "odd" inputs z[n*2] and z[n*3] by the twiddle pair (wre, wim) -- once
 * with XPROD31_R and once with the conjugate form XNPROD31_R -- then
 * combines all four points z[0], z[n], z[n*2], z[n*3] via BUTTERFLIES.
 *
 * z   : base pointer of the four complex points touched
 * n   : stride between the points, in FFTComplex elements
 * wre : presumably cos component of the twiddle factor -- see macro defs
 * wim : presumably sin component of the twiddle factor -- see macro defs
 *
 * NOTE(review): t1,t2,t5,t6 are written here but never read in visible
 * code -- they appear to be consumed by name inside the BUTTERFLIES
 * macro, so these exact local names must be preserved; confirm against
 * the macro definition before refactoring.
 */
static inline void TRANSFORM(FFTComplex * z, unsigned int n, FFTSample wre, FFTSample wim)
{
    register FFTSample t1,t2,t5,t6,r_re,r_im;

    /* twiddle z[n*2]: XPROD31_R leaves its result in t1,t2 */
    r_re = z[n*2].re;
    r_im = z[n*2].im;
    XPROD31_R(r_re, r_im, wre, wim, t1,t2);

    /* twiddle z[n*3] with the conjugate product: result in t5,t6 */
    r_re = z[n*3].re;
    r_im = z[n*3].im;
    XNPROD31_R(r_re, r_im, wre, wim, t5,t6);

    /* combine the four points (reads t1..t6 implicitly -- see NOTE above) */
    BUTTERFLIES(z[0],z[n],z[n*2],z[n*3]);
}
/*
 * Fixed-point IMDCT producing the first half of the output (n/2 samples
 * for an n = 1 << nbits transform). Runs in three phases, all inside the
 * OUTPUT buffer:
 *   1. pre-rotation + bit-reverse reordering of the input into output,
 *   2. an in-place n/4-point complex FFT (ff_fft_calc_c),
 *   3. post-rotation + reordering.
 *
 * nbits  : log2 of the transform size n
 * output : receives the result; also serves as the FFT work buffer
 * input  : n/2 frequency-domain samples (read only)
 *
 * NOTE(review): the function continues past this excerpt -- the switch
 * and the function braces are closed further down, so only the default
 * post-rotation case is visible here.
 */
void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input)
{
    int n8, n4, n2, n, j;
    const fixed32 *in1, *in2;

    n = 1 << nbits;
    n2 = n >> 1;
    n4 = n >> 2;
    n8 = n >> 3;

    /* the FFT runs in place on the caller's output buffer */
    FFTComplex *z = (FFTComplex *)output;

    /* pre rotation: in1 walks forward from the start of the input,
       in2 walks backward from the end, two samples per step */
    in1 = input;
    in2 = input + n2 - 1;

    /* revtab comes from the fft; revtab table is sized for N=4096 size
       fft = 2^12. The fft is size N/4 so s->nbits-2, so our shift needs
       to be (12-(nbits-2)) = 14-nbits */
    const int revtab_shift = (14- nbits);

    /* bitreverse reorder the input and rotate; result here is in OUTPUT ... */
    /* (note that when using the current split radix, the bitreverse ordering is
       complex, meaning that this reordering cannot easily be done in-place) */
    /* Using the following pdf, you can see that it is possible to rearrange
       the 'classic' pre/post rotate with an alternative one that enables
       us to use fewer distinct twiddle factors.
       http://www.eurasip.org/Proceedings/Eusipco/Eusipco2006/papers/1568980508.pdf

       For prerotation, the factors are just sin,cos(2PI*i/N)
       For postrotation, the factors are sin,cos(2PI*(i+1/4)/N)

       Therefore, prerotation can immediately reuse the same twiddles as fft
       (for postrotation it's still a bit complex, we reuse the fft trig tables
       where we can, or a special table for N=2048, or interpolate between
       trig tables for N>2048)
    */
    const int32_t *T = sincos_lookup0;
    const int step = 2<<(12-nbits);
    const uint16_t * p_revtab=revtab;

    /* first n/4 inputs: walk the sin/cos table forward (loop is manually
       unrolled two bit-reversed points per iteration) */
    {
        const uint16_t * const p_revtab_end = p_revtab + n8;
        while(LIKELY(p_revtab < p_revtab_end))
        {
            j = (*p_revtab)>>revtab_shift;
            XNPROD31(*in2, *in1, T[1], T[0], &z[j].re, &z[j].im );
            T += step;
            in1 += 2;
            in2 -= 2;
            p_revtab++;
            j = (*p_revtab)>>revtab_shift;
            XNPROD31(*in2, *in1, T[1], T[0], &z[j].re, &z[j].im );
            T += step;
            in1 += 2;
            in2 -= 2;
            p_revtab++;
        }
    }
    /* second n/4 inputs: table walked backward with the sin/cos operands
       swapped (T[0]/T[1] order differs from the first loop) */
    {
        const uint16_t * const p_revtab_end = p_revtab + n8;
        while(LIKELY(p_revtab < p_revtab_end))
        {
            j = (*p_revtab)>>revtab_shift;
            XNPROD31(*in2, *in1, T[0], T[1], &z[j].re, &z[j].im);
            T -= step;
            in1 += 2;
            in2 -= 2;
            p_revtab++;
            j = (*p_revtab)>>revtab_shift;
            XNPROD31(*in2, *in1, T[0], T[1], &z[j].re, &z[j].im);
            T -= step;
            in1 += 2;
            in2 -= 2;
            p_revtab++;
        }
    }

    /* ... and so fft runs in OUTPUT buffer */
    ff_fft_calc_c(nbits-2, z);

    /* post rotation + reordering. now keeps the result within the OUTPUT buffer */
    switch( nbits )
    {
        default:
        {
            /* z1 climbs from the start of the buffer, z2 descends from the
               end; each pass rotates one pair from each end and swaps the
               re/im results across the two ends */
            fixed32 * z1 = (fixed32 *)(&z[0]);
            fixed32 * z2 = (fixed32 *)(&z[n4-1]);
            int magic_step = step>>2;
            int newstep;
            if(n<=1024)
            {
                /* small transforms: reuse the primary trig table at an offset */
                T = sincos_lookup0 + magic_step;
                newstep = step>>1;
            }
            else
            {
                /* larger transforms: finer-grained secondary table */
                T = sincos_lookup1;
                newstep = 2;
            }

            while(z1<z2)
            {
                fixed32 r0,i0,r1,i1;
                /* note the asymmetric T[0]/T[1] order between the two calls,
                   and that i1 comes from z1's rotation while i0 comes from
                   z2's -- the imaginary parts are intentionally crossed */
                XNPROD31_R(z1[1], z1[0], T[0], T[1], r0, i1 ); T+=newstep;
                XNPROD31_R(z2[1], z2[0], T[1], T[0], r1, i0 ); T+=newstep;
                z1[0] = -r0;
                z1[1] = -i0;
                z2[0] = -r1;
                z2[1] = -i1;
                z1+=2;
                z2-=2;
            }

            break;
        }