int main (void) { #define PRECISION 16 #define FFT_SIZE 1024 #define ftofix32(x) ((fixed32)((x) * (float)(1 << PRECISION) + ((x) < 0 ? -0.5 : 0.5))) #define itofix32(x) ((x) << PRECISION) #define fixtoi32(x) ((x) >> PRECISION) int j; const long N = FFT_SIZE; double r[FFT_SIZE] = {0.0}, i[FFT_SIZE] = {0.0}; long n; double t; double amp, phase; clock_t start, end; double exec_time = 0; FFTContext s; FFTComplex z[FFT_SIZE]; memset(z, 0, 64*sizeof(FFTComplex)); /* Generate saw-tooth test data */ for (n = 0; n < FFT_SIZE; n++) { t = (2 * M_PI * n)/N; /*z[n].re = 1.1 + sin( t) + 0.5 * sin(2.0 * t) + (1.0/3.0) * sin(3.0 * t) + 0.25 * sin(4.0 * t) + 0.2 * sin(5.0 * t) + (1.0/6.0) * sin(6.0 * t) + (1.0/7.0) * sin(7.0 * t) ;*/ z[n].re = ftofix32(cos(2*M_PI*n/64)); //printf("z[%d] = %f\n", n, z[n].re); //getchar(); } ff_fft_init(&s, 10, 1); //start = clock(); //for(n = 0; n < 1000000; n++) ff_fft_permute_c(&s, z); ff_fft_calc_c(&s, z); //end = clock(); //exec_time = (((double)end-(double)start)/CLOCKS_PER_SEC); for(j = 0; j < FFT_SIZE; j++) { printf("%8.4f\n", sqrt(pow(fixtof32(z[j].re),2)+ pow(fixtof32(z[j].im), 2))); //getchar(); } printf("muls = %d, adds = %d\n", muls, adds); //printf(" Time elapsed = %f\n", exec_time); //ff_fft_end(&s); }
void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input) { int n8, n4, n2, n, j; const fixed32 *in1, *in2; n = 1 << nbits; n2 = n >> 1; n4 = n >> 2; n8 = n >> 3; FFTComplex *z = (FFTComplex *)output; /* pre rotation */ in1 = input; in2 = input + n2 - 1; /* revtab comes from the fft; revtab table is sized for N=4096 size fft = 2^12. The fft is size N/4 so s->nbits-2, so our shift needs to be (12-(nbits-2)) */ const int revtab_shift = (14- nbits); /* bitreverse reorder the input and rotate; result here is in OUTPUT ... */ /* (note that when using the current split radix, the bitreverse ordering is complex, meaning that this reordering cannot easily be done in-place) */ /* Using the following pdf, you can see that it is possible to rearrange the 'classic' pre/post rotate with an alternative one that enables us to use fewer distinct twiddle factors. http://www.eurasip.org/Proceedings/Eusipco/Eusipco2006/papers/1568980508.pdf For prerotation, the factors are just sin,cos(2PI*i/N) For postrotation, the factors are sin,cos(2PI*(i+1/4)/N) Therefore, prerotation can immediately reuse the same twiddles as fft (for postrotation it's still a bit complex, we reuse the fft trig tables where we can, or a special table for N=2048, or interpolate between trig tables for N>2048) */ const int32_t *T = sincos_lookup0; const int step = 2<<(12-nbits); const uint16_t * p_revtab=revtab; { const uint16_t * const p_revtab_end = p_revtab + n8; while(LIKELY(p_revtab < p_revtab_end)) { j = (*p_revtab)>>revtab_shift; XNPROD31(*in2, *in1, T[1], T[0], &z[j].re, &z[j].im ); T += step; in1 += 2; in2 -= 2; p_revtab++; j = (*p_revtab)>>revtab_shift; XNPROD31(*in2, *in1, T[1], T[0], &z[j].re, &z[j].im ); T += step; in1 += 2; in2 -= 2; p_revtab++; } } { const uint16_t * const p_revtab_end = p_revtab + n8; while(LIKELY(p_revtab < p_revtab_end)) { j = (*p_revtab)>>revtab_shift; XNPROD31(*in2, *in1, T[0], T[1], &z[j].re, &z[j].im); T -= step; in1 += 2; in2 -= 2; p_revtab++; j = (*p_revtab)>>revtab_shift; XNPROD31(*in2, *in1, T[0], T[1], &z[j].re, &z[j].im); T -= step; in1 += 2; in2 -= 2; p_revtab++; } } /* ... and so fft runs in OUTPUT buffer */ ff_fft_calc_c(nbits-2, z); /* post rotation + reordering. now keeps the result within the OUTPUT buffer */ switch( nbits ) { default: { fixed32 * z1 = (fixed32 *)(&z[0]); fixed32 * z2 = (fixed32 *)(&z[n4-1]); int magic_step = step>>2; int newstep; if(n<=1024) { T = sincos_lookup0 + magic_step; newstep = step>>1; } else { T = sincos_lookup1; newstep = 2; } while(z1<z2) { fixed32 r0,i0,r1,i1; XNPROD31_R(z1[1], z1[0], T[0], T[1], r0, i1 ); T+=newstep; XNPROD31_R(z2[1], z2[0], T[1], T[0], r1, i0 ); T+=newstep; z1[0] = -r0; z1[1] = -i0; z2[0] = -r1; z2[1] = -i1; z1+=2; z2-=2; } break; }