/*
 * Radix-4 butterfly applied to m groups of four points.
 *
 * For each of the m positions the routine takes the elements at offsets
 * 0, m, 2*m and 3*m from the current position in Fout, passes each through
 * C_FIXDIV(..., 4), multiplies the last three by twiddles read from
 * st->twiddles (advancing by fstride, 2*fstride and 3*fstride per step),
 * and stores the four combined values back in place.
 *
 * st->inverse decides which of the two odd outputs (offset m or 3*m)
 * receives which sign of the cross term.
 *
 * The loop is a do/while, so the body runs before the counter is tested;
 * callers are expected to pass a non-zero m.
 */
static void
kf_bfly4 (kiss_fft_s16_cpx * Fout, const size_t fstride,
    const kiss_fft_s16_cfg st, const size_t m)
{
  /* One cursor per butterfly leg; all four advance together. */
  kiss_fft_s16_cpx *out0 = Fout;
  kiss_fft_s16_cpx *out1 = Fout + m;
  kiss_fft_s16_cpx *out2 = Fout + 2 * m;
  kiss_fft_s16_cpx *out3 = Fout + 3 * m;

  /* Twiddle cursors for legs 1..3, all starting at the table base. */
  kiss_fft_s16_cpx *twid1 = st->twiddles;
  kiss_fft_s16_cpx *twid2 = st->twiddles;
  kiss_fft_s16_cpx *twid3 = st->twiddles;

  kiss_fft_s16_cpx prod1, prod2, prod3;
  kiss_fft_s16_cpx odd_sum, odd_diff, even_diff;
  kiss_fft_s16_cpx cross_a, cross_b;
  size_t remaining = m;

  do {
    C_FIXDIV (*out0, 4);
    C_FIXDIV (*out1, 4);
    C_FIXDIV (*out2, 4);
    C_FIXDIV (*out3, 4);

    C_MUL (prod1, *out1, *twid1);
    C_MUL (prod2, *out2, *twid2);
    C_MUL (prod3, *out3, *twid3);

    C_SUB (even_diff, *out0, prod2);
    C_ADDTO (*out0, prod2);
    C_ADD (odd_sum, prod1, prod3);
    C_SUB (odd_diff, prod1, prod3);

    /* *out0 holds (leg0 + prod2) here; leg 2 must be derived from it
       before odd_sum is folded into *out0. */
    C_SUB (*out2, *out0, odd_sum);
    C_ADDTO (*out0, odd_sum);

    /* The two possible values for the odd legs. */
    cross_a.r = even_diff.r - odd_diff.i;
    cross_a.i = even_diff.i + odd_diff.r;
    cross_b.r = even_diff.r + odd_diff.i;
    cross_b.i = even_diff.i - odd_diff.r;

    if (st->inverse) {
      *out1 = cross_a;
      *out3 = cross_b;
    } else {
      *out1 = cross_b;
      *out3 = cross_a;
    }

    twid1 += fstride;
    twid2 += fstride * 2;
    twid3 += fstride * 3;

    ++out0;
    ++out1;
    ++out2;
    ++out3;
  } while (--remaining);
}
/*
 * Radix-3 butterfly applied to m groups of three points.
 *
 * Each step works on the elements at offsets 0, m and 2*m from the current
 * position in Fout.  When st->inverse is zero the three inputs are first
 * passed through C_FIXDIV(..., 3).  The second and third inputs are
 * multiplied by twiddles read from st->twiddles (advancing by fstride and
 * 2*fstride per step) and the results are combined in place.
 *
 * Only the imaginary part of st->twiddles[fstride*m] is used, as the
 * scalar applied to the difference term.
 *
 * The loop is a do/while, so the body runs before the counter is tested;
 * callers are expected to pass a non-zero m.
 */
static void kf_bfly3(
        kiss_fft_cpx * Fout,
        const size_t fstride,
        const kiss_fft_cfg st,
        size_t m
        )
{
    /* One cursor per butterfly leg. */
    kiss_fft_cpx *leg0 = Fout;
    kiss_fft_cpx *leg1 = Fout + m;
    kiss_fft_cpx *leg2 = Fout + 2*m;

    kiss_fft_cpx *twid1 = st->twiddles;
    kiss_fft_cpx *twid2 = st->twiddles;

    kiss_fft_cpx prod1, prod2;
    kiss_fft_cpx sum, diff;
    kiss_fft_cpx epi3;
    size_t remaining = m;

    epi3 = st->twiddles[fstride*m];

    do {
        if (!st->inverse) {
            C_FIXDIV(*leg0, 3);
            C_FIXDIV(*leg1, 3);
            C_FIXDIV(*leg2, 3);
        }

        C_MUL(prod1, *leg1, *twid1);
        C_MUL(prod2, *leg2, *twid2);

        C_ADD(sum, prod1, prod2);
        C_SUB(diff, prod1, prod2);

        /* leg1 temporarily holds leg0 - sum/2; it is read back below. */
        leg1->r = leg0->r - HALF_OF(sum.r);
        leg1->i = leg0->i - HALF_OF(sum.i);

        C_MULBYSCALAR(diff, epi3.i);

        C_ADDTO(*leg0, sum);

        leg2->r = leg1->r + diff.i;
        leg2->i = leg1->i - diff.r;

        leg1->r -= diff.i;
        leg1->i += diff.r;

        twid1 += fstride;
        twid2 += fstride*2;

        ++leg0;
        ++leg1;
        ++leg2;
    } while (--remaining);
}
/*
 * Radix-2 butterfly applied to m pairs of points.
 *
 * Pairs element i of Fout with element i + m.  Both are passed through
 * C_FIXDIV(..., 2); the upper one is multiplied by the current twiddle
 * (read from st->twiddles, advancing by fstride per step); the difference
 * is stored in the upper slot and the sum in the lower slot.
 *
 * The loop is a do/while, so the body runs before the counter is tested;
 * callers are expected to pass a non-zero m.
 */
static void kf_bfly2(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, int m)
{
    kiss_fft_cpx *lower = Fout;
    kiss_fft_cpx *upper = Fout + m;
    kiss_fft_cpx *twid = st->twiddles;
    kiss_fft_cpx rotated;
    int remaining = m;

    do {
        C_FIXDIV(*lower, 2);
        C_FIXDIV(*upper, 2);

        C_MUL(rotated, *upper, *twid);

        /* Upper must be written first: it needs the unmodified lower. */
        C_SUB(*upper, *lower, rotated);
        C_ADDTO(*lower, rotated);

        twid += fstride;
        ++lower;
        ++upper;
    } while (--remaining);
}
/*
 * Perform the butterfly for one stage of a mixed radix FFT (radix p).
 *
 * For each of the m columns, the p elements Fout[u], Fout[u+m], ...,
 * Fout[u+(p-1)*m] are copied into a local scratch array (and passed through
 * C_FIXDIV(..., p) when st->inverse is zero).  Each of those p outputs is
 * then rebuilt as scratch[0] plus the sum of scratch[q] * twiddle for
 * q = 1..p-1, where the twiddle index advances by fstride*k modulo
 * st->nfft.
 *
 * The scratch array has 17 entries; ms_fatal() is called when p exceeds
 * that.
 */
static void kf_bfly_generic(
        kiss_fft_cpx * Fout,
        const size_t fstride,
        const kiss_fft_cfg st,
        int m,
        int p
        )
{
    kiss_fft_cpx column[17];
    kiss_fft_cpx * const tw_table = st->twiddles;
    const int nfft = st->nfft;
    kiss_fft_cpx term;
    int col, slot, pos, q;

    if (p > 17) {
        ms_fatal("KissFFT: max radix supported is 17");
    }

    for (col = 0; col < m; ++col) {
        /* Gather the p inputs of this column, m elements apart. */
        for (slot = 0, pos = col; slot < p; ++slot, pos += m) {
            column[slot] = Fout[pos];
            if (!st->inverse) {
                C_FIXDIV(column[slot], p);
            }
        }

        /* Rebuild each of the p outputs from the gathered inputs. */
        for (slot = 0, pos = col; slot < p; ++slot, pos += m) {
            int twidx = 0;

            Fout[pos] = column[0];
            for (q = 1; q < p; ++q) {
                /* Step the twiddle index and wrap it into [0, nfft). */
                twidx += fstride * pos;
                if (twidx >= nfft) {
                    twidx -= nfft;
                }
                C_MUL(term, column[q], tw_table[twidx]);
                C_ADDTO(Fout[pos], term);
            }
        }
    }
}
/*
 * Perform the butterfly for one stage of a mixed radix FFT (radix p).
 *
 * For each of the m columns, the p elements Fout[u], Fout[u+m], ...,
 * Fout[u+(p-1)*m] are copied into a temporary buffer and passed through
 * C_FIXDIV(..., p).  Each of those p outputs is then rebuilt as buffer[0]
 * plus the sum of buffer[q] * twiddle for q = 1..p-1, where the twiddle
 * index advances by fstride*k modulo st->nfft.
 *
 * The temporary buffer comes from KISS_FFT_TMP_ALLOC and is released with
 * KISS_FFT_TMP_FREE (both defined elsewhere); its result is not checked
 * here.
 */
static void kf_bfly_generic(
        kiss_fft_cpx * Fout,
        const size_t fstride,
        const kiss_fft_cfg st,
        int m,
        int p
        )
{
    kiss_fft_cpx * const tw_table = st->twiddles;
    const int nfft = st->nfft;
    kiss_fft_cpx term;
    int col, slot, pos, q;
    kiss_fft_cpx * column = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p);

    /* see http://sourceforge.net/p/kissfft/bugs/9/ */
#pragma omp critical
    for (col = 0; col < m; ++col) {
        /* Gather the p inputs of this column, m elements apart. */
        for (slot = 0, pos = col; slot < p; ++slot, pos += m) {
            column[slot] = Fout[pos];
            C_FIXDIV(column[slot], p);
        }

        /* Rebuild each of the p outputs from the gathered inputs. */
        for (slot = 0, pos = col; slot < p; ++slot, pos += m) {
            int twidx = 0;

            Fout[pos] = column[0];
            for (q = 1; q < p; ++q) {
                /* Step the twiddle index and wrap it into [0, nfft). */
                twidx += fstride * pos;
                if (twidx >= nfft) {
                    twidx -= nfft;
                }
                C_MUL(term, column[q], tw_table[twidx]);
                C_ADDTO(Fout[pos], term);
            }
        }
    }

    KISS_FFT_TMP_FREE(column);
}
/*
 * Perform the butterfly for one stage of a mixed radix FFT (radix p).
 *
 * For each of the m columns, the p elements Fout[u], Fout[u+m], ...,
 * Fout[u+(p-1)*m] are copied into a heap-allocated scratch buffer and
 * passed through C_FIXDIV(..., p).  Each of those p outputs is then rebuilt
 * as scratch[0] plus the sum of scratch[q] * twiddle for q = 1..p-1, where
 * the twiddle index advances by fstride*k modulo st->nfft.
 *
 * Parameters:
 *   Fout    - data transformed in place
 *   fstride - step used when advancing the twiddle index
 *   st      - configuration; st->twiddles and st->nfft are read
 *   m       - number of columns (spacing between the p inputs of a column)
 *   p       - radix, also the number of scratch entries allocated
 *
 * If the scratch buffer cannot be allocated the function returns without
 * modifying Fout.  The function is void and has no error channel, so the
 * caller is not informed of the failure.
 */
static void kf_bfly_generic(
        kiss_fft_cpx * Fout,
        const size_t fstride,
        const kiss_fft_cfg st,
        int m,
        int p
        )
{
    int u,k,q1,q;
    kiss_fft_cpx * twiddles = st->twiddles;
    kiss_fft_cpx t;
    int Norig = st->nfft;

    kiss_fft_cpx *scratchbuf=(kiss_fft_cpx *)malloc( sizeof(kiss_fft_cpx) * p );
    if (scratchbuf == NULL) {
        /* Dereferencing a failed allocation is undefined behaviour;
           bail out and leave Fout as it was. */
        return;
    }

    for ( u=0; u<m; ++u ) {
        /* Gather the p inputs of column u, m elements apart. */
        k=u;
        for ( q1=0 ; q1<p ; ++q1 ) {
            scratchbuf[q1] = Fout[ k ];
            C_FIXDIV(scratchbuf[q1],p);
            k += m;
        }

        /* Rebuild each of the p outputs from the gathered inputs. */
        k=u;
        for ( q1=0 ; q1<p ; ++q1 ) {
            int twidx=0;
            Fout[ k ] = scratchbuf[0];
            for (q=1; q<p; ++q ) {
                /* Step the twiddle index and wrap it into [0, Norig). */
                twidx += fstride * k;
                if (twidx>=Norig) twidx-=Norig;
                C_MUL(t,scratchbuf[q] , twiddles[twidx] );
                C_ADDTO( Fout[ k ] ,t);
            }
            k += m;
        }
    }

    free( scratchbuf );
}
/*
 * Radix-5 butterfly applied to m groups of five points.
 *
 * Iteration u works on the elements at offsets u, u+m, u+2*m, u+3*m and
 * u+4*m of Fout.  When st->inverse is zero the five inputs are first passed
 * through C_FIXDIV(..., 5).  Inputs 1..4 are multiplied by the twiddles at
 * indices u*fstride, 2*u*fstride, 3*u*fstride and 4*u*fstride of
 * st->twiddles, and the results are combined in place using the two
 * constants ya = twiddles[fstride*m] and yb = twiddles[fstride*2*m].
 */
static void kf_bfly5(
        kiss_fft_cpx * Fout,
        const size_t fstride,
        const kiss_fft_cfg st,
        int m
        )
{
    kiss_fft_cpx * tw = st->twiddles;
    kiss_fft_cpx ya, yb;
    kiss_fft_cpx in0;
    kiss_fft_cpx prod1, prod2, prod3, prod4;
    kiss_fft_cpx sum14, diff14, sum23, diff23;
    kiss_fft_cpx base_a, rot_a;   /* feed outputs 1 and 4 */
    kiss_fft_cpx base_b, rot_b;   /* feed outputs 2 and 3 */
    int u;

    ya = tw[fstride*m];
    yb = tw[fstride*2*m];

    for (u = 0; u < m; ++u) {
        kiss_fft_cpx *bin0 = Fout + u;
        kiss_fft_cpx *bin1 = bin0 + m;
        kiss_fft_cpx *bin2 = bin0 + 2*m;
        kiss_fft_cpx *bin3 = bin0 + 3*m;
        kiss_fft_cpx *bin4 = bin0 + 4*m;

        if (!st->inverse) {
            C_FIXDIV(*bin0, 5);
            C_FIXDIV(*bin1, 5);
            C_FIXDIV(*bin2, 5);
            C_FIXDIV(*bin3, 5);
            C_FIXDIV(*bin4, 5);
        }

        /* Keep the original first input; *bin0 is overwritten below. */
        in0 = *bin0;

        C_MUL(prod1, *bin1, tw[u*fstride]);
        C_MUL(prod2, *bin2, tw[2*u*fstride]);
        C_MUL(prod3, *bin3, tw[3*u*fstride]);
        C_MUL(prod4, *bin4, tw[4*u*fstride]);

        C_ADD(sum14, prod1, prod4);
        C_SUB(diff14, prod1, prod4);
        C_ADD(sum23, prod2, prod3);
        C_SUB(diff23, prod2, prod3);

        bin0->r += sum14.r + sum23.r;
        bin0->i += sum14.i + sum23.i;

        base_a.r = in0.r + S_MUL(sum14.r,ya.r) + S_MUL(sum23.r,yb.r);
        base_a.i = in0.i + S_MUL(sum14.i,ya.r) + S_MUL(sum23.i,yb.r);

        rot_a.r =  S_MUL(diff14.i,ya.i) + S_MUL(diff23.i,yb.i);
        rot_a.i = -S_MUL(diff14.r,ya.i) - S_MUL(diff23.r,yb.i);

        C_SUB(*bin1, base_a, rot_a);
        C_ADD(*bin4, base_a, rot_a);

        base_b.r = in0.r + S_MUL(sum14.r,yb.r) + S_MUL(sum23.r,ya.r);
        base_b.i = in0.i + S_MUL(sum14.i,yb.r) + S_MUL(sum23.i,ya.r);

        rot_b.r = - S_MUL(diff14.i,yb.i) + S_MUL(diff23.i,ya.i);
        rot_b.i = S_MUL(diff14.r,yb.i) - S_MUL(diff23.r,ya.i);

        C_ADD(*bin2, base_b, rot_b);
        C_SUB(*bin3, base_b, rot_b);
    }
}