/*
 * Radix-4 butterfly for one FFT stage, computed in place.
 *
 * Fout     first element of the block; Fout[k], Fout[k + m], Fout[k + 2*m]
 *          and Fout[k + 3*m] are combined for k = 0 .. m-1.
 * fstride  base step through st->twiddles; the three twiddle cursors advance
 *          by fstride, 2*fstride and 3*fstride per butterfly.
 * st       provides the twiddle table and the inverse flag.
 * m        number of butterflies.  The loop body runs before the counter is
 *          tested, so m must be non-zero.
 *
 * All four inputs go through C_FIXDIV(..., 4) before being combined.
 */
static void
kf_bfly4 (kiss_fft_s16_cpx * Fout, const size_t fstride,
    const kiss_fft_s16_cfg st, const size_t m)
{
  kiss_fft_s16_cpx *leg0 = Fout;
  kiss_fft_s16_cpx *leg1 = Fout + m;
  kiss_fft_s16_cpx *leg2 = Fout + 2 * m;
  kiss_fft_s16_cpx *leg3 = Fout + 3 * m;
  kiss_fft_s16_cpx *w1 = st->twiddles;
  kiss_fft_s16_cpx *w2 = st->twiddles;
  kiss_fft_s16_cpx *w3 = st->twiddles;
  kiss_fft_s16_cpx prod1, prod2, prod3;
  kiss_fft_s16_cpx even_diff, odd_sum, odd_diff;
  size_t remaining = m;

  do {
    C_FIXDIV (*leg0, 4);
    C_FIXDIV (*leg1, 4);
    C_FIXDIV (*leg2, 4);
    C_FIXDIV (*leg3, 4);

    /* Rotate the three upper legs by their twiddles. */
    C_MUL (prod1, *leg1, *w1);
    C_MUL (prod2, *leg2, *w2);
    C_MUL (prod3, *leg3, *w3);

    /* Even part: leg0 +/- rotated leg2. */
    C_SUB (even_diff, *leg0, prod2);
    C_ADDTO (*leg0, prod2);

    /* Odd part: rotated leg1 +/- rotated leg3. */
    C_ADD (odd_sum, prod1, prod3);
    C_SUB (odd_diff, prod1, prod3);

    /* leg2 must be written before leg0 absorbs odd_sum. */
    C_SUB (*leg2, *leg0, odd_sum);
    C_ADDTO (*leg0, odd_sum);

    /* The direction only changes the sign of the j * odd_diff term. */
    if (st->inverse) {
      leg1->r = even_diff.r - odd_diff.i;
      leg1->i = even_diff.i + odd_diff.r;
      leg3->r = even_diff.r + odd_diff.i;
      leg3->i = even_diff.i - odd_diff.r;
    } else {
      leg1->r = even_diff.r + odd_diff.i;
      leg1->i = even_diff.i - odd_diff.r;
      leg3->r = even_diff.r - odd_diff.i;
      leg3->i = even_diff.i + odd_diff.r;
    }

    w1 += fstride;
    w2 += fstride * 2;
    w3 += fstride * 3;
    ++leg0;
    ++leg1;
    ++leg2;
    ++leg3;
  } while (--remaining != 0);
}
/*
 * Radix-4 butterfly applied in place to N groups of 4*m elements.
 *
 * Group i starts at Fout + i*mm.  Within a group, elements at offsets k,
 * k + m, k + 2*m and k + 3*m are combined for k = 0 .. m-1.  The three upper
 * inputs are multiplied with C_MULC (presumably the conjugating multiply --
 * confirm against the macro definition) by twiddles read from st->twiddles
 * at strides fstride, 2*fstride and 3*fstride; the twiddle cursors restart
 * at the beginning of the table for every group.
 */
static void ki_bfly4(
                     kiss_fft_cpx * Fout,
                     const size_t fstride,
                     const kiss_fft_state *st,
                     int m,
                     int N,
                     int mm
                    )
{
   const size_t off2 = 2*m;
   const size_t off3 = 3*m;
   kiss_fft_cpx * const first = Fout;
   int grp;

   for (grp = 0; grp < N; grp++)
   {
      const kiss_twiddle_cpx *w1 = st->twiddles;
      const kiss_twiddle_cpx *w2 = st->twiddles;
      const kiss_twiddle_cpx *w3 = st->twiddles;
      int n;

      Fout = first + grp*mm;

      for (n = 0; n < m; n++)
      {
         kiss_fft_cpx prod1, prod2, prod3;
         kiss_fft_cpx even_diff, odd_sum, odd_diff;

         C_MULC(prod1, Fout[m],    *w1);
         C_MULC(prod2, Fout[off2], *w2);
         C_MULC(prod3, Fout[off3], *w3);

         C_SUB(even_diff, *Fout, prod2);
         C_ADDTO(*Fout, prod2);
         C_ADD(odd_sum,  prod1, prod3);
         C_SUB(odd_diff, prod1, prod3);

         /* Fout[off2] has to be formed before *Fout absorbs odd_sum. */
         C_SUB(Fout[off2], *Fout, odd_sum);
         C_ADDTO(*Fout, odd_sum);

         Fout[m].r    = even_diff.r - odd_diff.i;
         Fout[m].i    = even_diff.i + odd_diff.r;
         Fout[off3].r = even_diff.r + odd_diff.i;
         Fout[off3].i = even_diff.i - odd_diff.r;

         w1 += fstride;
         w2 += fstride*2;
         w3 += fstride*3;
         ++Fout;
      }
   }
}
/*
 * Radix-2 butterfly applied in place to N groups.
 *
 * Group i starts at Fout + i*mm; within it, element k is paired with element
 * k + m for k = 0 .. m-1, and the upper element is multiplied by a twiddle
 * taken from st->twiddles with step fstride (restarting at the table start
 * for every group).
 *
 * The inverse path is the plain butterfly.  The forward path performs the
 * same butterfly but divides the input by two while keeping as much accuracy
 * as possible: the twiddle product is kept in 32 bits and shifted right by
 * one, the lower input is shifted left by 14, and the sum/difference is
 * rounded back with PSHR32(..., 15).
 */
static void kf_bfly2(
                     kiss_fft_cpx * Fout,
                     const size_t fstride,
                     const kiss_fft_cfg st,
                     int m,
                     int N,
                     int mm
                    )
{
    kiss_fft_cpx * const first = Fout;
    kiss_fft_cpx * hi;
    kiss_fft_cpx * tw;
    int grp, n;

    if (st->inverse)
    {
        kiss_fft_cpx rotated;

        for (grp = 0; grp < N; grp++)
        {
            Fout = first + grp*mm;
            hi = Fout + m;
            tw = st->twiddles;
            for (n = 0; n < m; n++)
            {
                C_MUL (rotated, *hi , *tw);
                tw += fstride;
                C_SUB( *hi , *Fout , rotated );
                C_ADDTO( *Fout , rotated );
                ++hi;
                ++Fout;
            }
        }
    }
    else
    {
        for (grp = 0; grp < N; grp++)
        {
            Fout = first + grp*mm;
            hi = Fout + m;
            tw = st->twiddles;
            for (n = 0; n < m; n++)
            {
                /* Halved 32-bit twiddle product (real and imaginary parts). */
                ms_word32_t half_r, half_i;

                half_r = SHR32(SUB32(MULT16_16(hi->r , tw->r),MULT16_16(hi->i , tw->i)), 1);
                half_i = SHR32(ADD32(MULT16_16(hi->i , tw->r),MULT16_16(hi->r , tw->i)), 1);
                tw += fstride;

                /* The upper output is written first; it still reads the old lower input. */
                hi->r = PSHR32(SUB32(SHL32(EXTEND32(Fout->r), 14), half_r), 15);
                hi->i = PSHR32(SUB32(SHL32(EXTEND32(Fout->i), 14), half_i), 15);
                Fout->r = PSHR32(ADD32(SHL32(EXTEND32(Fout->r), 14), half_r), 15);
                Fout->i = PSHR32(ADD32(SHL32(EXTEND32(Fout->i), 14), half_i), 15);
                ++hi;
                ++Fout;
            }
        }
    }
}
/*
 * Radix-3 butterfly applied in place to N groups.
 *
 * Group i starts at Fout + i*mm; within it, elements at offsets k, k + m and
 * k + 2*m are combined for k = 0 .. m-1.  The two upper inputs are multiplied
 * by twiddles read from st->twiddles at strides fstride and 2*fstride, with
 * the cursors restarting at the table start for every group.
 *
 * Only the imaginary part of epi3 is used.  In FIXED_POINT builds it is the
 * constant -28378; otherwise it is read from st->twiddles[fstride*m].
 *
 * The inner loop body runs before its counter is tested, so m must be
 * non-zero.
 */
static void kf_bfly3(
                     kiss_fft_cpx * Fout,
                     const size_t fstride,
                     const kiss_fft_state *st,
                     int m,
                     int N,
                     int mm
                    )
{
   const size_t off2 = 2*m;
   kiss_fft_cpx * const first = Fout;
   kiss_twiddle_cpx epi3;
   int grp;

#ifdef FIXED_POINT
   /* The real part (-16384) is never read, so only the imaginary part is set. */
   epi3.i = -28378;
#else
   epi3 = st->twiddles[fstride*m];
#endif

   for (grp = 0; grp < N; grp++)
   {
      const kiss_twiddle_cpx *w1 = st->twiddles;
      const kiss_twiddle_cpx *w2 = st->twiddles;
      /* For non-custom modes, m is guaranteed to be a multiple of 4. */
      size_t remaining = m;

      Fout = first + grp*mm;

      do {
         kiss_fft_cpx prod1, prod2, sum, diff;

         C_MUL(prod1, Fout[m],    *w1);
         C_MUL(prod2, Fout[off2], *w2);

         C_ADD(sum,  prod1, prod2);
         C_SUB(diff, prod1, prod2);

         /* Uses the original *Fout, so this must precede the C_ADDTO below. */
         Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(sum.r));
         Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(sum.i));

         C_MULBYSCALAR( diff , epi3.i );

         C_ADDTO(*Fout, sum);

         Fout[off2].r = ADD32_ovflw(Fout[m].r, diff.i);
         Fout[off2].i = SUB32_ovflw(Fout[m].i, diff.r);

         Fout[m].r = SUB32_ovflw(Fout[m].r, diff.i);
         Fout[m].i = ADD32_ovflw(Fout[m].i, diff.r);

         w1 += fstride;
         w2 += fstride*2;
         ++Fout;
      } while (--remaining != 0);
   }
}
/*
 * Radix-3 butterfly applied in place to N groups.
 *
 * Group i starts at Fout + i*mm; within it, elements at offsets k, k + m and
 * k + 2*m are combined for k = 0 .. m-1.  All three inputs go through
 * C_FIXDIV(..., 3) first, then the two upper inputs are multiplied by
 * twiddles read from st->twiddles at strides fstride and 2*fstride (cursors
 * restart at the table start for every group).
 *
 * epi3 is read from st->twiddles[fstride*m]; only its imaginary part is used.
 *
 * The inner loop body runs before its counter is tested, so m must be
 * non-zero.
 */
static void kf_bfly3(
                     kiss_fft_cpx * Fout,
                     const size_t fstride,
                     const kiss_fft_state *st,
                     int m,
                     int N,
                     int mm
                    )
{
   const size_t off2 = 2*m;
   kiss_fft_cpx * const first = Fout;
   kiss_twiddle_cpx epi3;
   int grp;

   epi3 = st->twiddles[fstride*m];

   for (grp = 0; grp < N; grp++)
   {
      const kiss_twiddle_cpx *w1 = st->twiddles;
      const kiss_twiddle_cpx *w2 = st->twiddles;
      size_t remaining = m;

      Fout = first + grp*mm;

      do {
         kiss_fft_cpx prod1, prod2, sum, diff;

         C_FIXDIV(*Fout, 3);
         C_FIXDIV(Fout[m], 3);
         C_FIXDIV(Fout[off2], 3);

         C_MUL(prod1, Fout[m],    *w1);
         C_MUL(prod2, Fout[off2], *w2);

         C_ADD(sum,  prod1, prod2);
         C_SUB(diff, prod1, prod2);

         /* Uses the original *Fout, so this must precede the C_ADDTO below. */
         Fout[m].r = Fout->r - HALF_OF(sum.r);
         Fout[m].i = Fout->i - HALF_OF(sum.i);

         C_MULBYSCALAR( diff , epi3.i );

         C_ADDTO(*Fout, sum);

         Fout[off2].r = Fout[m].r + diff.i;
         Fout[off2].i = Fout[m].i - diff.r;

         Fout[m].r -= diff.i;
         Fout[m].i += diff.r;

         w1 += fstride;
         w2 += fstride*2;
         ++Fout;
      } while (--remaining != 0);
   }
}
/*
 * Radix-3 butterfly for one FFT stage, computed in place.
 *
 * Elements Fout[k], Fout[k + m] and Fout[k + 2*m] are combined for
 * k = 0 .. m-1.  When st->inverse is zero the three inputs are first passed
 * through C_FIXDIV(..., 3); in the inverse direction that step is skipped.
 * The two upper inputs are multiplied by twiddles read from st->twiddles at
 * strides fstride and 2*fstride.
 *
 * epi3 is read from st->twiddles[fstride*m]; only its imaginary part is used.
 *
 * The loop body runs before the counter is tested, so m must be non-zero.
 */
static void kf_bfly3(
         kiss_fft_cpx * Fout,
         const size_t fstride,
         const kiss_fft_cfg st,
         size_t m
         )
{
    kiss_fft_cpx *in0 = Fout;
    kiss_fft_cpx *in1 = Fout + m;
    kiss_fft_cpx *in2 = Fout + 2*m;
    kiss_fft_cpx *w1 = st->twiddles;
    kiss_fft_cpx *w2 = st->twiddles;
    kiss_fft_cpx prod1, prod2, sum, diff;
    kiss_fft_cpx epi3;
    size_t remaining = m;

    epi3 = st->twiddles[fstride*m];

    do {
        if (!st->inverse) {
            C_FIXDIV(*in0, 3);
            C_FIXDIV(*in1, 3);
            C_FIXDIV(*in2, 3);
        }

        C_MUL(prod1, *in1, *w1);
        C_MUL(prod2, *in2, *w2);

        C_ADD(sum,  prod1, prod2);
        C_SUB(diff, prod1, prod2);

        /* Uses the original *in0, so this must precede the C_ADDTO below. */
        in1->r = in0->r - HALF_OF(sum.r);
        in1->i = in0->i - HALF_OF(sum.i);

        C_MULBYSCALAR( diff , epi3.i );

        C_ADDTO(*in0, sum);

        in2->r = in1->r + diff.i;
        in2->i = in1->i - diff.r;

        in1->r -= diff.i;
        in1->i += diff.r;

        w1 += fstride;
        w2 += fstride*2;
        ++in0;
        ++in1;
        ++in2;
    } while (--remaining != 0);
}
/*
 * Radix-2 butterfly for one FFT stage, computed in place.
 *
 * Element k is paired with element k + m for k = 0 .. m-1.  Both inputs go
 * through C_FIXDIV(..., 2), the upper one is multiplied by a twiddle read
 * from st->twiddles (advancing by fstride per butterfly), and the pair is
 * replaced by its sum and difference.
 *
 * The loop body runs before the counter is tested, so m must be non-zero.
 */
static void kf_bfly2(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, int m)
{
    kiss_fft_cpx *lo = Fout;
    kiss_fft_cpx *hi = Fout + m;
    kiss_fft_cpx *tw = st->twiddles;
    kiss_fft_cpx rotated;
    int remaining = m;

    do {
        C_FIXDIV(*lo, 2);
        C_FIXDIV(*hi, 2);

        C_MUL(rotated, *hi, *tw);

        /* The difference reads the old *lo, so it is formed before the sum. */
        C_SUB(*hi, *lo, rotated);
        C_ADDTO(*lo, rotated);

        tw += fstride;
        ++hi;
        ++lo;
    } while (--remaining != 0);
}
/*
 * Perform the butterfly for one stage of a mixed radix FFT.
 *
 * Generic radix-p butterfly, computed in place.  For each of the m columns,
 * the p inputs Fout[u], Fout[u + m], ... are copied into a local buffer
 * (passed through C_FIXDIV(..., p) when st->inverse is zero) and every output
 * is rebuilt as the first input plus the remaining inputs multiplied by
 * twiddles.  The twiddle index advances by fstride * k per term and is
 * wrapped once by st->nfft.
 *
 * The local buffer holds 17 entries; a larger p is reported through
 * ms_fatal().  NOTE(review): execution continues after that call if
 * ms_fatal() returns -- confirm that it does not.
 */
static void kf_bfly_generic(
        kiss_fft_cpx * Fout,
        const size_t fstride,
        const kiss_fft_cfg st,
        int m,
        int p
        )
{
    kiss_fft_cpx * const tw_table = st->twiddles;
    const int nfft = st->nfft;
    kiss_fft_cpx column[17];
    kiss_fft_cpx term;
    int col, idx, leg, tap;

    if (p > 17)
        ms_fatal("KissFFT: max radix supported is 17");

    for (col = 0; col < m; ++col) {
        /* Gather the p inputs of this column. */
        for (leg = 0, idx = col; leg < p; ++leg, idx += m) {
            column[leg] = Fout[idx];
            if (!st->inverse) {
                C_FIXDIV(column[leg], p);
            }
        }

        /* Rebuild each output from the gathered inputs. */
        for (leg = 0, idx = col; leg < p; ++leg, idx += m) {
            int twidx = 0;

            Fout[idx] = column[0];
            for (tap = 1; tap < p; ++tap) {
                twidx += fstride * idx;
                if (twidx >= nfft)
                    twidx -= nfft;
                C_MUL(term, column[tap], tw_table[twidx]);
                C_ADDTO(Fout[idx], term);
            }
        }
    }
}
/*
 * Perform the butterfly for one stage of a mixed radix FFT.
 *
 * Generic radix-p butterfly, computed in place.  For each of the m columns,
 * the p inputs Fout[u], Fout[u + m], ... are copied into a temporary buffer
 * (each passed through C_FIXDIV(..., p)) and every output is rebuilt as the
 * first input plus the remaining inputs multiplied by twiddles.  The twiddle
 * index advances by fstride * k per term and is wrapped once by st->nfft.
 *
 * The temporary buffer of p elements comes from KISS_FFT_TMP_ALLOC and is
 * released with KISS_FFT_TMP_FREE.  If the allocation yields a null pointer
 * the function returns without modifying Fout.
 */
static void kf_bfly_generic(
        kiss_fft_cpx * Fout,
        const size_t fstride,
        const kiss_fft_cfg st,
        int m,
        int p
        )
{
    int u,k,q1,q;
    kiss_fft_cpx * twiddles = st->twiddles;
    kiss_fft_cpx t;
    int Norig = st->nfft;

    kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p);

    /* Never write through a failed allocation; there is nothing to free. */
    if (!scratch)
        return;

    // see http://sourceforge.net/p/kissfft/bugs/9/
#pragma omp critical
    for ( u=0; u<m; ++u ) {
        /* Gather the p inputs of this column. */
        k=u;
        for ( q1=0 ; q1<p ; ++q1 ) {
            scratch[q1] = Fout[ k ];
            C_FIXDIV(scratch[q1],p);
            k += m;
        }

        /* Rebuild each output from the gathered inputs. */
        k=u;
        for ( q1=0 ; q1<p ; ++q1 ) {
            int twidx=0;
            Fout[ k ] = scratch[0];
            for (q=1;q<p;++q ) {
                twidx += fstride * k;
                if (twidx>=Norig) twidx-=Norig;
                C_MUL(t,scratch[q] , twiddles[twidx] );
                C_ADDTO( Fout[ k ] ,t);
            }
            k += m;
        }
    }
    KISS_FFT_TMP_FREE(scratch);
}
/*
 * Perform the butterfly for one stage of a mixed radix FFT.
 *
 * Generic radix-p butterfly, computed in place.  For each of the m columns,
 * the p inputs Fout[u], Fout[u + m], ... are copied into a heap buffer (each
 * passed through C_FIXDIV(..., p)) and every output is rebuilt as the first
 * input plus the remaining inputs multiplied by twiddles.  The twiddle index
 * advances by fstride * k per term and is wrapped once by st->nfft.
 *
 * The buffer of p elements is obtained with malloc() and freed before
 * returning.  If malloc() fails the function returns without modifying Fout.
 */
static void kf_bfly_generic(
        kiss_fft_cpx * Fout,
        const size_t fstride,
        const kiss_fft_cfg st,
        int m,
        int p
        )
{
    int u,k,q1,q;
    kiss_fft_cpx * twiddles = st->twiddles;
    kiss_fft_cpx t;
    int Norig = st->nfft;

    kiss_fft_cpx *scratchbuf=(kiss_fft_cpx *)malloc( sizeof(kiss_fft_cpx) * p );

    /* malloc() may return a null pointer; dereferencing it is undefined. */
    if (!scratchbuf)
        return;

    for ( u=0; u<m; ++u ) {
        /* Gather the p inputs of this column. */
        k=u;
        for ( q1=0 ; q1<p ; ++q1 ) {
            scratchbuf[q1] = Fout[ k ];
            C_FIXDIV(scratchbuf[q1],p);
            k += m;
        }

        /* Rebuild each output from the gathered inputs. */
        k=u;
        for ( q1=0 ; q1<p ; ++q1 ) {
            int twidx=0;
            Fout[ k ] = scratchbuf[0];
            for (q=1; q<p; ++q ) {
                twidx += fstride * k;
                if (twidx>=Norig) twidx-=Norig;
                C_MUL(t,scratchbuf[q] , twiddles[twidx] );
                C_ADDTO( Fout[ k ] ,t);
            }
            k += m;
        }
    }

    free( scratchbuf );
}
/*
 * Radix-4 butterfly for one FFT stage, computed in place.
 *
 * Elements Fout[k], Fout[k + m], Fout[k + 2*m] and Fout[k + 3*m] are combined
 * for k = 0 .. m-1.  The three upper inputs are multiplied by twiddles read
 * from st->twiddles at strides fstride, 2*fstride and 3*fstride.
 *
 * In the forward direction (st->inverse == 0) all 4*m elements are first
 * scaled with PSHR16(..., 2), which this code uses in place of
 * C_FIXDIV(x, 4).  The inverse direction applies no scaling.  The two
 * directions otherwise differ only in the sign of the j * odd_diff term.
 *
 * Each butterfly loop runs its body before testing the counter, so m must be
 * non-zero.
 */
static void kf_bfly4(
        kiss_fft_cpx * Fout,
        const size_t fstride,
        const kiss_fft_cfg st,
        const size_t m
        )
{
    kiss_fft_cpx *leg0 = Fout;
    kiss_fft_cpx *leg1 = Fout + m;
    kiss_fft_cpx *leg2 = Fout + 2*m;
    kiss_fft_cpx *leg3 = Fout + 3*m;
    kiss_fft_cpx *w1 = st->twiddles;
    kiss_fft_cpx *w2 = st->twiddles;
    kiss_fft_cpx *w3 = st->twiddles;
    kiss_fft_cpx prod1, prod2, prod3;
    kiss_fft_cpx even_diff, odd_sum, odd_diff;
    size_t remaining = m;

    if (st->inverse) {
        do {
            C_MUL(prod1, *leg1, *w1);
            C_MUL(prod2, *leg2, *w2);
            C_MUL(prod3, *leg3, *w3);

            C_SUB(even_diff, *leg0, prod2);
            C_ADDTO(*leg0, prod2);
            C_ADD(odd_sum,  prod1, prod3);
            C_SUB(odd_diff, prod1, prod3);

            /* leg2 must be written before leg0 absorbs odd_sum. */
            C_SUB(*leg2, *leg0, odd_sum);
            C_ADDTO(*leg0, odd_sum);

            leg1->r = even_diff.r - odd_diff.i;
            leg1->i = even_diff.i + odd_diff.r;
            leg3->r = even_diff.r + odd_diff.i;
            leg3->i = even_diff.i - odd_diff.r;

            w1 += fstride;
            w2 += fstride*2;
            w3 += fstride*3;
            ++leg0;
            ++leg1;
            ++leg2;
            ++leg3;
        } while (--remaining != 0);
    } else {
        int i;

        /* Scale the whole block down before the butterflies. */
        for (i = 0; i < 4*m; i++) {
            Fout[i].r = PSHR16(Fout[i].r, 2);
            Fout[i].i = PSHR16(Fout[i].i, 2);
        }

        do {
            C_MUL(prod1, *leg1, *w1);
            C_MUL(prod2, *leg2, *w2);
            C_MUL(prod3, *leg3, *w3);

            C_SUB(even_diff, *leg0, prod2);
            C_ADDTO(*leg0, prod2);
            C_ADD(odd_sum,  prod1, prod3);
            C_SUB(odd_diff, prod1, prod3);

            /* leg2 must be written before leg0 absorbs odd_sum. */
            C_SUB(*leg2, *leg0, odd_sum);
            C_ADDTO(*leg0, odd_sum);

            leg1->r = even_diff.r + odd_diff.i;
            leg1->i = even_diff.i - odd_diff.r;
            leg3->r = even_diff.r - odd_diff.i;
            leg3->i = even_diff.i + odd_diff.r;

            w1 += fstride;
            w2 += fstride*2;
            w3 += fstride*3;
            ++leg0;
            ++leg1;
            ++leg2;
            ++leg3;
        } while (--remaining != 0);
    }
}
/*
 * Radix-4 butterfly applied in place to N groups.
 *
 * m == 1: degenerate case where all the twiddles are 1.  Each group is four
 * consecutive elements, combined without any multiplication; Fout advances by
 * 4 per group and mm is not consulted.
 *
 * m != 1: group i starts at Fout + i*mm; within it, elements at offsets k,
 * k + m, k + 2*m and k + 3*m are combined for k = 0 .. m-1.  The three upper
 * inputs are multiplied by twiddles read from st->twiddles at strides
 * fstride, 2*fstride and 3*fstride, with the cursors restarting at the table
 * start for every group.
 */
static void kf_bfly4(
                     kiss_fft_cpx * Fout,
                     const size_t fstride,
                     const kiss_fft_state *st,
                     int m,
                     int N,
                     int mm
                    )
{
   const int off2 = 2*m;
   const int off3 = 3*m;
   kiss_fft_cpx * const first = Fout;
   int grp;

   if (m == 1)
   {
      for (grp = 0; grp < N; grp++)
      {
         kiss_fft_cpx even_diff, odd;

         C_SUB( even_diff , *Fout, Fout[2] );
         C_ADDTO(*Fout, Fout[2]);

         /* odd first holds the sum of the odd inputs ... */
         C_ADD( odd , Fout[1] , Fout[3] );
         C_SUB( Fout[2], *Fout, odd );
         C_ADDTO( *Fout , odd );

         /* ... and is then reused for their difference. */
         C_SUB( odd , Fout[1] , Fout[3] );

         Fout[1].r = ADD32_ovflw(even_diff.r, odd.i);
         Fout[1].i = SUB32_ovflw(even_diff.i, odd.r);
         Fout[3].r = SUB32_ovflw(even_diff.r, odd.i);
         Fout[3].i = ADD32_ovflw(even_diff.i, odd.r);

         Fout += 4;
      }
      return;
   }

   for (grp = 0; grp < N; grp++)
   {
      const kiss_twiddle_cpx *w1 = st->twiddles;
      const kiss_twiddle_cpx *w2 = st->twiddles;
      const kiss_twiddle_cpx *w3 = st->twiddles;
      int n;

      Fout = first + grp*mm;

      /* m is guaranteed to be a multiple of 4. */
      for (n = 0; n < m; n++)
      {
         kiss_fft_cpx prod1, prod2, prod3;
         kiss_fft_cpx even_diff, odd_sum, odd_diff;

         C_MUL(prod1, Fout[m],    *w1);
         C_MUL(prod2, Fout[off2], *w2);
         C_MUL(prod3, Fout[off3], *w3);

         C_SUB(even_diff, *Fout, prod2);
         C_ADDTO(*Fout, prod2);
         C_ADD(odd_sum,  prod1, prod3);
         C_SUB(odd_diff, prod1, prod3);

         /* Fout[off2] has to be formed before *Fout absorbs odd_sum. */
         C_SUB(Fout[off2], *Fout, odd_sum);
         C_ADDTO(*Fout, odd_sum);

         Fout[m].r    = ADD32_ovflw(even_diff.r, odd_diff.i);
         Fout[m].i    = SUB32_ovflw(even_diff.i, odd_diff.r);
         Fout[off3].r = SUB32_ovflw(even_diff.r, odd_diff.i);
         Fout[off3].i = ADD32_ovflw(even_diff.i, odd_diff.r);

         w1 += fstride;
         w2 += fstride*2;
         w3 += fstride*3;
         ++Fout;
      }
   }
}