Exemple #1
0
/*
 * Long-block inverse transform, performed in place.
 *
 * data  - 256 values; every index 0..255 is read by the first loop
 *         (through fftorder[]) and overwritten by the second loop.
 * delay - 128 values carried between calls: each entry is read by the
 *         windowing step and then replaced with a value from this call.
 * bias  - not referenced directly here.
 *         NOTE(review): presumably consumed inside BUTTERFLY_B -- confirm
 *         against the macro definition.
 *
 * Stages: multiply by the pre1[] factors while reordering through
 * fftorder[], run ifft128(), multiply by the post1[] factors, then combine
 * with delay[] under a52_imdct_window[].
 */
void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias)
{
    const sample_t * win = a52_imdct_window;
    complex_t z[128];
    int n;

    /* Pre-IFFT complex multiply; input pairs are (data[src], data[255-src]). */
    for (n = 0; n < 128; n++) {
        const int src = fftorder[n];
        sample_t tw_re = pre1[n].real;
        sample_t tw_im = pre1[n].imag;

        BUTTERFLY_0 (z[n].real, z[n].imag, tw_re, tw_im, data[src], data[255-src]);
    }

    ifft128 (z);

    /* Post-IFFT complex multiply, then window into a real-valued signal. */
    for (n = 0; n < 64; n++) {
        const int even = 2 * n;
        const int odd = even + 1;
        sample_t rot_re, rot_im;
        sample_t head_re, head_im;
        sample_t tail_re, tail_im;
        sample_t w_1, w_2;

        /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) */
        rot_re = post1[n].real;
        rot_im = post1[n].imag;
        BUTTERFLY_0 (head_re, head_im, rot_im, rot_re, z[n].imag, z[n].real);
        BUTTERFLY_0 (tail_re, tail_im, rot_re, rot_im, z[127-n].imag, z[127-n].real);

        /* Even sample and its mirror; delay[even] is read before it is replaced. */
        w_1 = win[even];
        w_2 = win[255-even];
        BUTTERFLY_B (data[255-even], data[even], w_2, w_1, head_re, delay[even]);
        delay[even] = head_im;

        /* Odd sample and its mirror; delay[odd] is read before it is replaced. */
        w_1 = win[odd];
        w_2 = win[254-even];
        BUTTERFLY_B (data[odd], data[254-even], w_1, w_2, tail_re, delay[odd]);
        delay[odd] = tail_im;
    }
}
Exemple #2
0
/*
 * Short-block inverse transform, performed in place: two 64-point IFFTs
 * over interleaved halves of the input.
 *
 * data  - 256 values; read by the first loop (through fftorder[]) and
 *         fully overwritten by the second loop.
 * delay - 128 values carried between calls: each entry is read by the
 *         windowing step and then replaced with a value from this call.
 * bias  - not referenced directly here.
 *         NOTE(review): presumably consumed inside BUTTERFLY_B -- confirm
 *         against the macro definition.
 */
void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias)
{
    const sample_t * win = a52_imdct_window;
    complex_t za[64], zb[64];
    int n;

    /* Pre-IFFT complex multiply; both transforms share the pre2[] factors. */
    for (n = 0; n < 64; n++) {
        const int src = fftorder[n];
        sample_t tw_re = pre2[n].real;
        sample_t tw_im = pre2[n].imag;

        BUTTERFLY_0 (za[n].real, za[n].imag, tw_re, tw_im, data[src], data[254-src]);
        BUTTERFLY_0 (zb[n].real, zb[n].imag, tw_re, tw_im, data[src+1], data[255-src]);
    }

    ifft64 (za);
    ifft64 (zb);

    /* Post-IFFT complex multiply, then window into a real-valued signal. */
    for (n = 0; n < 32; n++) {
        const int even = 2 * n;
        const int odd = even + 1;
        sample_t rot_re, rot_im;
        sample_t a_re, a_im, b_re, b_im;    /* derived from za[] */
        sample_t c_re, c_im, d_re, d_im;    /* derived from zb[] */
        sample_t w_1, w_2;

        /* y1[n] = z1[n] * (xcos2[n] + j * xsin2[n]) */
        rot_re = post2[n].real;
        rot_im = post2[n].imag;
        BUTTERFLY_0 (a_re, a_im, rot_im, rot_re, za[n].imag, za[n].real);
        BUTTERFLY_0 (b_re, b_im, rot_re, rot_im, za[63-n].imag, za[63-n].real);
        BUTTERFLY_0 (c_re, c_im, rot_im, rot_re, zb[n].imag, zb[n].real);
        BUTTERFLY_0 (d_re, d_im, rot_re, rot_im, zb[63-n].imag, zb[63-n].real);

        /*
         * Outputs are produced from the za[] results combined with delay[];
         * the zb[] results replace the delay[] entries just consumed.
         */
        w_1 = win[even];
        w_2 = win[255-even];
        BUTTERFLY_B (data[255-even], data[even], w_2, w_1, a_re, delay[even]);
        delay[even] = c_im;

        w_1 = win[128+even];
        w_2 = win[127-even];
        BUTTERFLY_B (data[128+even], data[127-even], w_1, w_2, a_im, delay[127-even]);
        delay[127-even] = c_re;

        w_1 = win[odd];
        w_2 = win[254-even];
        BUTTERFLY_B (data[254-even], data[odd], w_2, w_1, b_im, delay[odd]);
        delay[odd] = d_re;

        w_1 = win[129+even];
        w_2 = win[126-even];
        BUTTERFLY_B (data[129+even], data[126-even], w_1, w_2, b_re, delay[126-even]);
        delay[126-even] = d_im;
    }
}
Exemple #3
0
/*
 * fft64_2way - in-place transform of eight __m256i vectors, operating on
 * 16-bit lanes.
 *
 * a: pointer to 8 consecutive __m256i values (256 bytes).  They are loaded
 *    and stored through an __m256i pointer, so the buffer must satisfy that
 *    type's alignment requirement.
 *
 * Structure: a first pass of radix-2 decimation-in-frequency butterflies
 * across the eight vectors, a lane-wise multiply by FFT64_Twiddle[], a
 * 16-bit interleave that transposes the state, then a second pass of
 * decimation-in-time butterflies.  Multiplications by powers of the 8th
 * root of unity (4) are done as left shifts by the counts in w[].
 *
 * REDUCE, DO_REDUCE_FULL_S and FFT64_Twiddle are defined outside this
 * function.  NOTE(review): they presumably implement arithmetic modulo 257
 * -- confirm against their definitions.
 */
void fft64_2way( void *a )
{
  __m256i* const A = a;
  register __m256i X0, X1, X2, X3, X4, X5, X6, X7;

#define X(i) X##i

  X0 = A[0];
  X1 = A[1];
  X2 = A[2];
  X3 = A[3];
  X4 = A[4];
  X5 = A[5];
  X6 = A[6];
  X7 = A[7];

#define DO_REDUCE(i)   X(i) = REDUCE( X(i) )

   // Begin with 8 parallels DIF FFT_8
   //
   // FFT_8 using w=4 as 8th root of unity
   //  Unrolled decimation in frequency (DIF) radix-2 NTT.
   //  Output data is in revbin_permuted order.

  // Shift counts: w[n] = 2*n, so (x << w[n]) multiplies x by 4^n.
  static const int w[] = {0, 2, 4, 6};

// DIF butterfly without twiddle: X(j) <- X(i) + X(j), X(i) <- X(i) - X(j).
#define BUTTERFLY_0( i,j ) \
do { \
    __m256i v = X(j); \
    X(j) = _mm256_add_epi16( X(i), X(j) ); \
    X(i) = _mm256_sub_epi16( X(i), v ); \
} while(0)

// DIF butterfly with twiddle: as above, then the difference is shifted
// left by w[n] bits.
#define BUTTERFLY_N( i,j,n ) \
do { \
    __m256i v = X(j); \
    X(j) = _mm256_add_epi16( X(i), X(j) ); \
    X(i) = _mm256_slli_epi16( _mm256_sub_epi16( X(i), v ), w[n] ); \
} while(0)

  BUTTERFLY_0( 0, 4 );
  BUTTERFLY_N( 1, 5, 1 );
  BUTTERFLY_N( 2, 6, 2 );
  BUTTERFLY_N( 3, 7, 3 );

  DO_REDUCE( 2 );
  DO_REDUCE( 3 );

  BUTTERFLY_0( 0, 2 );
  BUTTERFLY_0( 4, 6 );
  BUTTERFLY_N( 1, 3, 2 );
  BUTTERFLY_N( 5, 7, 2 );

  DO_REDUCE( 1 );

  BUTTERFLY_0( 0, 1 );
  BUTTERFLY_0( 2, 3 );
  BUTTERFLY_0( 4, 5 );
  BUTTERFLY_0( 6, 7 );

  /* We don't need to reduce X(7) */
  DO_REDUCE_FULL_S( 0 );
  DO_REDUCE_FULL_S( 1 );
  DO_REDUCE_FULL_S( 2 );
  DO_REDUCE_FULL_S( 3 );
  DO_REDUCE_FULL_S( 4 );
  DO_REDUCE_FULL_S( 5 );
  DO_REDUCE_FULL_S( 6 );

#undef BUTTERFLY_0
#undef BUTTERFLY_N

  // Multiply by twiddle factors.
  // X(6)..X(0) pair with FFT64_Twiddle[0]..[6]; X(7) is left unmultiplied.
  X(6) = _mm256_mullo_epi16( X(6), FFT64_Twiddle[0].m256i );
  X(5) = _mm256_mullo_epi16( X(5), FFT64_Twiddle[1].m256i );
  X(4) = _mm256_mullo_epi16( X(4), FFT64_Twiddle[2].m256i );
  X(3) = _mm256_mullo_epi16( X(3), FFT64_Twiddle[3].m256i );
  X(2) = _mm256_mullo_epi16( X(2), FFT64_Twiddle[4].m256i );
  X(1) = _mm256_mullo_epi16( X(1), FFT64_Twiddle[5].m256i );
  X(0) = _mm256_mullo_epi16( X(0), FFT64_Twiddle[6].m256i );

  // Transpose the FFT state with a revbin order permutation
  // on the rows and the column.
  // This will make the full FFT_64 in order.
#define INTERLEAVE(i,j) \
  do { \
    __m256i t1= X(i); \
    __m256i t2= X(j); \
    X(i) = _mm256_unpacklo_epi16( t1, t2 ); \
    X(j) = _mm256_unpackhi_epi16( t1, t2 ); \
  } while(0)

  INTERLEAVE( 1, 0 );
  INTERLEAVE( 3, 2 );
  INTERLEAVE( 5, 4 );
  INTERLEAVE( 7, 6 );

  INTERLEAVE( 2, 0 );
  INTERLEAVE( 3, 1 );
  INTERLEAVE( 6, 4 );
  INTERLEAVE( 7, 5 );

  INTERLEAVE( 4, 0 );
  INTERLEAVE( 5, 1 );
  INTERLEAVE( 6, 2 );
  INTERLEAVE( 7, 3 );

#undef INTERLEAVE

   //Finish with 8 parallels DIT FFT_8
   //FFT_8 using w=4 as 8th root of unity
   // Unrolled decimation in time (DIT) radix-2 NTT.
   // Input data is in revbin_permuted order.

// DIT butterfly without twiddle: X(j) <- X(j) - X(i), X(i) <- X(j) + X(i).
#define BUTTERFLY_0( i,j ) \
do { \
   __m256i u = X(j); \
   X(j) = _mm256_sub_epi16( X(j), X(i) ); \
   X(i) = _mm256_add_epi16( u, X(i) ); \
} while(0)


// DIT butterfly with twiddle: X(i) is shifted left by w[n] bits first,
// then combined as above.
#define BUTTERFLY_N( i,j,n ) \
do { \
   __m256i u = X(j); \
   X(i) = _mm256_slli_epi16( X(i), w[n] ); \
   X(j) = _mm256_sub_epi16( X(j), X(i) ); \
   X(i) = _mm256_add_epi16( u, X(i) ); \
} while(0)

  DO_REDUCE( 0 );
  DO_REDUCE( 1 );
  DO_REDUCE( 2 );
  DO_REDUCE( 3 );
  DO_REDUCE( 4 );
  DO_REDUCE( 5 );
  DO_REDUCE( 6 );
  DO_REDUCE( 7 );

  BUTTERFLY_0( 0, 1 );
  BUTTERFLY_0( 2, 3 );
  BUTTERFLY_0( 4, 5 );
  BUTTERFLY_0( 6, 7 );

  BUTTERFLY_0( 0, 2 );
  BUTTERFLY_0( 4, 6 );
  BUTTERFLY_N( 1, 3, 2 );
  BUTTERFLY_N( 5, 7, 2 );

  DO_REDUCE( 3 );

  BUTTERFLY_0( 0, 4 );
  BUTTERFLY_N( 1, 5, 1 );
  BUTTERFLY_N( 2, 6, 2 );
  BUTTERFLY_N( 3, 7, 3 );

  DO_REDUCE_FULL_S( 0 );
  DO_REDUCE_FULL_S( 1 );
  DO_REDUCE_FULL_S( 2 );
  DO_REDUCE_FULL_S( 3 );
  DO_REDUCE_FULL_S( 4 );
  DO_REDUCE_FULL_S( 5 );
  DO_REDUCE_FULL_S( 6 );
  DO_REDUCE_FULL_S( 7 );

  // Undefine the DIT butterflies by their actual names so the definitions
  // do not leak past this function.
#undef BUTTERFLY_0
#undef BUTTERFLY_N

  A[0] = X0;
  A[1] = X1;
  A[2] = X2;
  A[3] = X3;
  A[4] = X4;
  A[5] = X5;
  A[6] = X6;
  A[7] = X7;

#undef X
}