Example #1
0
int main(void)
{
    /*
       Example of Apple Altivec coded binary radix FFT
       using intrinsics from Petersen and Arbenz "Intro.
       to Parallel Computing," Section 3.6

       This is an expanded version of a generic work-space
       FFT: steps are in-line. cfft2(n,x,y,w,sign) takes complex
       n-array "x" (Fortran real,aimag,real,aimag,... order)
       and writes its DFT in "y". Both input "x" and the
       original contents of "y" are destroyed. Initialization
       for array "w" (size n/2 complex of twiddle factors
       (exp(twopi*i*k/n), for k=0..n/2-1)) is computed once
       by cffti(n,w).

                          WPP, SAM. Math. ETHZ, 1 June, 2002

       Driver outline.  For n = 2, 4, 8, ... (at most 20 sizes, and
       never more than the N complex points the buffers can hold):
         1. accuracy pass: fill x with values returned by ggl(&seed),
            keep a copy in z, run one forward and one backward
            transform and print the RMS difference between z and x/n;
         2. load pass: zero x and z, then run nits forward/backward
            pairs.  No timer is read in this driver.

       Returns 0 on success, 1 if the work arrays cannot be allocated.
    */

    int i,it,ln2,n;
    int nits=1000000;
    static float seed = 331.0;
    float error,fnm1,sign,z0,z1,ggl();
    float *x,*y,*z,*w;

    /* x, y and z each hold N complex values (2*N floats); w holds N floats.
       NOTE(review): the original comment asks for storage on a 4-word
       boundary; malloc only promises alignment suitable for any
       fundamental type - confirm that is enough for the vector kernel. */
    x = (float *) malloc((size_t)2 * (size_t)(N) * sizeof *x);
    y = (float *) malloc((size_t)2 * (size_t)(N) * sizeof *y);
    z = (float *) malloc((size_t)2 * (size_t)(N) * sizeof *z);
    w = (float *) malloc((size_t)(N) * sizeof *w);
    if(x == NULL || y == NULL || z == NULL || w == NULL) {
        fprintf(stderr," cannot allocate FFT work arrays\n");
        free(x);                         /* free(NULL) is a no-op */
        free(y);
        free(z);
        free(w);
        return 1;
    }

    n = 2;
    /* stop early if the next size would not fit into the buffers */
    for(ln2=1; ln2<21 && n<=(N); ln2++) {

        /* ---- accuracy pass: random data, one round trip ---- */
        for(i=0; i<2*n; i+=2) {
            z0 = ggl(&seed);     /* real part of array */
            z1 = ggl(&seed);     /* imaginary part of array */
            x[i] = z0;
            z[i] = z0;           /* copy of initial real data */
            x[i+1] = z1;
            z[i+1] = z1;         /* copy of initial imag. data */
        }
        /* initialize sine/cosine tables */
        cffti(n,w);
        /* transform forward, back */
        sign = 1.0;
        cfft2(n,x,y,w,sign);
        sign = -1.0;
        cfft2(n,y,x,w,sign);
        /* results should be same as initial multiplied by n */
        fnm1 = 1.0/((float) n);
        error = 0.0;
        for(i=0; i<2*n; i+=2) {
            error += (z[i] - fnm1*x[i])*(z[i] - fnm1*x[i]) +
                     (z[i+1] - fnm1*x[i+1])*(z[i+1] - fnm1*x[i+1]);
        }
        error = sqrt(fnm1*error);
        printf(" for n=%d, fwd/bck error=%e\n",n,error);

        /* ---- load pass: zero data, nits round trips ---- */
        for(i=0; i<2*n; i+=2) {
            x[i] = 0;
            z[i] = 0;
            x[i+1] = 0;
            z[i+1] = 0;
        }
        /* the tables are rebuilt before this pass, as in the original flow */
        cffti(n,w);
        for(it=0; it<nits; it++) {
            sign = +1.0;
            cfft2(n,x,y,w,sign);
            sign = -1.0;
            cfft2(n,y,x,w,sign);
        }

        /* every fourth doubling cuts the repeat count by a factor of ten */
        if((ln2%4)==0) nits /= 10;
        n *= 2;
    }

    free(x);
    free(y);
    free(z);
    free(w);
    return 0;
}
Example #2
0
int main()
{
/* 
   SSE version of cfft2 - uses INTEL intrinsics
       W. Petersen, SAM. Math. ETHZ 2 May, 2002 
*/
   int first,i,icase,it,n;
   float seed,error,fnm1,sign,z0,z1,ggl();
   float t1,ln2,mflops;
   void cffti(),cfft2();

   first = 1;
   seed  = 331.0;
   for(icase=0;icase<2;icase++){
   if(first){
      for(i=0;i<2*N;i+=2){
         z0 = ggl(&seed);     /* real part of array */
         z1 = ggl(&seed);     /* imaginary part of array */
         x[i] = z0;
         z[i] = z0;           /* copy of initial real data */
         x[i+1] = z1;
         z[i+1] = z1;         /* copy of initial imag. data */
      }
   } else {
      for(i=0;i<2*N;i+=2){
         z0 = 0;              /* real part of array */
         z1 = 0;              /* imaginary part of array */
         x[i] = z0;
         z[i] = z0;           /* copy of initial real data */
         x[i+1] = z1;
         z[i+1] = z1;         /* copy of initial imag. data */
      }
   }
/* initialize sine/cosine tables */
   n = N;
   cffti(n,w);
/* transform forward, back */
   if(first){
      sign = 1.0;
      cfft2(n,x,y,w,sign);
      sign = -1.0;
      cfft2(n,y,x,w,sign);
/* results should be same as initial multiplied by N */
      fnm1 = 1.0/((float) n);
      error = 0.0;
      for(i=0;i<2*N;i+=2){
         error += (z[i] - fnm1*x[i])*(z[i] - fnm1*x[i]) +
                  (z[i+1] - fnm1*x[i+1])*(z[i+1] - fnm1*x[i+1]);
      }
      error = sqrt(fnm1*error);
      printf(" for n=%d, fwd/bck error=%e\n",N,error);
      first = 0;
   } else {
      unsigned j = 0;
      for(it=0;it<20000;it++){
         sign = +1.0;
         cfft2(n,x,y,w,sign);
         sign = -1.0;
         cfft2(n,y,x,w,sign);
      }
      printf(" for n=%d\n",n);
      for (i = 0; i<N; ++i) {
        printf("%g  ", w[i]);
        j++;
        if (j == 4) {
          printf("\n");
          j = 0;
        }
      }
   }
   }
   return 0;
}
Example #3
0
int main ( void )

/******************************************************************************/
/* 
  Purpose:

    MAIN is the main program for FFT_SERIAL.

  Discussion:

    The "complex" vector A is actually stored as a double vector B.

    The "complex" vector entry A[I] is stored as:

      B[I*2+0], the real part,
      B[I*2+1], the imaginary part.

    For N = 2, 4, 8, ..., 2^20 the program

    * checks accuracy: X is filled with values returned by GGL, one
      forward and one backward transform are applied, and the RMS
      difference between the saved input and X/N is printed;

    * measures speed: X is zeroed, NITS forward/backward pairs are run
      between two CPU_TIME readings, and the elapsed time, the time per
      transform and an MFLOPS figure (counting 5*N*log2(N) operations
      per transform) are printed.

    NITS starts at 10000 and is divided by 10 after every fourth
    doubling of N, but is never allowed to drop below 1.

  Return value:

    0 on normal completion, 1 if the work arrays cannot be allocated.

  Modified:

    23 March 2009

  Author:

    Original C version by Wesley Petersen.
    This C version by John Burkardt.

  Reference:

    Wesley Petersen, Peter Arbenz, 
    Introduction to Parallel Computing - A practical guide with examples in C,
    Oxford University Press,
    ISBN: 0-19-851576-6,
    LC: QA76.58.P47.
*/
{
  double elapsed;
  double error;
  double flops;
  double fnm1;
  int i;
  int it;
  int ln2;
  double mflops;
  int n;
  int nits = 10000;
  static double seed;
  double sgn;
  double t_start;
  double t_stop;
  double *w;
  double *x;
  double *y;
  double *z;
  double z0;
  double z1;

  timestamp ( );
  printf ( "\n" );
  printf ( "FFT_SERIAL\n" );
  printf ( "  C version\n" );
  printf ( "\n" );
  printf ( "  Demonstrate an implementation of the Fast Fourier Transform\n" );
  printf ( "  of a complex data vector.\n" );
/*
  Prepare for tests.
*/
  printf ( "\n" );
  printf ( "  Accuracy check:\n" );
  printf ( "\n" );
  printf ( "    FFT ( FFT ( X(1:N) ) ) == N * X(1:N)\n" );
  printf ( "\n" );
  printf ( "             N      NITS    Error         Time          Time/Call     MFLOPS\n" );
  printf ( "\n" );

  seed  = 331.0;
  n = 1;
/*
  LN2 is the log base 2 of N.  Each increase of LN2 doubles N.
*/
  for ( ln2 = 1; ln2 <= 20; ln2++ )
  {
    n = 2 * n;
/*
  Allocate storage for the arrays W, X, Y, Z.

  X, Y and Z are complex vectors of length N, stored as 2*N doubles with
  real and imaginary parts interleaved.  W receives N doubles.
*/
    w = ( double * ) malloc (     ( size_t ) n * sizeof ( double ) );
    x = ( double * ) malloc ( 2 * ( size_t ) n * sizeof ( double ) );
    y = ( double * ) malloc ( 2 * ( size_t ) n * sizeof ( double ) );
    z = ( double * ) malloc ( 2 * ( size_t ) n * sizeof ( double ) );
/*
  Stop cleanly if any allocation failed; free ( NULL ) is harmless.
*/
    if ( w == NULL || x == NULL || y == NULL || z == NULL )
    {
      fprintf ( stderr, "\n" );
      fprintf ( stderr, "FFT_SERIAL - Fatal error!\n" );
      fprintf ( stderr, "  Unable to allocate the work arrays for N = %d.\n", n );
      free ( w );
      free ( x );
      free ( y );
      free ( z );
      return 1;
    }
/*
  Accuracy pass: load X, and the reference copy Z, with values from GGL.
*/
    for ( i = 0; i < 2 * n; i = i + 2 )
    {
      z0 = ggl ( &seed );
      z1 = ggl ( &seed );
      x[i] = z0;
      z[i] = z0;
      x[i+1] = z1;
      z[i+1] = z1;
    }
/* 
  Initialize the sine and cosine tables.
*/
    cffti ( n, w );
/* 
  Transform forward, back 
*/
    sgn = + 1.0;
    cfft2 ( n, x, y, w, sgn );
    sgn = - 1.0;
    cfft2 ( n, y, x, w, sgn );
/* 
  Results should be same as the initial data multiplied by N.
*/
    fnm1 = 1.0 / ( double ) n;
    error = 0.0;
    for ( i = 0; i < 2 * n; i = i + 2 )
    {
      error = error 
      + pow ( z[i]   - fnm1 * x[i], 2 )
      + pow ( z[i+1] - fnm1 * x[i+1], 2 );
    }
    error = sqrt ( fnm1 * error );
/*
  First half of the report line; the timing pass completes it.
*/
    printf ( "  %12d  %8d  %12e", n, nits, error );
/*
  Timing pass: start from all-zero data.
*/
    for ( i = 0; i < 2 * n; i = i + 2 )
    {
      x[i] = 0.0;
      z[i] = 0.0;
      x[i+1] = 0.0;
      z[i+1] = 0.0;
    }
/*
  The tables are rebuilt before the timed loop, as in the original flow.
*/
    cffti ( n, w );

    t_start = cpu_time ( );
    for ( it = 0; it < nits; it++ )
    {
      sgn = + 1.0;
      cfft2 ( n, x, y, w, sgn );
      sgn = - 1.0;
      cfft2 ( n, y, x, w, sgn );
    }
    t_stop = cpu_time ( );
    elapsed = t_stop - t_start;
/*
  Each of the 2 * NITS transforms is counted as 5 * N * LN2 operations.
*/
    flops = 2.0 * ( double ) nits * ( 5.0 * ( double ) n * ( double ) ln2 );
/*
  NOTE(review): if the two CPU_TIME readings are equal, ELAPSED is zero
  and MFLOPS is not a finite number.
*/
    mflops = flops / 1.0E+06 / elapsed;

    printf ( "  %12e  %12e  %12f\n", elapsed, elapsed / ( double ) ( 2 * nits ), mflops );
/*
  Reduce the repetition count as N grows, but always run at least once.
*/
    if ( ( ln2 % 4 ) == 0 ) 
    {
      nits = nits / 10;
    }
    if ( nits < 1 ) 
    {
      nits = 1;
    }
    free ( w );
    free ( x );
    free ( y );
    free ( z );
  }
  printf ( "\n" );
  printf ( "FFT_SERIAL:\n" );
  printf ( "  Normal end of execution.\n" );
  printf ( "\n" );
  timestamp ( );

  return 0;
}