/*
  Example of Apple Altivec coded binary radix FFT using intrinsics from
  Petersen and Arbenz "Intro. to Parallel Computing," Section 3.6

  This is an expanded version of a generic work-space FFT: steps are
  in-line. cfft2(n,x,y,w,sign) takes complex n-array "x" (Fortran
  real,aimag,real,aimag,... order) and writes its DFT in "y". Both input
  "x" and the original contents of "y" are destroyed. Initialization for
  array "w" (size n/2 complex of twiddle factors (exp(twopi*i*k/n),
  for k=0..n/2-1)) is computed once by cffti(n,w).

  WPP, SAM. Math. ETHZ, 1 June, 2002

  Driver outline: for n = 2, 4, 8, ... (at most 20 sizes) two cases run:
    case 0 - fill x with ggl() values, transform forward and back, and
             print the RMS difference between the original and x/n;
    case 1 - zero-fill x and repeat the forward/back pair nits times.
  nits is divided by 10 after every fourth size.

  Returns 0 on success, 1 if the work arrays cannot be allocated.
*/
int main(void)
{
   int first,i,icase,it,ln2,n;
   int nits=1000000;
   static float seed = 331.0;
   float error,fnm1,sign,z0,z1,ggl();
   float *x,*y,*z,*w;

   /* allocate storage: x,y,z hold N complex values (2*N floats each),
      w holds N/2 complex twiddle factors (N floats).
      NOTE(review): the original comment asks for a 4-word boundary; this
      relies on malloc returning suitably aligned storage - confirm for the
      target platform. */
   x = malloc(2 * (N) * sizeof *x);
   y = malloc(2 * (N) * sizeof *y);
   z = malloc(2 * (N) * sizeof *z);
   w = malloc((N) * sizeof *w);
   if(!x || !y || !z || !w) {
      fprintf(stderr," cannot allocate FFT work arrays\n");
      free(x);          /* free(NULL) is a no-op, so no guards needed */
      free(y);
      free(z);
      free(w);
      return 1;
   }

   n = 2;
   /* the n <= N test keeps every access inside the buffers sized by N */
   for(ln2=1; ln2<21 && n<=(N); ln2++) {
      first = 1;
      for(icase=0; icase<2; icase++) {
         if(first) {
            for(i=0; i<2*n; i+=2) {
               z0 = ggl(&seed);     /* real part of array */
               z1 = ggl(&seed);     /* imaginary part of array */
               x[i]   = z0;
               z[i]   = z0;         /* copy of initial real data */
               x[i+1] = z1;
               z[i+1] = z1;         /* copy of initial imag. data */
            }
         } else {
            for(i=0; i<2*n; i+=2) {
               z0 = 0;              /* real part of array */
               z1 = 0;              /* imaginary part of array */
               x[i]   = z0;
               z[i]   = z0;         /* copy of initial real data */
               x[i+1] = z1;
               z[i+1] = z1;         /* copy of initial imag. data */
            }
         }

         /* initialize sine/cosine tables */
         cffti(n,w);

         /* transform forward, back */
         if(first) {
            sign = 1.0;
            cfft2(n,x,y,w,sign);
            sign = -1.0;
            cfft2(n,y,x,w,sign);

            /* results should be same as initial multiplied by n */
            fnm1  = 1.0/((float) n);
            error = 0.0;
            for(i=0; i<2*n; i+=2) {
               error += (z[i] - fnm1*x[i])*(z[i] - fnm1*x[i]) +
                        (z[i+1] - fnm1*x[i+1])*(z[i+1] - fnm1*x[i+1]);
            }
            error = sqrt(fnm1*error);
            printf(" for n=%d, fwd/bck error=%e\n",n,error);
            first = 0;
         } else {
            /* repeated forward/back pairs on the zero-filled data */
            for(it=0; it<nits; it++) {
               sign = +1.0;
               cfft2(n,x,y,w,sign);
               sign = -1.0;
               cfft2(n,y,x,w,sign);
            }
         }
      }
      if((ln2%4)==0) nits /= 10;
      n *= 2;
   }

   free(x);
   free(y);
   free(z);
   free(w);
   return 0;
}
int main() { /* SSE version of cfft2 - uses INTEL intrinsics W. Petersen, SAM. Math. ETHZ 2 May, 2002 */ int first,i,icase,it,n; float seed,error,fnm1,sign,z0,z1,ggl(); float t1,ln2,mflops; void cffti(),cfft2(); first = 1; seed = 331.0; for(icase=0;icase<2;icase++){ if(first){ for(i=0;i<2*N;i+=2){ z0 = ggl(&seed); /* real part of array */ z1 = ggl(&seed); /* imaginary part of array */ x[i] = z0; z[i] = z0; /* copy of initial real data */ x[i+1] = z1; z[i+1] = z1; /* copy of initial imag. data */ } } else { for(i=0;i<2*N;i+=2){ z0 = 0; /* real part of array */ z1 = 0; /* imaginary part of array */ x[i] = z0; z[i] = z0; /* copy of initial real data */ x[i+1] = z1; z[i+1] = z1; /* copy of initial imag. data */ } } /* initialize sine/cosine tables */ n = N; cffti(n,w); /* transform forward, back */ if(first){ sign = 1.0; cfft2(n,x,y,w,sign); sign = -1.0; cfft2(n,y,x,w,sign); /* results should be same as initial multiplied by N */ fnm1 = 1.0/((float) n); error = 0.0; for(i=0;i<2*N;i+=2){ error += (z[i] - fnm1*x[i])*(z[i] - fnm1*x[i]) + (z[i+1] - fnm1*x[i+1])*(z[i+1] - fnm1*x[i+1]); } error = sqrt(fnm1*error); printf(" for n=%d, fwd/bck error=%e\n",N,error); first = 0; } else { unsigned j = 0; for(it=0;it<20000;it++){ sign = +1.0; cfft2(n,x,y,w,sign); sign = -1.0; cfft2(n,y,x,w,sign); } printf(" for n=%d\n",n); for (i = 0; i<N; ++i) { printf("%g ", w[i]); j++; if (j == 4) { printf("\n"); j = 0; } } } } return 0; }
int main ( void )

/******************************************************************************/
/*
  Purpose:

    MAIN is the main program for FFT_SERIAL.

  Discussion:

    The "complex" vector A is actually stored as a double vector B.

    The "complex" vector entry A[I] is stored as:

      B[I*2+0], the real part,
      B[I*2+1], the imaginary part.

    For N = 2, 4, ..., 2^20 the program runs two cases:

      an accuracy case, which transforms pseudorandom data forward and
      back and prints the RMS difference from the original;

      a timing case, which repeats the forward/back pair NITS times on
      zero data and prints the CPU time, the time per call and an
      MFLOPS estimate.

    The program returns 0 on normal completion and 1 if the work arrays
    for some N cannot be allocated.

  Modified:

    23 March 2009

  Author:

    Original C version by Wesley Petersen.
    This C version by John Burkardt.

  Reference:

    Wesley Petersen, Peter Arbenz,
    Introduction to Parallel Computing - A practical guide with examples in C,
    Oxford University Press,
    ISBN: 0-19-851576-6,
    LC: QA76.58.P47.
*/
{
  double ctime;
  double ctime1;
  double ctime2;
  double error;
  int first;
  double flops;
  double fnm1;
  int i;
  int icase;
  int it;
  int ln2;
  double mflops;
  int n;
  int nits = 10000;
  static double seed;
  double sgn;
  double *w;
  double *x;
  double *y;
  double *z;
  double z0;
  double z1;

  timestamp ( );
  printf ( "\n" );
  printf ( "FFT_SERIAL\n" );
  printf ( " C version\n" );
  printf ( "\n" );
  printf ( " Demonstrate an implementation of the Fast Fourier Transform\n" );
  printf ( " of a complex data vector.\n" );
/*
  Prepare for tests.
*/
  printf ( "\n" );
  printf ( " Accuracy check:\n" );
  printf ( "\n" );
  printf ( " FFT ( FFT ( X(1:N) ) ) == N * X(1:N)\n" );
  printf ( "\n" );
  printf ( " N NITS Error Time Time/Call MFLOPS\n" );
  printf ( "\n" );

  seed = 331.0;
  n = 1;
/*
  LN2 is the log base 2 of N.  Each increase of LN2 doubles N.
*/
  for ( ln2 = 1; ln2 <= 20; ln2++ )
  {
    n = 2 * n;
/*
  Allocate storage for the complex arrays W, X, Y, Z.

  We handle the complex arithmetic ourselves and store a complex vector
  as a flat array of interleaved (real, imaginary) pairs.  X, Y and Z
  hold N complex values (2*N doubles); W is given N doubles.
*/
    w = malloc ( ( size_t ) n * sizeof *w );
    x = malloc ( 2 * ( size_t ) n * sizeof *x );
    y = malloc ( 2 * ( size_t ) n * sizeof *y );
    z = malloc ( 2 * ( size_t ) n * sizeof *z );

    if ( !w || !x || !y || !z )
    {
      fprintf ( stderr, "\n" );
      fprintf ( stderr, "FFT_SERIAL - Fatal error!\n" );
      fprintf ( stderr, "  Unable to allocate work arrays for N = %d\n", n );
/*
  free ( NULL ) is a no-op, so whichever allocations succeeded are released.
*/
      free ( w );
      free ( x );
      free ( y );
      free ( z );
      return 1;
    }

    first = 1;

    for ( icase = 0; icase < 2; icase++ )
    {
      if ( first )
      {
        for ( i = 0; i < 2 * n; i = i + 2 )
        {
          z0 = ggl ( &seed );
          z1 = ggl ( &seed );
          x[i] = z0;
          z[i] = z0;
          x[i+1] = z1;
          z[i+1] = z1;
        }
      }
      else
      {
        for ( i = 0; i < 2 * n; i = i + 2 )
        {
          z0 = 0.0;              /* real part of array */
          z1 = 0.0;              /* imaginary part of array */
          x[i] = z0;
          z[i] = z0;             /* copy of initial real data */
          x[i+1] = z1;
          z[i+1] = z1;           /* copy of initial imag. data */
        }
      }
/*
  Initialize the sine and cosine tables.
*/
      cffti ( n, w );
/*
  Transform forward, back
*/
      if ( first )
      {
        sgn = + 1.0;
        cfft2 ( n, x, y, w, sgn );
        sgn = - 1.0;
        cfft2 ( n, y, x, w, sgn );
/*
  Results should be same as the initial data multiplied by N.
*/
        fnm1 = 1.0 / ( double ) n;
        error = 0.0;
        for ( i = 0; i < 2 * n; i = i + 2 )
        {
          error = error
            + pow ( z[i] - fnm1 * x[i], 2 )
            + pow ( z[i+1] - fnm1 * x[i+1], 2 );
        }
        error = sqrt ( fnm1 * error );
        printf ( " %12d %8d %12e", n, nits, error );
        first = 0;
      }
      else
      {
        ctime1 = cpu_time ( );
        for ( it = 0; it < nits; it++ )
        {
          sgn = + 1.0;
          cfft2 ( n, x, y, w, sgn );
          sgn = - 1.0;
          cfft2 ( n, y, x, w, sgn );
        }
        ctime2 = cpu_time ( );
        ctime = ctime2 - ctime1;
/*
  Operation count used for the rate: 5 * N * LN2 per transform,
  two transforms per iteration.
*/
        flops = 2.0 * ( double ) nits
          * ( 5.0 * ( double ) n * ( double ) ln2 );

        mflops = flops / 1.0E+06 / ctime;

        printf ( " %12e %12e %12f\n", ctime,
          ctime / ( double ) ( 2 * nits ), mflops );
      }
    }
/*
  Reduce the repetition count after every fourth size, but never below 1.
*/
    if ( ( ln2 % 4 ) == 0 )
    {
      nits = nits / 10;
    }
    if ( nits < 1 )
    {
      nits = 1;
    }
    free ( w );
    free ( x );
    free ( y );
    free ( z );
  }

  printf ( "\n" );
  printf ( "FFT_SERIAL:\n" );
  printf ( " Normal end of execution.\n" );
  printf ( "\n" );
  timestamp ( );

  return 0;
}