mdct_info *faad_mdct_init(uint16_t N) { mdct_info *mdct = (mdct_info*)faad_malloc(sizeof(mdct_info)); assert(N % 8 == 0); mdct->N = N; /* NOTE: For "small framelengths" in FIXED_POINT the coefficients need to be * scaled by sqrt("(nearest power of 2) > N" / N) */ /* RE(mdct->sincos[k]) = scale*(real_t)(cos(2.0*M_PI*(k+1./8.) / (real_t)N)); * IM(mdct->sincos[k]) = scale*(real_t)(sin(2.0*M_PI*(k+1./8.) / (real_t)N)); */ /* scale is 1 for fixed point, sqrt(N) for floating point */ switch (N) { case 2048: mdct->sincos = (complex_t*)mdct_tab_2048; break; case 256: mdct->sincos = (complex_t*)mdct_tab_256; break; #ifdef LD_DEC case 1024: mdct->sincos = (complex_t*)mdct_tab_1024; break; #endif #ifdef ALLOW_SMALL_FRAMELENGTH case 1920: mdct->sincos = (complex_t*)mdct_tab_1920; break; case 240: mdct->sincos = (complex_t*)mdct_tab_240; break; #ifdef LD_DEC case 960: mdct->sincos = (complex_t*)mdct_tab_960; break; #endif #endif #ifdef SSR_DEC case 512: mdct->sincos = (complex_t*)mdct_tab_512; break; case 64: mdct->sincos = (complex_t*)mdct_tab_64; break; #endif } /* initialise fft */ mdct->cfft = cffti(N/4); #ifdef PROFILE mdct->cycles = 0; mdct->fft_cycles = 0; #endif return mdct; }
mdct_info *faad_mdct_init(uint16_t N) { uint16_t k; #ifdef FIXED_POINT uint16_t N_idx; real_t cangle, sangle, c, s, cold; #endif real_t scale; mdct_info *mdct = (mdct_info*)faad_malloc(sizeof(mdct_info)); assert(N % 8 == 0); mdct->N = N; mdct->sincos = (complex_t*)faad_malloc(N/4*sizeof(complex_t)); #ifdef FIXED_POINT N_idx = map_N_to_idx(N); scale = const_tab[N_idx][0]; cangle = const_tab[N_idx][1]; sangle = const_tab[N_idx][2]; c = const_tab[N_idx][3]; s = const_tab[N_idx][4]; #else scale = (real_t)sqrt(2.0 / (real_t)N); #endif /* (co)sine table build using recurrence relations */ /* this can also be done using static table lookup or */ /* some form of interpolation */ for (k = 0; k < N/4; k++) { #ifdef FIXED_POINT RE(mdct->sincos[k]) = c; //MUL_C_C(c,scale); IM(mdct->sincos[k]) = s; //MUL_C_C(s,scale); cold = c; c = MUL_F(c,cangle) - MUL_F(s,sangle); s = MUL_F(s,cangle) + MUL_F(cold,sangle); #else /* no recurrence, just sines */ RE(mdct->sincos[k]) = scale*(real_t)(cos(2.0*M_PI*(k+1./8.) / (real_t)N)); IM(mdct->sincos[k]) = scale*(real_t)(sin(2.0*M_PI*(k+1./8.) / (real_t)N)); #endif } /* initialise fft */ mdct->cfft = cffti(N/4); #ifdef PROFILE mdct->cycles = 0; mdct->fft_cycles = 0; #endif return mdct; }
/*
 * Build a plan for complex FFTs of the given length.
 *
 * Two cost estimates are compared: the mixed-radix FFT (length times the
 * sum of its prime factors) and Bluestein's algorithm
 * (2*3*length*log(3*length)).  The cheaper one is selected and its work
 * storage is set up through bluestein_i() or cffti().
 *
 * The estimates are evaluated in double precision: the former int
 * products overflowed for large lengths, notably large primes, for which
 * pfsum == length.
 *
 * Returns the new plan, or NULL if memory could not be allocated here.
 */
complex_plan make_complex_plan (int length)
{
    complex_plan plan = (complex_plan) malloc(sizeof(complex_plan_i));
    int pfsum;
    double comp1, comp2;

    if (plan == NULL)
        return NULL;

    pfsum = prime_factor_sum(length);
    comp1 = (double)length * (double)pfsum;          /* mixed-radix cost */
    comp2 = 6.0 * (double)length * log(3.*length);   /* Bluestein cost   */

    plan->length = length;
    plan->bluestein = (comp2 < comp1);

    if (plan->bluestein)
        bluestein_i (length,&(plan->work));
    else
    {
        /* cffti() needs 4*length+15 doubles; size computed in size_t */
        plan->work = (double *)malloc((4*(size_t)length+15)*sizeof(double));
        if (plan->work == NULL)
        {
            free(plan);
            return NULL;
        }
        cffti(length, plan->work);
    }

    return plan;
}
complex_plan make_complex_plan (size_t length) { complex_plan plan = RALLOC(complex_plan_i,1); size_t pfsum = prime_factor_sum(length); double comp1 = length*pfsum; double comp2 = 2*3*length*log(3.*length); comp2*=3.; /* fudge factor that appears to give good overall performance */ plan->length=length; plan->bluestein = (comp2<comp1); if (plan->bluestein) bluestein_i (length,&(plan->work)); else { plan->work=RALLOC(double,4*length+15); cffti(length, plan->work); } return plan; }
/*
 * Set up the work storage for a Bluestein FFT of length n.
 *
 * One array of 2+2*n+8*n2+16 doubles is allocated, where
 * n2 = good_size(2*n-1); its size is stored in *worksize and its address
 * in *tstorage.  Layout, in doubles:
 *   [0, 2)            n2, stored as a size_t
 *   [2, 2+2n)         b_k: complex values (cos, sin)(pi*k^2/n)
 *   [2+2n, 2+2n+2n2)  zero-padded b_k scaled by 1/n2, forward transformed
 *   [2+2n+2n2, ...)   work area passed to cffti()/cfftf() for length n2
 */
void bluestein_i (size_t n, double **tstorage, size_t *worksize)
{
    static const double pi=3.14159265358979323846;
    const size_t n2 = good_size(n*2-1);
    const size_t total = 2+2*n+8*n2+16;
    const double pibyn = pi/n;
    double *store, *bk, *bkf, *work;
    double inv_n2;
    size_t k, idx, sq_mod;

    *worksize = total;
    store = RALLOC(double,total);
    *tstorage = store;

    ((size_t *)store)[0] = n2;
    bk   = store+2;
    bkf  = bk+2*n;
    work = bkf+2*n2;

    /* initialize b_k */
    bk[0] = 1;
    bk[1] = 0;

    /* sq_mod follows k*k modulo 2*n, built up incrementally */
    sq_mod = 0;
    for (k=1; k<n; ++k)
    {
        double phase;

        sq_mod += 2*k-1;
        if (sq_mod>=2*n)
            sq_mod -= 2*n;
        phase = pibyn*sq_mod;
        bk[2*k]   = cos(phase);
        bk[2*k+1] = sin(phase);
    }

    /* initialize the zero-padded, Fourier transformed b_k. Add normalisation. */
    inv_n2 = 1./n2;
    bkf[0] = bk[0]*inv_n2;
    bkf[1] = bk[1]*inv_n2;
    for (k=1; k<n; ++k)
    {
        /* entry k is mirrored to entry n2-k */
        const double re = bk[2*k]  *inv_n2;
        const double im = bk[2*k+1]*inv_n2;

        bkf[2*(n2-k)]   = re;
        bkf[2*k]        = re;
        bkf[2*(n2-k)+1] = im;
        bkf[2*k+1]      = im;
    }
    for (idx=2*n; idx<=(2*n2-2*n+1); ++idx)
        bkf[idx] = 0.;

    cffti (n2,work);
    cfftf (n2,bkf,work);
}
/*
 * Python entry point: cffti(n) -> 1-D array of doubles.
 *
 * Parses one integer n, allocates a new one-dimensional NPY_DOUBLE array
 * of 4*n + 15 elements and passes its data buffer to cffti().  Returns
 * the array, or NULL if argument parsing or the allocation failed.
 */
static PyObject *
fftpack_cffti(PyObject *NPY_UNUSED(self), PyObject *args)
{
    long npoints;
    npy_intp work_len;
    PyArrayObject *work;

    if (!PyArg_ParseTuple(args, "l", &npoints)) {
        return NULL;
    }

    /* Magic size needed by cffti */
    work_len = 4*npoints + 15;

    /* One-dimensional array of doubles that receives the work data */
    work = (PyArrayObject *)PyArray_SimpleNew(1, &work_len, NPY_DOUBLE);
    if (work == NULL) {
        return NULL;
    }

    NPY_SIGINT_ON;
    cffti(npoints, (double *)PyArray_DATA(work));
    NPY_SIGINT_OFF;

    return (PyObject *)work;
}
Datum fft_main(PG_FUNCTION_ARGS) { int i,n; double sgn; double *w; double wtime; double *x,*y,*z; int32 arg = PG_GETARG_INT32(0); timestamp(); ereport(INFO,(errmsg(" Number of processors available = %d\n", omp_get_num_procs()))); ereport(INFO,(errmsg(" Number of threads = %d\n", omp_get_max_threads()))); //Prepare for tests. ereport(INFO,(errmsg(" N Time\n"))); n = 4; w = (double *) malloc( n * sizeof(double)); x = (double *) malloc(2 * n * sizeof(double)); y = (double *) malloc(2 * n * sizeof(double)); z = (double *) malloc(2 * n * sizeof(double)); //初始化数据 x[0]=1.0; x[1]=0.0; x[2]=2.0; x[3]=0.0; x[4]=4.0; x[5]=0.0; x[6]=3.0; x[7]=0.0; ereport(INFO,(errmsg("x="))); for(i=0; i<2*n; i++){ ereport(INFO,(errmsg("%f,",x[i]))); } //Initialize the sine and cosine tables. cffti(n, w); wtime = omp_get_wtime(); //Transform forward sgn = + 1.0; //fft计算 cfft2( n, x, y, w, sgn ); //输出结果 ereport(INFO,(errmsg("y="))); for(i=0; i<2*n; i++){ ereport(INFO,(errmsg("%f,",y[i]))); } //元素个数 ereport(INFO,(errmsg(" %12d", n))); //运行时间 wtime = omp_get_wtime() - wtime; ereport(INFO,(errmsg(" %12e\n", wtime))); free(w); free(x); free(y); //Terminate. ereport(INFO,(errmsg(" Normal end of execution.\n"))); timestamp(); PG_RETURN_INT32(arg); }
int main() { /* SSE version of cfft2 - uses INTEL intrinsics W. Petersen, SAM. Math. ETHZ 2 May, 2002 */ int first,i,icase,it,n; float seed,error,fnm1,sign,z0,z1,ggl(); float t1,ln2,mflops; void cffti(),cfft2(); first = 1; seed = 331.0; for(icase=0;icase<2;icase++){ if(first){ for(i=0;i<2*N;i+=2){ z0 = ggl(&seed); /* real part of array */ z1 = ggl(&seed); /* imaginary part of array */ x[i] = z0; z[i] = z0; /* copy of initial real data */ x[i+1] = z1; z[i+1] = z1; /* copy of initial imag. data */ } } else { for(i=0;i<2*N;i+=2){ z0 = 0; /* real part of array */ z1 = 0; /* imaginary part of array */ x[i] = z0; z[i] = z0; /* copy of initial real data */ x[i+1] = z1; z[i+1] = z1; /* copy of initial imag. data */ } } /* initialize sine/cosine tables */ n = N; cffti(n,w); /* transform forward, back */ if(first){ sign = 1.0; cfft2(n,x,y,w,sign); sign = -1.0; cfft2(n,y,x,w,sign); /* results should be same as initial multiplied by N */ fnm1 = 1.0/((float) n); error = 0.0; for(i=0;i<2*N;i+=2){ error += (z[i] - fnm1*x[i])*(z[i] - fnm1*x[i]) + (z[i+1] - fnm1*x[i+1])*(z[i+1] - fnm1*x[i+1]); } error = sqrt(fnm1*error); printf(" for n=%d, fwd/bck error=%e\n",N,error); first = 0; } else { unsigned j = 0; for(it=0;it<20000;it++){ sign = +1.0; cfft2(n,x,y,w,sign); sign = -1.0; cfft2(n,y,x,w,sign); } printf(" for n=%d\n",n); for (i = 0; i<N; ++i) { printf("%g ", w[i]); j++; if (j == 4) { printf("\n"); j = 0; } } } } return 0; }
/*
   Example of Apple Altivec coded binary radix FFT using intrinsics from
   Petersen and Arbenz "Intro. to Parallel Computing," Section 3.6

   This is an expanded version of a generic work-space FFT: steps are
   in-line. cfft2(n,x,y,w,sign) takes complex n-array "x" (Fortran
   real,aimag,real,aimag,... order) and writes its DFT in "y". Both
   input "x" and the original contents of "y" are destroyed.
   Initialization for array "w" (size n/2 complex of twiddle factors
   (exp(twopi*i*k/n), for k=0..n/2-1)) is computed once by cffti(n,w).

   WPP, SAM. Math. ETHZ, 1 June, 2002

   For n = 2, 4, ..., 2^20 the driver runs two passes: an accuracy pass
   (random data, one forward/backward pair, RMS error printed) and a
   repetition pass (zero data, nits forward/backward pairs).  nits is
   divided by 10 after every fourth size.

   Changes from the original: main has an explicit int return type
   (implicit int is not valid since C99), the four allocations are
   checked before use, the buffers are freed on exit, and two unused
   locals were dropped.
*/
int main(void)
{
   int first,i,icase,it,ln2,n;
   int nits=1000000;
   static float seed = 331.0;
   float error,fnm1,sign,z0,z1,ggl();
   float *x,*y,*z,*w;

   /* allocate storage for x,y,z,w on 4-word bndr. */
   x = (float *) malloc(8*N);
   y = (float *) malloc(8*N);
   z = (float *) malloc(8*N);
   w = (float *) malloc(4*N);
   if(x == NULL || y == NULL || z == NULL || w == NULL) {
      fprintf(stderr,"out of memory\n");
      free(x);
      free(y);
      free(z);
      free(w);
      return 1;
   }

   n = 2;
   for(ln2=1; ln2<21; ln2++) {
      first = 1;
      for(icase=0; icase<2; icase++) {
         if(first) {
            for(i=0; i<2*n; i+=2) {
               z0 = ggl(&seed);     /* real part of array */
               z1 = ggl(&seed);     /* imaginary part of array */
               x[i] = z0;
               z[i] = z0;           /* copy of initial real data */
               x[i+1] = z1;
               z[i+1] = z1;         /* copy of initial imag. data */
            }
         } else {
            for(i=0; i<2*n; i+=2) {
               z0 = 0;              /* real part of array */
               z1 = 0;              /* imaginary part of array */
               x[i] = z0;
               z[i] = z0;           /* copy of initial real data */
               x[i+1] = z1;
               z[i+1] = z1;         /* copy of initial imag. data */
            }
         }

         /* initialize sine/cosine tables */
         cffti(n,w);

         /* transform forward, back */
         if(first) {
            sign = 1.0;
            cfft2(n,x,y,w,sign);
            sign = -1.0;
            cfft2(n,y,x,w,sign);

            /* results should be same as initial multiplied by n */
            fnm1 = 1.0/((float) n);
            error = 0.0;
            for(i=0; i<2*n; i+=2) {
               error += (z[i] - fnm1*x[i])*(z[i] - fnm1*x[i]) +
                        (z[i+1] - fnm1*x[i+1])*(z[i+1] - fnm1*x[i+1]);
            }
            error = sqrt(fnm1*error);
            printf(" for n=%d, fwd/bck error=%e\n",n,error);
            first = 0;
         } else {
            for(it=0; it<nits; it++) {
               sign = +1.0;
               cfft2(n,x,y,w,sign);
               sign = -1.0;
               cfft2(n,y,x,w,sign);
            }
         }
      }
      if((ln2%4)==0) nits /= 10;
      n *= 2;
   }

   free(x);
   free(y);
   free(z);
   free(w);
   return 0;
}
/*
 * MAIN__ -- f2c-translated test driver for the FFTPACK routines.
 *
 * For each of the nns = 7 sequence lengths in nd[] the driver fills x
 * with sin(j*sqrt(2)), computes every transform by direct summation as a
 * reference, runs the corresponding FFTPACK routine and records the
 * largest absolute difference.  Forward, backward and round-trip errors
 * are collected for the real periodic (rfft*), sine (sint), cosine
 * (cost), quarter-wave sine and cosine (sinq*, cosq*), simplified real
 * (ezfft*) and complex (cfft*) transforms.  One record per length is
 * written through the f2c formatted-I/O calls s_wsfe/do_fio/e_wsfe using
 * format fmt_1001.  Always returns 0.
 *
 * Every FFTPACK routine called here takes the shared work array w and
 * the factor array ifac; the matching *i routine is called before each
 * transform group.  The "L1xx:" comments are the statement labels of the
 * original Fortran source.  c_n1 and c__1 are defined outside this
 * function.
 */
/* Main program */ int MAIN__(void) { /* Initialized data */ static integer nd[10] = { 120,54,49,32,4,3,2 }; /* Format strings */ static char fmt_1001[] = "(\0020N\002,i5,\002 RFFTF \002,e10.3,\002 RFF" "TB \002,e10.3,\002 RFFTFB \002,e10.3,\002 SINT \002,e10.3," "\002 SINTFB \002,e10.3,\002 COST \002,e10.3/7x,\002 COSTFB " "\002,e10.3,\002 SINQF \002,e10.3,\002 SINQB \002,e10.3,\002 SI" "NQFB \002,e10.3,\002 COSQF \002,e10.3,\002 COSQB \002,e10.3/7x," "\002 COSQFB \002,e10.3,\002 DEZF \002,e10.3,\002 DEZB \002,e" "10.3,\002 DEZFB \002,e10.3,\002 CFFTF \002,e10.3,\002 CFFTB " " \002,e10.3/7x,\002 CFFTFB \002,e10.3)"; /* System generated locals */ integer i__1, i__2, i__3, i__4, i__5, i__6; doublereal d__1, d__2, d__3, d__4; doublecomplex z__1, z__2, z__3; /* Builtin functions */ double sqrt(doublereal), sin(doublereal), cos(doublereal); integer pow_ii(integer *, integer *); double atan(doublereal), z_abs(doublecomplex *); integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ doublereal a[100], b[100]; integer i__, j, k, n; doublereal w[2000], x[200], y[200], ah[100], bh[100], cf, fn, dt, pi; doublecomplex cx[200], cy[200]; doublereal xh[200]; integer nz, nm1, np1, ns2; doublereal arg, tfn, tpi; integer nns; doublereal sum, arg1, arg2; integer ns2m; doublereal sum1, sum2, dcfb; integer ifac[64], modn; doublereal rftb, rftf; extern /* Subroutine */ void cost(integer *, doublereal *, doublereal *, integer *), sint(integer *, doublereal *, doublereal *, integer * ); doublereal dezb1, dezf1, sqrt2; extern /* Subroutine */ void cfftb(integer *, doublecomplex *, doublereal *, integer *), cfftf(integer *, doublecomplex *, doublereal *, integer *); doublereal dezfb; extern /* Subroutine */ void cffti(integer *, doublereal *, integer *), rfftb(integer *, doublereal *, doublereal *, integer *); doublereal rftfb; extern /* Subroutine */ void rfftf(integer *, doublereal *, doublereal *, integer *), cosqb(integer *, doublereal *, 
doublereal *, integer *), rffti(integer *, doublereal *, integer *), cosqf(integer *, doublereal *, doublereal *, integer *), sinqb(integer *, doublereal *, doublereal *, integer *), cosqi(integer *, doublereal *, integer *), sinqf(integer *, doublereal *, doublereal *, integer *), costi(integer *, doublereal *, integer *); doublereal azero; extern /* Subroutine */ void sinqi(integer *, doublereal *, integer *), sinti(integer *, doublereal *, integer *); doublereal costt, sintt, dcfftb, dcfftf, cosqfb, costfb; extern /* Subroutine */ void ezfftb(integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *); doublereal sinqfb; extern /* Subroutine */ void ezfftf(integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *); doublereal sintfb; extern /* Subroutine */ void ezffti(integer *, doublereal *, integer *); doublereal azeroh, cosqbt, cosqft, sinqbt, sinqft; /* Fortran I/O blocks */ static cilist io___58 = { 0, 6, 0, fmt_1001, 0 }; /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ /* VERSION 4 APRIL 1985 */ /* A TEST DRIVER FOR */ /* A PACKAGE OF FORTRAN SUBPROGRAMS FOR THE FAST FOURIER */ /* TRANSFORM OF PERIODIC AND OTHER SYMMETRIC SEQUENCES */ /* BY */ /* PAUL N SWARZTRAUBER */ /* NATIONAL CENTER FOR ATMOSPHERIC RESEARCH BOULDER,COLORADO 80307 */ /* WHICH IS SPONSORED BY THE NATIONAL SCIENCE FOUNDATION */ /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ /* THIS PROGRAM TESTS THE PACKAGE OF FAST FOURIER */ /* TRANSFORMS FOR BOTH COMPLEX AND REAL PERIODIC SEQUENCES AND */ /* CERTAIN OTHER SYMMETRIC SEQUENCES THAT ARE LISTED BELOW. */ /* 1. RFFTI INITIALIZE RFFTF AND RFFTB */ /* 2. RFFTF FORWARD TRANSFORM OF A REAL PERIODIC SEQUENCE */ /* 3. RFFTB BACKWARD TRANSFORM OF A REAL COEFFICIENT ARRAY */ /* 4. EZFFTI INITIALIZE EZFFTF AND EZFFTB */ /* 5. EZFFTF A SIMPLIFIED REAL PERIODIC FORWARD TRANSFORM */ /* 6. 
EZFFTB A SIMPLIFIED REAL PERIODIC BACKWARD TRANSFORM */ /* 7. SINTI INITIALIZE SINT */ /* 8. SINT SINE TRANSFORM OF A REAL ODD SEQUENCE */ /* 9. COSTI INITIALIZE COST */ /* 10. COST COSINE TRANSFORM OF A REAL EVEN SEQUENCE */ /* 11. SINQI INITIALIZE SINQF AND SINQB */ /* 12. SINQF FORWARD SINE TRANSFORM WITH ODD WAVE NUMBERS */ /* 13. SINQB UNNORMALIZED INVERSE OF SINQF */ /* 14. COSQI INITIALIZE COSQF AND COSQB */ /* 15. COSQF FORWARD COSINE TRANSFORM WITH ODD WAVE NUMBERS */ /* 16. COSQB UNNORMALIZED INVERSE OF COSQF */ /* 17. CFFTI INITIALIZE CFFTF AND CFFTB */ /* 18. CFFTF FORWARD TRANSFORM OF A COMPLEX PERIODIC SEQUENCE */ /* 19. CFFTB UNNORMALIZED INVERSE OF CFFTF */ sqrt2 = sqrt(2.0); nns = 7; i__1 = nns; for (nz = 1; nz <= i__1; ++nz) { n = nd[nz - 1]; modn = n % 2; fn = (real) n; tfn = fn + fn; np1 = n + 1; nm1 = n - 1; i__2 = np1; for (j = 1; j <= i__2; ++j) { x[j - 1] = sin((real) j * sqrt2); y[j - 1] = x[j - 1]; xh[j - 1] = x[j - 1]; /* L101: */ } /* TEST SUBROUTINES RFFTI,RFFTF AND RFFTB */ rffti(&n, w, ifac); pi = 3.141592653589793238462643383279502884197169399375108209749445923; dt = (pi + pi) / fn; ns2 = (n + 1) / 2; if (ns2 < 2) { goto L104; } i__2 = ns2; for (k = 2; k <= i__2; ++k) { sum1 = 0.0; sum2 = 0.0; arg = (real) (k - 1) * dt; i__3 = n; for (i__ = 1; i__ <= i__3; ++i__) { arg1 = (real) (i__ - 1) * arg; sum1 += x[i__ - 1] * cos(arg1); sum2 += x[i__ - 1] * sin(arg1); /* L102: */ } y[(k << 1) - 3] = sum1; y[(k << 1) - 2] = -sum2; /* L103: */ } L104: sum1 = 0.0; sum2 = 0.0; i__2 = nm1; for (i__ = 1; i__ <= i__2; i__ += 2) { sum1 += x[i__ - 1]; sum2 += x[i__]; /* L105: */ } if (modn == 1) { sum1 += x[n - 1]; } y[0] = sum1 + sum2; if (modn == 0) { y[n - 1] = sum1 - sum2; } rfftf(&n, x, w, ifac); rftf = 0.0; i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = rftf, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1)); rftf = max(d__2,d__3); x[i__ - 1] = xh[i__ - 1]; /* L106: */ } rftf /= fn; i__2 = n; for (i__ = 1; i__ <= i__2; 
++i__) { sum = x[0] * 0.5; arg = (real) (i__ - 1) * dt; if (ns2 < 2) { goto L108; } i__3 = ns2; for (k = 2; k <= i__3; ++k) { arg1 = (real) (k - 1) * arg; sum = sum + x[(k << 1) - 3] * cos(arg1) - x[(k << 1) - 2] * sin(arg1); /* L107: */ } L108: if (modn == 0) { i__3 = i__ - 1; sum += (real) pow_ii(&c_n1, &i__3) * 0.5 * x[n - 1]; } y[i__ - 1] = sum + sum; /* L109: */ } rfftb(&n, x, w, ifac); rftb = 0.0; i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = rftb, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1)); rftb = max(d__2,d__3); x[i__ - 1] = xh[i__ - 1]; y[i__ - 1] = xh[i__ - 1]; /* L110: */ } rfftb(&n, y, w, ifac); rfftf(&n, y, w, ifac); cf = 1.0 / fn; rftfb = 0.0; i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = rftfb, d__3 = (d__1 = cf * y[i__ - 1] - x[i__ - 1], abs( d__1)); rftfb = max(d__2,d__3); /* L111: */ } /* TEST SUBROUTINES SINTI AND SINT */ dt = pi / fn; i__2 = nm1; for (i__ = 1; i__ <= i__2; ++i__) { x[i__ - 1] = xh[i__ - 1]; /* L112: */ } i__2 = nm1; for (i__ = 1; i__ <= i__2; ++i__) { y[i__ - 1] = 0.0; arg1 = (real) i__ * dt; i__3 = nm1; for (k = 1; k <= i__3; ++k) { y[i__ - 1] += x[k - 1] * sin((real) k * arg1); /* L113: */ } y[i__ - 1] += y[i__ - 1]; /* L114: */ } sinti(&nm1, w, ifac); sint(&nm1, x, w, ifac); cf = 0.5 / fn; sintt = 0.0; i__2 = nm1; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = sintt, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1)); sintt = max(d__2,d__3); x[i__ - 1] = xh[i__ - 1]; y[i__ - 1] = x[i__ - 1]; /* L115: */ } sintt = cf * sintt; sint(&nm1, x, w, ifac); sint(&nm1, x, w, ifac); sintfb = 0.0; i__2 = nm1; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = sintfb, d__3 = (d__1 = cf * x[i__ - 1] - y[i__ - 1], abs( d__1)); sintfb = max(d__2,d__3); /* L116: */ } /* TEST SUBROUTINES COSTI AND COST */ i__2 = np1; for (i__ = 1; i__ <= i__2; ++i__) { x[i__ - 1] = xh[i__ - 1]; /* L117: */ } i__2 = np1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + 1; 
y[i__ - 1] = (x[0] + (real) pow_ii(&c_n1, &i__3) * x[n]) * 0.5; arg = (real) (i__ - 1) * dt; i__3 = n; for (k = 2; k <= i__3; ++k) { y[i__ - 1] += x[k - 1] * cos((real) (k - 1) * arg); /* L118: */ } y[i__ - 1] += y[i__ - 1]; /* L119: */ } costi(&np1, w, ifac); cost(&np1, x, w, ifac); costt = 0.0; i__2 = np1; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = costt, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1)); costt = max(d__2,d__3); x[i__ - 1] = xh[i__ - 1]; y[i__ - 1] = xh[i__ - 1]; /* L120: */ } costt = cf * costt; cost(&np1, x, w, ifac); cost(&np1, x, w, ifac); costfb = 0.0; i__2 = np1; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = costfb, d__3 = (d__1 = cf * x[i__ - 1] - y[i__ - 1], abs( d__1)); costfb = max(d__2,d__3); /* L121: */ } /* TEST SUBROUTINES SINQI,SINQF AND SINQB */ cf = 0.25 / fn; i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { y[i__ - 1] = xh[i__ - 1]; /* L122: */ } dt = pi / (fn + fn); i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { x[i__ - 1] = 0.0; arg = dt * (real) i__; i__3 = n; for (k = 1; k <= i__3; ++k) { x[i__ - 1] += y[k - 1] * sin((real) (k + k - 1) * arg); /* L123: */ } x[i__ - 1] *= 4.0; /* L124: */ } sinqi(&n, w, ifac); sinqb(&n, y, w, ifac); sinqbt = 0.0; i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = sinqbt, d__3 = (d__1 = y[i__ - 1] - x[i__ - 1], abs(d__1)); sinqbt = max(d__2,d__3); x[i__ - 1] = xh[i__ - 1]; /* L125: */ } sinqbt = cf * sinqbt; i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { arg = (real) (i__ + i__ - 1) * dt; i__3 = i__ + 1; y[i__ - 1] = (real) pow_ii(&c_n1, &i__3) * 0.5 * x[n - 1]; i__3 = nm1; for (k = 1; k <= i__3; ++k) { y[i__ - 1] += x[k - 1] * sin((real) k * arg); /* L126: */ } y[i__ - 1] += y[i__ - 1]; /* L127: */ } sinqf(&n, x, w, ifac); sinqft = 0.0; i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = sinqft, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1)); sinqft = max(d__2,d__3); y[i__ - 1] = xh[i__ - 1]; x[i__ - 1] = xh[i__ - 
1]; /* L128: */ } sinqf(&n, y, w, ifac); sinqb(&n, y, w, ifac); sinqfb = 0.0; i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = sinqfb, d__3 = (d__1 = cf * y[i__ - 1] - x[i__ - 1], abs( d__1)); sinqfb = max(d__2,d__3); /* L129: */ } /* TEST SUBROUTINES COSQI,COSQF AND COSQB */ i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { y[i__ - 1] = xh[i__ - 1]; /* L130: */ } i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { x[i__ - 1] = 0.0; arg = (real) (i__ - 1) * dt; i__3 = n; for (k = 1; k <= i__3; ++k) { x[i__ - 1] += y[k - 1] * cos((real) (k + k - 1) * arg); /* L131: */ } x[i__ - 1] *= 4.0; /* L132: */ } cosqi(&n, w, ifac); cosqb(&n, y, w, ifac); cosqbt = 0.0; i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = cosqbt, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1)); cosqbt = max(d__2,d__3); x[i__ - 1] = xh[i__ - 1]; /* L133: */ } cosqbt = cf * cosqbt; i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { y[i__ - 1] = x[0] * 0.5; arg = (real) (i__ + i__ - 1) * dt; i__3 = n; for (k = 2; k <= i__3; ++k) { y[i__ - 1] += x[k - 1] * cos((real) (k - 1) * arg); /* L134: */ } y[i__ - 1] += y[i__ - 1]; /* L135: */ } cosqf(&n, x, w, ifac); cosqft = 0.0; i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = cosqft, d__3 = (d__1 = y[i__ - 1] - x[i__ - 1], abs(d__1)); cosqft = max(d__2,d__3); x[i__ - 1] = xh[i__ - 1]; y[i__ - 1] = xh[i__ - 1]; /* L136: */ } cosqft = cf * cosqft; cosqb(&n, x, w, ifac); cosqf(&n, x, w, ifac); cosqfb = 0.0; i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = cosqfb, d__3 = (d__1 = cf * x[i__ - 1] - y[i__ - 1], abs( d__1)); cosqfb = max(d__2,d__3); /* L137: */ } /* TEST PROGRAMS EZFFTI,EZFFTF,EZFFTB */ ezffti(&n, w, ifac); i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { x[i__ - 1] = xh[i__ - 1]; /* L138: */ } tpi = atan(1.0) * 8.0; dt = tpi / (real) n; ns2 = (n + 1) / 2; cf = 2.0 / (real) n; ns2m = ns2 - 1; if (ns2m <= 0) { goto L141; } i__2 = ns2m; for (k = 1; k <= i__2; ++k) { sum1 
= 0.0; sum2 = 0.0; arg = (real) k * dt; i__3 = n; for (i__ = 1; i__ <= i__3; ++i__) { arg1 = (real) (i__ - 1) * arg; sum1 += x[i__ - 1] * cos(arg1); sum2 += x[i__ - 1] * sin(arg1); /* L139: */ } a[k - 1] = cf * sum1; b[k - 1] = cf * sum2; /* L140: */ } L141: nm1 = n - 1; sum1 = 0.0; sum2 = 0.0; i__2 = nm1; for (i__ = 1; i__ <= i__2; i__ += 2) { sum1 += x[i__ - 1]; sum2 += x[i__]; /* L142: */ } if (modn == 1) { sum1 += x[n - 1]; } azero = cf * 0.5 * (sum1 + sum2); if (modn == 0) { a[ns2 - 1] = cf * 0.5 * (sum1 - sum2); } ezfftf(&n, x, &azeroh, ah, bh, w, ifac); dezf1 = (d__1 = azeroh - azero, abs(d__1)); if (modn == 0) { /* Computing MAX */ d__2 = dezf1, d__3 = (d__1 = a[ns2 - 1] - ah[ns2 - 1], abs(d__1)); dezf1 = max(d__2,d__3); } if (ns2m <= 0) { goto L144; } i__2 = ns2m; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__3 = dezf1, d__4 = (d__1 = ah[i__ - 1] - a[i__ - 1], abs(d__1)), d__3 = max(d__3,d__4), d__4 = (d__2 = bh[i__ - 1] - b[ i__ - 1], abs(d__2)); dezf1 = max(d__3,d__4); /* L143: */ } L144: ns2 = n / 2; if (modn == 0) { b[ns2 - 1] = 0.0; } i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { sum = azero; arg1 = (real) (i__ - 1) * dt; i__3 = ns2; for (k = 1; k <= i__3; ++k) { arg2 = (real) k * arg1; sum = sum + a[k - 1] * cos(arg2) + b[k - 1] * sin(arg2); /* L145: */ } x[i__ - 1] = sum; /* L146: */ } ezfftb(&n, y, &azero, a, b, w, ifac); dezb1 = 0.0; i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = dezb1, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1)); dezb1 = max(d__2,d__3); x[i__ - 1] = xh[i__ - 1]; /* L147: */ } ezfftf(&n, x, &azero, a, b, w, ifac); ezfftb(&n, y, &azero, a, b, w, ifac); dezfb = 0.0; i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = dezfb, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1)); dezfb = max(d__2,d__3); /* L148: */ } /* TEST CFFTI,CFFTF,CFFTB */ i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ - 1; d__1 = cos(sqrt2 * (real) i__); d__2 = sin(sqrt2 * (real) (i__ * 
i__)); z__1.r = d__1, z__1.i = d__2; cx[i__3].r = z__1.r, cx[i__3].i = z__1.i; /* L149: */ } dt = (pi + pi) / fn; i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { arg1 = -((real) (i__ - 1)) * dt; i__3 = i__ - 1; cy[i__3].r = 0.0, cy[i__3].i = 0.0; i__3 = n; for (k = 1; k <= i__3; ++k) { arg2 = (real) (k - 1) * arg1; i__4 = i__ - 1; i__5 = i__ - 1; d__1 = cos(arg2); d__2 = sin(arg2); z__3.r = d__1, z__3.i = d__2; i__6 = k - 1; z__2.r = z__3.r * cx[i__6].r - z__3.i * cx[i__6].i, z__2.i = z__3.r * cx[i__6].i + z__3.i * cx[i__6].r; z__1.r = cy[i__5].r + z__2.r, z__1.i = cy[i__5].i + z__2.i; cy[i__4].r = z__1.r, cy[i__4].i = z__1.i; /* L150: */ } /* L151: */ } cffti(&n, w, ifac); cfftf(&n, cx, w, ifac); dcfftf = 0.0; i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ i__3 = i__ - 1; i__4 = i__ - 1; z__1.r = cx[i__3].r - cy[i__4].r, z__1.i = cx[i__3].i - cy[i__4] .i; d__1 = dcfftf, d__2 = z_abs(&z__1); dcfftf = max(d__1,d__2); i__3 = i__ - 1; i__4 = i__ - 1; z__1.r = cx[i__4].r / fn, z__1.i = cx[i__4].i / fn; cx[i__3].r = z__1.r, cx[i__3].i = z__1.i; /* L152: */ } dcfftf /= fn; i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { arg1 = (real) (i__ - 1) * dt; i__3 = i__ - 1; cy[i__3].r = 0.0, cy[i__3].i = 0.0; i__3 = n; for (k = 1; k <= i__3; ++k) { arg2 = (real) (k - 1) * arg1; i__4 = i__ - 1; i__5 = i__ - 1; d__1 = cos(arg2); d__2 = sin(arg2); z__3.r = d__1, z__3.i = d__2; i__6 = k - 1; z__2.r = z__3.r * cx[i__6].r - z__3.i * cx[i__6].i, z__2.i = z__3.r * cx[i__6].i + z__3.i * cx[i__6].r; z__1.r = cy[i__5].r + z__2.r, z__1.i = cy[i__5].i + z__2.i; cy[i__4].r = z__1.r, cy[i__4].i = z__1.i; /* L153: */ } /* L154: */ } cfftb(&n, cx, w, ifac); dcfftb = 0.0; i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ i__3 = i__ - 1; i__4 = i__ - 1; z__1.r = cx[i__3].r - cy[i__4].r, z__1.i = cx[i__3].i - cy[i__4] .i; d__1 = dcfftb, d__2 = z_abs(&z__1); dcfftb = max(d__1,d__2); i__3 = i__ - 1; i__4 = i__ - 1; cx[i__3].r = cy[i__4].r, cx[i__3].i = cy[i__4].i; /* 
L155: */ } cf = 1.0 / fn; cfftf(&n, cx, w, ifac); cfftb(&n, cx, w, ifac); dcfb = 0.0; i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ i__3 = i__ - 1; z__2.r = cf * cx[i__3].r, z__2.i = cf * cx[i__3].i; i__4 = i__ - 1; z__1.r = z__2.r - cy[i__4].r, z__1.i = z__2.i - cy[i__4].i; d__1 = dcfb, d__2 = z_abs(&z__1); dcfb = max(d__1,d__2); /* L156: */ } s_wsfe(&io___58); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&rftf, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&rftb, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&rftfb, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&sintt, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&sintfb, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&costt, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&costfb, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&sinqft, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&sinqbt, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&sinqfb, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&cosqft, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&cosqbt, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&cosqfb, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&dezf1, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&dezb1, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&dezfb, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&dcfftf, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&dcfftb, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&dcfb, (ftnlen)sizeof(doublereal)); e_wsfe(); /* L157: */ } return 0; } /* MAIN__ */
/******************************************************************************/
/*
  Purpose:

    MAIN is the main program for FFT_SERIAL.

  Discussion:

    For N = 2, 4, ..., 2^20 the program runs two passes over a complex
    vector of length N:

    * an accuracy pass: random data is transformed forward and back and
      the RMS difference from the original is printed;
    * a timing pass: zero data is transformed forward and back NITS
      times, and the CPU time, time per call and MFLOPS are printed.

    NITS starts at 10000 and is divided by 10 after every fourth size,
    but never drops below 1.

    The "complex" vector A is actually stored as a double vector B.
    The "complex" vector entry A[I] is stored as:

      B[I*2+0], the real part,
      B[I*2+1], the imaginary part.

  Modified:

    23 March 2009

  Author:

    Original C version by Wesley Petersen.
    This C version by John Burkardt.

  Reference:

    Wesley Petersen, Peter Arbenz,
    Introduction to Parallel Computing - A practical guide with examples in C,
    Oxford University Press,
    ISBN: 0-19-851576-6,
    LC: QA76.58.P47.
*/
int main ( void )
{
  static double seed;
  double *w;
  double *x;
  double *y;
  double *z;
  int ln2;
  int n = 1;
  int nits = 10000;

  timestamp ( );
  printf ( "\n" );
  printf ( "FFT_SERIAL\n" );
  printf ( " C version\n" );
  printf ( "\n" );
  printf ( " Demonstrate an implementation of the Fast Fourier Transform\n" );
  printf ( " of a complex data vector.\n" );
/*
  Prepare for tests.
*/
  printf ( "\n" );
  printf ( " Accuracy check:\n" );
  printf ( "\n" );
  printf ( " FFT ( FFT ( X(1:N) ) ) == N * X(1:N)\n" );
  printf ( "\n" );
  printf ( " N NITS Error Time Time/Call MFLOPS\n" );
  printf ( "\n" );

  seed = 331.0;
/*
  LN2 is the log base 2 of N.  Each increase of LN2 doubles N.
*/
  for ( ln2 = 1; ln2 <= 20; ln2++ )
  {
    int pass;

    n = 2 * n;
/*
  W holds N doubles; X, Y and Z each hold N complex values as 2*N doubles.
*/
    w = ( double * ) malloc (     n * sizeof ( double ) );
    x = ( double * ) malloc ( 2 * n * sizeof ( double ) );
    y = ( double * ) malloc ( 2 * n * sizeof ( double ) );
    z = ( double * ) malloc ( 2 * n * sizeof ( double ) );

    for ( pass = 0; pass < 2; pass++ )
    {
      const int accuracy_pass = ( pass == 0 );
      int i;
/*
  Random data for the accuracy pass, zeros for the timing pass.
  Z keeps a copy of the initial data.
*/
      for ( i = 0; i < 2 * n; i = i + 2 )
      {
        double re;
        double im;

        if ( accuracy_pass )
        {
          re = ggl ( &seed );
          im = ggl ( &seed );
        }
        else
        {
          re = 0.0;
          im = 0.0;
        }
        x[i] = re;
        z[i] = re;
        x[i+1] = im;
        z[i+1] = im;
      }
/*
  Initialize the sine and cosine tables.
*/
      cffti ( n, w );

      if ( accuracy_pass )
      {
        double error = 0.0;
        double fnm1;
        double sgn;
/*
  Transform forward, back.
*/
        sgn = + 1.0;
        cfft2 ( n, x, y, w, sgn );
        sgn = - 1.0;
        cfft2 ( n, y, x, w, sgn );
/*
  Results should be same as the initial data multiplied by N.
*/
        fnm1 = 1.0 / ( double ) n;
        for ( i = 0; i < 2 * n; i = i + 2 )
        {
          error = error
            + pow ( z[i] - fnm1 * x[i], 2 )
            + pow ( z[i+1] - fnm1 * x[i+1], 2 );
        }
        error = sqrt ( fnm1 * error );
        printf ( " %12d %8d %12e", n, nits, error );
      }
      else
      {
        double ctime;
        double ctime1;
        double ctime2;
        double flops;
        double mflops;
        double sgn;
        int it;

        ctime1 = cpu_time ( );
        for ( it = 0; it < nits; it++ )
        {
          sgn = + 1.0;
          cfft2 ( n, x, y, w, sgn );
          sgn = - 1.0;
          cfft2 ( n, y, x, w, sgn );
        }
        ctime2 = cpu_time ( );
        ctime = ctime2 - ctime1;

        flops = 2.0 * ( double ) nits * ( 5.0 * ( double ) n * ( double ) ln2 );
        mflops = flops / 1.0E+06 / ctime;

        printf ( " %12e %12e %12f\n", ctime, ctime / ( double ) ( 2 * nits ), mflops );
      }
    }
/*
  Fewer repetitions as the transforms get longer, but at least one.
*/
    if ( ( ln2 % 4 ) == 0 )
    {
      nits = nits / 10;
    }
    if ( nits < 1 )
    {
      nits = 1;
    }

    free ( w );
    free ( x );
    free ( y );
    free ( z );
  }

  printf ( "\n" );
  printf ( "FFT_SERIAL:\n" );
  printf ( " Normal end of execution.\n" );
  printf ( "\n" );
  timestamp ( );

  return 0;
}
/* compare results with the regular fftpack */ void pffft_validate_N(int N, int cplx) { int Nfloat = N*(cplx?2:1); int Nbytes = Nfloat * sizeof(float); float *ref, *in, *out, *tmp, *tmp2; PFFFT_Setup *s = pffft_new_setup(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL); int pass; if (!s) { printf("Skipping N=%d, not supported\n", N); return; } ref = pffft_aligned_malloc(Nbytes); in = pffft_aligned_malloc(Nbytes); out = pffft_aligned_malloc(Nbytes); tmp = pffft_aligned_malloc(Nbytes); tmp2 = pffft_aligned_malloc(Nbytes); for (pass=0; pass < 2; ++pass) { float ref_max = 0; int k; //printf("N=%d pass=%d cplx=%d\n", N, pass, cplx); // compute reference solution with FFTPACK if (pass == 0) { float *wrk = malloc(2*Nbytes+15*sizeof(float)); for (k=0; k < Nfloat; ++k) { ref[k] = in[k] = frand()*2-1; out[k] = 1e30; } if (!cplx) { rffti(N, wrk); rfftf(N, ref, wrk); // use our ordering for real ffts instead of the one of fftpack { float refN=ref[N-1]; for (k=N-2; k >= 1; --k) ref[k+1] = ref[k]; ref[1] = refN; } } else { cffti(N, wrk); cfftf(N, ref, wrk); } free(wrk); } for (k = 0; k < Nfloat; ++k) ref_max = MAX(ref_max, fabs(ref[k])); // pass 0 : non canonical ordering of transform coefficients if (pass == 0) { // test forward transform, with different input / output pffft_transform(s, in, tmp, 0, PFFFT_FORWARD); memcpy(tmp2, tmp, Nbytes); memcpy(tmp, in, Nbytes); pffft_transform(s, tmp, tmp, 0, PFFFT_FORWARD); for (k = 0; k < Nfloat; ++k) { assert(tmp2[k] == tmp[k]); } // test reordering pffft_zreorder(s, tmp, out, PFFFT_FORWARD); pffft_zreorder(s, out, tmp, PFFFT_BACKWARD); for (k = 0; k < Nfloat; ++k) { assert(tmp2[k] == tmp[k]); } pffft_zreorder(s, tmp, out, PFFFT_FORWARD); } else { // pass 1 : canonical ordering of transform coeffs. 
pffft_transform_ordered(s, in, tmp, 0, PFFFT_FORWARD); memcpy(tmp2, tmp, Nbytes); memcpy(tmp, in, Nbytes); pffft_transform_ordered(s, tmp, tmp, 0, PFFFT_FORWARD); for (k = 0; k < Nfloat; ++k) { assert(tmp2[k] == tmp[k]); } memcpy(out, tmp, Nbytes); } { for (k=0; k < Nfloat; ++k) { if (!(fabs(ref[k] - out[k]) < 1e-3*ref_max)) { printf("%s forward PFFFT mismatch found for N=%d\n", (cplx?"CPLX":"REAL"), N); exit(1); } } if (pass == 0) pffft_transform(s, tmp, out, 0, PFFFT_BACKWARD); else pffft_transform_ordered(s, tmp, out, 0, PFFFT_BACKWARD); memcpy(tmp2, out, Nbytes); memcpy(out, tmp, Nbytes); if (pass == 0) pffft_transform(s, out, out, 0, PFFFT_BACKWARD); else pffft_transform_ordered(s, out, out, 0, PFFFT_BACKWARD); for (k = 0; k < Nfloat; ++k) { assert(tmp2[k] == out[k]); out[k] *= 1.f/N; } for (k = 0; k < Nfloat; ++k) { if (fabs(in[k] - out[k]) > 1e-3 * ref_max) { printf("pass=%d, %s IFFFT does not match for N=%d\n", pass, (cplx?"CPLX":"REAL"), N); break; exit(1); } } } // quick test of the circular convolution in fft domain { float conv_err = 0, conv_max = 0; pffft_zreorder(s, ref, tmp, PFFFT_FORWARD); memset(out, 0, Nbytes); pffft_zconvolve_accumulate(s, ref, ref, out, 1.0); pffft_zreorder(s, out, tmp2, PFFFT_FORWARD); for (k=0; k < Nfloat; k += 2) { float ar = tmp[k], ai=tmp[k+1]; if (cplx || k > 0) { tmp[k] = ar*ar - ai*ai; tmp[k+1] = 2*ar*ai; } else { tmp[0] = ar*ar; tmp[1] = ai*ai; } } for (k=0; k < Nfloat; ++k) { float d = fabs(tmp[k] - tmp2[k]), e = fabs(tmp[k]); if (d > conv_err) conv_err = d; if (e > conv_max) conv_max = e; } if (conv_err > 1e-5*conv_max) { printf("zconvolve error ? %g %g\n", conv_err, conv_max); exit(1); } } } printf("%s PFFFT is OK for N=%d\n", (cplx?"CPLX":"REAL"), N); fflush(stdout); pffft_destroy_setup(s); pffft_aligned_free(ref); pffft_aligned_free(in); pffft_aligned_free(out); pffft_aligned_free(tmp); pffft_aligned_free(tmp2); }
void benchmark_ffts(int N, int cplx) { int Nfloat = (cplx ? N*2 : N); int Nbytes = Nfloat * sizeof(float); float *X = pffft_aligned_malloc(Nbytes), *Y = pffft_aligned_malloc(Nbytes), *Z = pffft_aligned_malloc(Nbytes); double t0, t1, flops; int k; int max_iter = 5120000/N*4; #ifdef __arm__ max_iter /= 4; #endif int iter; for (k = 0; k < Nfloat; ++k) { X[k] = 0; //sqrtf(k+1); } // FFTPack benchmark { float *wrk = malloc(2*Nbytes + 15*sizeof(float)); int max_iter_ = max_iter/pffft_simd_size(); if (max_iter_ == 0) max_iter_ = 1; if (cplx) cffti(N, wrk); else rffti(N, wrk); t0 = uclock_sec(); for (iter = 0; iter < max_iter_; ++iter) { if (cplx) { cfftf(N, X, wrk); cfftb(N, X, wrk); } else { rfftf(N, X, wrk); rfftb(N, X, wrk); } } t1 = uclock_sec(); free(wrk); flops = (max_iter_*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html show_output("FFTPack", N, cplx, flops, t0, t1, max_iter_); } #ifdef HAVE_VECLIB int log2N = (int)(log(N)/log(2) + 0.5f); if (N == (1<<log2N)) { FFTSetup setup; setup = vDSP_create_fftsetup(log2N, FFT_RADIX2); DSPSplitComplex zsamples; zsamples.realp = &X[0]; zsamples.imagp = &X[Nfloat/2]; t0 = uclock_sec(); for (iter = 0; iter < max_iter; ++iter) { if (cplx) { vDSP_fft_zip(setup, &zsamples, 1, log2N, kFFTDirection_Forward); vDSP_fft_zip(setup, &zsamples, 1, log2N, kFFTDirection_Inverse); } else { vDSP_fft_zrip(setup, &zsamples, 1, log2N, kFFTDirection_Forward); vDSP_fft_zrip(setup, &zsamples, 1, log2N, kFFTDirection_Inverse); } } t1 = uclock_sec(); vDSP_destroy_fftsetup(setup); flops = (max_iter*2) * ((cplx ? 
5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html show_output("vDSP", N, cplx, flops, t0, t1, max_iter); } else { show_output("vDSP", N, cplx, -1, -1, -1, -1); } #endif #ifdef HAVE_FFTW { fftwf_plan planf, planb; fftw_complex *in = (fftw_complex*) fftwf_malloc(sizeof(fftw_complex) * N); fftw_complex *out = (fftw_complex*) fftwf_malloc(sizeof(fftw_complex) * N); memset(in, 0, sizeof(fftw_complex) * N); int flags = (N < 40000 ? FFTW_MEASURE : FFTW_ESTIMATE); // measure takes a lot of time on largest ffts //int flags = FFTW_ESTIMATE; if (cplx) { planf = fftwf_plan_dft_1d(N, (fftwf_complex*)in, (fftwf_complex*)out, FFTW_FORWARD, flags); planb = fftwf_plan_dft_1d(N, (fftwf_complex*)in, (fftwf_complex*)out, FFTW_BACKWARD, flags); } else { planf = fftwf_plan_dft_r2c_1d(N, (float*)in, (fftwf_complex*)out, flags); planb = fftwf_plan_dft_c2r_1d(N, (fftwf_complex*)in, (float*)out, flags); } t0 = uclock_sec(); for (iter = 0; iter < max_iter; ++iter) { fftwf_execute(planf); fftwf_execute(planb); } t1 = uclock_sec(); fftwf_destroy_plan(planf); fftwf_destroy_plan(planb); fftwf_free(in); fftwf_free(out); flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html show_output((flags == FFTW_MEASURE ? "FFTW (meas.)" : " FFTW (estim)"), N, cplx, flops, t0, t1, max_iter); } #endif // PFFFT benchmark { PFFFT_Setup *s = pffft_new_setup(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL); if (s) { t0 = uclock_sec(); for (iter = 0; iter < max_iter; ++iter) { pffft_transform(s, X, Z, Y, PFFFT_FORWARD); pffft_transform(s, X, Z, Y, PFFFT_BACKWARD); } t1 = uclock_sec(); pffft_destroy_setup(s); flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html show_output("PFFFT", N, cplx, flops, t0, t1, max_iter); } } if (!array_output_format) { printf("--\n"); } pffft_aligned_free(X); pffft_aligned_free(Y); pffft_aligned_free(Z); }