/*
 * Run the forward complex transform described by `plan` in place on `data`.
 *
 * When plan->bluestein is non-zero the work is handed to bluestein() with
 * isign = -1; otherwise cfftf() is called directly.  Both receive
 * plan->length and plan->work unchanged.
 */
void complex_plan_forward (complex_plan plan, double *data)
  {
  if (!plan->bluestein)
    {
    cfftf (plan->length, data, plan->work);
    return;
    }

  bluestein (plan->length, data, plan->work, -1);
  }
/*
 * Forward MDCT built from one complex FFT.
 *
 * mdct   supplies the block length (mdct->N), the twiddle table
 *        (mdct->sincos) and the FFT handle (mdct->cfft).
 * X_in   input samples; indices in [0, N-1] are read.
 * X_out  output; indices in [0, N-1] are written.
 *
 * The local FFT buffer holds 512 complex values and N/4 of them are used,
 * so this presumably expects N <= 2048 (not checked here).
 */
void faad_mdct(mdct_info *mdct, real_t *X_in, real_t *X_out)
{
    uint16_t i;

    complex_t t;
    ALIGN complex_t buf[512];
    complex_t *tw = mdct->sincos;

    uint16_t N  = mdct->N;
    uint16_t N2 = N >> 1;
    uint16_t N4 = N >> 2;
    uint16_t N8 = N >> 3;

#ifndef FIXED_POINT
    real_t scale = REAL_CONST(N);
#else
    real_t scale = REAL_CONST(4.0/N);
#endif

    /* Pre-twiddle: fold the input into N/4 complex values, two per pass
     * (slot i and slot i + N/8), then apply the scale factor. */
    for (i = 0; i < N8; i++)
    {
        uint16_t j  = i << 1;
        uint16_t hi = i + N8;

        RE(t) = X_in[N - N4 - 1 - j] + X_in[N - N4 + j];
        IM(t) = X_in[    N4 +     j] - X_in[    N4 - 1 - j];

        ComplexMult(&RE(buf[i]), &IM(buf[i]),
            RE(t), IM(t), RE(tw[i]), IM(tw[i]));

        RE(buf[i]) = MUL_R(RE(buf[i]), scale);
        IM(buf[i]) = MUL_R(IM(buf[i]), scale);

        RE(t) = X_in[N2 - 1 - j] - X_in[        j];
        IM(t) = X_in[N2 +     j] + X_in[N - 1 - j];

        ComplexMult(&RE(buf[hi]), &IM(buf[hi]),
            RE(t), IM(t), RE(tw[hi]), IM(tw[hi]));

        RE(buf[hi]) = MUL_R(RE(buf[hi]), scale);
        IM(buf[hi]) = MUL_R(IM(buf[hi]), scale);
    }

    /* complex FFT, any non-scaling FFT can be used here */
    cfftf(mdct->cfft, buf);

    /* Post-twiddle: each rotated FFT bin yields four output samples. */
    for (i = 0; i < N4; i++)
    {
        uint16_t j = i << 1;

        ComplexMult(&RE(t), &IM(t),
            RE(buf[i]), IM(buf[i]), RE(tw[i]), IM(tw[i]));

        X_out[         j] = -RE(t);
        X_out[N2 - 1 - j] =  IM(t);
        X_out[N2 +     j] = -IM(t);
        X_out[N  - 1 - j] =  RE(t);
    }
}
/*
 * Python entry point: cfftf(a, wsave).
 *
 * args  tuple (a, wsave).  `a` is copied into a new complex-double array
 *       (at least 1-d); `wsave` is converted to a 1-d C double array and
 *       must have exactly 4*npts + 15 elements, where npts is the length
 *       of the last axis of `a`.
 *
 * The forward complex FFT is applied in place to every length-npts row of
 * the copy, which is then returned as a new reference.  On error NULL is
 * returned with a Python exception set.
 */
PyObject *
fftpack_cfftf(PyObject *NPY_UNUSED(self), PyObject *args)
{
    PyObject *op1, *op2;
    PyArrayObject *data;
    PyArray_Descr *descr;
    double *wsave = NULL, *dptr;
    npy_intp nsave;
    int npts, nrepeats, i;

    if(!PyArg_ParseTuple(args, "OO", &op1, &op2)) {
        return NULL;
    }
    data = (PyArrayObject *)PyArray_CopyFromObject(op1,
            NPY_CDOUBLE, 1, 0);
    if (data == NULL) {
        return NULL;
    }
    descr = PyArray_DescrFromType(NPY_DOUBLE);
    if (PyArray_AsCArray(&op2, (void *)&wsave, &nsave, 1, descr) == -1) {
        /*
         * The conversion failed, so op2 is still the borrowed reference
         * obtained from the argument tuple and wsave was never filled in.
         * Handing them to PyArray_Free would drop a reference we do not
         * own, so only release the data copy here.
         */
        Py_DECREF(data);
        return NULL;
    }

    npts = PyArray_DIM(data, PyArray_NDIM(data) - 1);
    if (nsave != npts*4 + 15) {
        PyErr_SetString(ErrorObject, "invalid work array for fft size");
        goto fail;
    }

    /* An empty last axis means there is nothing to transform; avoid 0/0. */
    nrepeats = (npts > 0) ? PyArray_SIZE(data)/npts : 0;
    dptr = (double *)PyArray_DATA(data);
    NPY_SIGINT_ON;
    for (i = 0; i < nrepeats; i++) {
        cfftf(npts, dptr, wsave);
        /* each complex sample occupies two doubles */
        dptr += npts*2;
    }
    NPY_SIGINT_OFF;
    PyArray_Free(op2, (char *)wsave);
    return (PyObject *)data;

fail:
    /* Only reached after PyArray_AsCArray succeeded, so op2 is owned. */
    PyArray_Free(op2, (char *)wsave);
    Py_DECREF(data);
    return NULL;
}
/*
 * Set up the workspace used by bluestein() for a length-n transform.
 *
 * n         transform length.
 * tstorage  receives a freshly allocated array of doubles (RALLOC).
 * worksize  receives the number of doubles allocated.
 *
 * Layout of *tstorage (offsets in doubles):
 *   [0, 2)              n2 = good_size(2n-1), stored as a size_t
 *   [2, 2+2n)           b_k (n complex values)
 *   [2+2n, 2+2n+2*n2)   b_k zero-padded to n2, scaled by 1/n2, then FFT'd
 *   [2+2n+2*n2, ...)    work area initialised by cffti(n2, ...)
 */
void bluestein_i (size_t n, double **tstorage, size_t *worksize)
  {
  static const double pi=3.14159265358979323846;
  size_t n2=good_size(n*2-1);
  size_t total=2+2*n+8*n2+16;
  size_t k, idx, quad;
  double phi, inv_n2;
  double *bk, *bkf, *work;
  double pibyn=pi/n;

  *worksize=total;
  *tstorage = RALLOC(double,total);
  ((size_t *)(*tstorage))[0]=n2;
  bk  = *tstorage+2;
  bkf = bk+2*n;
  work= bkf+2*n2;

  /* b_k = exp(i*pi*k^2/n); quad tracks k^2 modulo 2n incrementally */
  bk[0] = 1;
  bk[1] = 0;
  quad=0;
  for (k=1; k<n; ++k)
    {
    quad+=2*k-1;
    if (quad>=2*n)
      quad-=2*n;
    phi = pibyn*quad;
    bk[2*k  ] = cos(phi);
    bk[2*k+1] = sin(phi);
    }

  /* normalised b_k, mirrored around the end of the length-n2 buffer */
  inv_n2 = 1./n2;
  bkf[0] = bk[0]*inv_n2;
  bkf[1] = bk[1]*inv_n2;
  for (k=1; k<n; ++k)
    {
    const size_t lo = 2*k;
    const size_t mirror = 2*n2-lo;
    const double re = bk[lo]*inv_n2;
    const double im = bk[lo+1]*inv_n2;
    bkf[mirror]   = re;
    bkf[lo]       = re;
    bkf[mirror+1] = im;
    bkf[lo+1]     = im;
    }

  /* zero the gap between the two copies */
  idx=2*n;
  while (idx<=(2*n2-2*n+1))
    {
    bkf[idx]=0.;
    ++idx;
    }

  cffti (n2,work);
  cfftf (n2,bkf,work);
  }
/*
 * Forward MDCT built from one complex FFT.
 *
 * mdct   supplies the block length (mdct->N), the twiddle table
 *        (mdct->sincos) and the FFT handle (mdct->cfft).
 * X_in   input samples; indices in [0, N-1] are read.
 * X_out  output; indices in [0, N-1] are written.
 *
 * With both ALLOW_SMALL_FRAMELENGTH and FIXED_POINT defined, the scale
 * factor is additionally multiplied by sqrt(2048/1920) whenever N is not a
 * power of two.
 */
void faad_mdct(mdct_info *mdct, real_t *X_in, real_t *X_out)
{
    uint16_t i;

    complex_t t;
    ALIGN complex_t buf[512];
    complex_t *tw = mdct->sincos;

    uint16_t N  = mdct->N;
    uint16_t N2 = N >> 1;
    uint16_t N4 = N >> 2;
    uint16_t N8 = N >> 3;

#ifndef FIXED_POINT
    real_t scale = REAL_CONST(N);
#else
    real_t scale = REAL_CONST(4.0/N);
#endif

#ifdef ALLOW_SMALL_FRAMELENGTH
#ifdef FIXED_POINT
    /* detect non-power of 2 */
    if (N & (N-1))
    {
        /* adjust scale for non-power of 2 MDCT */
        /* *= sqrt(2048/1920) */
        scale = MUL_C(scale, COEF_CONST(1.0327955589886444));
    }
#endif
#endif

    /* Pre-twiddle: fold the input into N/4 complex values, two per pass
     * (slot i and slot i + N/8), then apply the scale factor. */
    for (i = 0; i < N8; i++)
    {
        uint16_t j  = i << 1;
        uint16_t hi = i + N8;

        RE(t) = X_in[N - N4 - 1 - j] + X_in[N - N4 + j];
        IM(t) = X_in[    N4 +     j] - X_in[    N4 - 1 - j];

        ComplexMult(&RE(buf[i]), &IM(buf[i]),
            RE(t), IM(t), RE(tw[i]), IM(tw[i]));

        RE(buf[i]) = MUL_R(RE(buf[i]), scale);
        IM(buf[i]) = MUL_R(IM(buf[i]), scale);

        RE(t) = X_in[N2 - 1 - j] - X_in[        j];
        IM(t) = X_in[N2 +     j] + X_in[N - 1 - j];

        ComplexMult(&RE(buf[hi]), &IM(buf[hi]),
            RE(t), IM(t), RE(tw[hi]), IM(tw[hi]));

        RE(buf[hi]) = MUL_R(RE(buf[hi]), scale);
        IM(buf[hi]) = MUL_R(IM(buf[hi]), scale);
    }

    /* complex FFT, any non-scaling FFT can be used here */
    cfftf(mdct->cfft, buf);

    /* Post-twiddle: each rotated FFT bin yields four output samples. */
    for (i = 0; i < N4; i++)
    {
        uint16_t j = i << 1;

        ComplexMult(&RE(t), &IM(t),
            RE(buf[i]), IM(buf[i]), RE(tw[i]), IM(tw[i]));

        X_out[         j] = -RE(t);
        X_out[N2 - 1 - j] =  IM(t);
        X_out[N2 +     j] = -IM(t);
        X_out[N  - 1 - j] =  RE(t);
    }
}
int cfftf(int *n, doublecomplex *c, double *wsave) { // Casting (doublecomplex*) to (double*) is probably OK. return cfftf(n, (double *)c, wsave); }
/* Main program */
/*
 * f2c-translated test driver for the FFT package.
 *
 * For each of the first nns (= 7) lengths in nd[], the driver fills a test
 * sequence, evaluates every transform by direct summation, runs the
 * corresponding library routine, and records the largest absolute
 * difference.  It also runs each forward/backward pair and records how far
 * the round trip drifts from the original data.  One formatted record of
 * 20 values (n followed by 19 error figures) is written per length through
 * the Fortran I/O runtime (s_wsfe / do_fio / e_wsfe).
 *
 * Always returns 0.
 */
int MAIN__(void)
{
    /* Initialized data */

    /* transform lengths to test; only the first nns (7) entries are used */
    static integer nd[10] = { 120,54,49,32,4,3,2 };

    /* Format strings */
    static char fmt_1001[] = "(\0020N\002,i5,\002 RFFTF \002,e10.3,\002 RFF"
        "TB \002,e10.3,\002 RFFTFB \002,e10.3,\002 SINT \002,e10.3,"
        "\002 SINTFB \002,e10.3,\002 COST \002,e10.3/7x,\002 COSTFB "
        "\002,e10.3,\002 SINQF \002,e10.3,\002 SINQB \002,e10.3,\002 SI"
        "NQFB \002,e10.3,\002 COSQF \002,e10.3,\002 COSQB \002,e10.3/7x,"
        "\002 COSQFB \002,e10.3,\002 DEZF \002,e10.3,\002 DEZB \002,e"
        "10.3,\002 DEZFB \002,e10.3,\002 CFFTF \002,e10.3,\002 CFFTB "
        " \002,e10.3/7x,\002 CFFTFB \002,e10.3)";

    /* System generated locals */
    integer i__1, i__2, i__3, i__4, i__5, i__6;
    doublereal d__1, d__2, d__3, d__4;
    doublecomplex z__1, z__2, z__3;

    /* Builtin functions */
    double sqrt(doublereal), sin(doublereal), cos(doublereal);
    integer pow_ii(integer *, integer *);
    double atan(doublereal), z_abs(doublecomplex *);
    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void);

    /* Local variables */
    /* x / y: working sequence and reference (or vice versa, per test);
       xh: pristine copy of the test sequence used to restore x and y;
       w, ifac: work and factor arrays passed to every FFT routine;
       cx / cy: complex working sequence and reference */
    doublereal a[100], b[100];
    integer i__, j, k, n;
    doublereal w[2000], x[200], y[200], ah[100], bh[100], cf, fn, dt, pi;
    doublecomplex cx[200], cy[200];
    doublereal xh[200];
    integer nz, nm1, np1, ns2;
    doublereal arg, tfn, tpi;
    integer nns;
    doublereal sum, arg1, arg2;
    integer ns2m;
    doublereal sum1, sum2, dcfb;
    integer ifac[64], modn;
    doublereal rftb, rftf;
    extern /* Subroutine */ void cost(integer *, doublereal *, doublereal *,
            integer *), sint(integer *, doublereal *, doublereal *, integer *
            );
    doublereal dezb1, dezf1, sqrt2;
    extern /* Subroutine */ void cfftb(integer *, doublecomplex *, doublereal *,
            integer *), cfftf(integer *, doublecomplex *, doublereal *,
            integer *);
    doublereal dezfb;
    extern /* Subroutine */ void cffti(integer *, doublereal *, integer *),
            rfftb(integer *, doublereal *, doublereal *, integer *);
    doublereal rftfb;
    extern /* Subroutine */ void rfftf(integer *, doublereal *, doublereal *,
            integer *), cosqb(integer *, doublereal *, doublereal *,
            integer *), rffti(integer *, doublereal *, integer *), cosqf(integer *,
            doublereal *, doublereal *, integer *), sinqb(integer *,
            doublereal *, doublereal *, integer *), cosqi(integer *,
            doublereal *, integer *), sinqf(integer *, doublereal *,
            doublereal *, integer *), costi(integer *, doublereal *, integer *);
    doublereal azero;
    extern /* Subroutine */ void sinqi(integer *, doublereal *, integer *),
            sinti(integer *, doublereal *, integer *);
    doublereal costt, sintt, dcfftb, dcfftf, cosqfb, costfb;
    extern /* Subroutine */ void ezfftb(integer *, doublereal *, doublereal *,
            doublereal *, doublereal *, doublereal *, integer *);
    doublereal sinqfb;
    extern /* Subroutine */ void ezfftf(integer *, doublereal *, doublereal *,
            doublereal *, doublereal *, doublereal *, integer *);
    doublereal sintfb;
    extern /* Subroutine */ void ezffti(integer *, doublereal *, integer *);
    doublereal azeroh, cosqbt, cosqft, sinqbt, sinqft;

    /* Fortran I/O blocks */
    static cilist io___58 = { 0, 6, 0, fmt_1001, 0 };


/*     * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

/*                       VERSION 4  APRIL 1985 */

/*                         A TEST DRIVER FOR */
/*          A PACKAGE OF FORTRAN SUBPROGRAMS FOR THE FAST FOURIER */
/*           TRANSFORM OF PERIODIC AND OTHER SYMMETRIC SEQUENCES */

/*                              BY */

/*                       PAUL N SWARZTRAUBER */

/*    NATIONAL CENTER FOR ATMOSPHERIC RESEARCH  BOULDER,COLORADO 80307 */

/*        WHICH IS SPONSORED BY THE NATIONAL SCIENCE FOUNDATION */

/*     * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */


/*             THIS PROGRAM TESTS THE PACKAGE OF FAST FOURIER */
/*     TRANSFORMS FOR BOTH COMPLEX AND REAL PERIODIC SEQUENCES AND */
/*     CERTAIN OTHER SYMMETRIC SEQUENCES THAT ARE LISTED BELOW. */

/*     1.   RFFTI     INITIALIZE  RFFTF AND RFFTB */
/*     2.   RFFTF     FORWARD TRANSFORM OF A REAL PERIODIC SEQUENCE */
/*     3.   RFFTB     BACKWARD TRANSFORM OF A REAL COEFFICIENT ARRAY */

/*     4.   EZFFTI    INITIALIZE EZFFTF AND EZFFTB */
/*     5.   EZFFTF    A SIMPLIFIED REAL PERIODIC FORWARD TRANSFORM */
/*     6.   EZFFTB    A SIMPLIFIED REAL PERIODIC BACKWARD TRANSFORM */

/*     7.   SINTI     INITIALIZE SINT */
/*     8.   SINT      SINE TRANSFORM OF A REAL ODD SEQUENCE */

/*     9.   COSTI     INITIALIZE COST */
/*     10.  COST      COSINE TRANSFORM OF A REAL EVEN SEQUENCE */

/*     11.  SINQI     INITIALIZE SINQF AND SINQB */
/*     12.  SINQF     FORWARD SINE TRANSFORM WITH ODD WAVE NUMBERS */
/*     13.  SINQB     UNNORMALIZED INVERSE OF SINQF */

/*     14.  COSQI     INITIALIZE COSQF AND COSQB */
/*     15.  COSQF     FORWARD COSINE TRANSFORM WITH ODD WAVE NUMBERS */
/*     16.  COSQB     UNNORMALIZED INVERSE OF COSQF */

/*     17.  CFFTI     INITIALIZE CFFTF AND CFFTB */
/*     18.  CFFTF     FORWARD TRANSFORM OF A COMPLEX PERIODIC SEQUENCE */
/*     19.  CFFTB     UNNORMALIZED INVERSE OF CFFTF */


    sqrt2 = sqrt(2.0);
    nns = 7;
    i__1 = nns;
    /* one pass per transform length */
    for (nz = 1; nz <= i__1; ++nz) {
        n = nd[nz - 1];
        modn = n % 2;
        fn = (real) n;
        tfn = fn + fn;
        np1 = n + 1;
        nm1 = n - 1;
        i__2 = np1;
        /* test sequence sin(j*sqrt(2)), j = 1..n+1; xh keeps the pristine copy */
        for (j = 1; j <= i__2; ++j) {
            x[j - 1] = sin((real) j * sqrt2);
            y[j - 1] = x[j - 1];
            xh[j - 1] = x[j - 1];
/* L101: */
        }

/*     TEST SUBROUTINES RFFTI,RFFTF AND RFFTB */

        rffti(&n, w, ifac);
        pi = 3.141592653589793238462643383279502884197169399375108209749445923;
        dt = (pi + pi) / fn;
        ns2 = (n + 1) / 2;
        if (ns2 < 2) {
            goto L104;
        }
        i__2 = ns2;
        /* reference forward transform by direct summation: cosine sum and
           negated sine sum are stored in consecutive slots of y */
        for (k = 2; k <= i__2; ++k) {
            sum1 = 0.0;
            sum2 = 0.0;
            arg = (real) (k - 1) * dt;
            i__3 = n;
            for (i__ = 1; i__ <= i__3; ++i__) {
                arg1 = (real) (i__ - 1) * arg;
                sum1 += x[i__ - 1] * cos(arg1);
                sum2 += x[i__ - 1] * sin(arg1);
/* L102: */
            }
            y[(k << 1) - 3] = sum1;
            y[(k << 1) - 2] = -sum2;
/* L103: */
        }
L104:
        /* y[0] is the plain sum; for even n, y[n-1] is the alternating sum */
        sum1 = 0.0;
        sum2 = 0.0;
        i__2 = nm1;
        for (i__ = 1; i__ <= i__2; i__ += 2) {
            sum1 += x[i__ - 1];
            sum2 += x[i__];
/* L105: */
        }
        if (modn == 1) {
            sum1 += x[n - 1];
        }
        y[0] = sum1 + sum2;
        if (modn == 0) {
            y[n - 1] = sum1 - sum2;
        }
        rfftf(&n, x, w, ifac);
        /* rftf: largest |rfftf result - reference|; x is restored from xh */
        rftf = 0.0;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
            d__2 = rftf, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
            rftf = max(d__2,d__3);
            x[i__ - 1] = xh[i__ - 1];
/* L106: */
        }
        rftf /= fn;
        i__2 = n;
        /* reference backward transform by direct summation, treating x as
           a coefficient array */
        for (i__ = 1; i__ <= i__2; ++i__) {
            sum = x[0] * 0.5;
            arg = (real) (i__ - 1) * dt;
            if (ns2 < 2) {
                goto L108;
            }
            i__3 = ns2;
            for (k = 2; k <= i__3; ++k) {
                arg1 = (real) (k - 1) * arg;
                sum = sum + x[(k << 1) - 3] * cos(arg1) - x[(k << 1) - 2] *
                        sin(arg1);
/* L107: */
            }
L108:
            if (modn == 0) {
                i__3 = i__ - 1;
                sum += (real) pow_ii(&c_n1, &i__3) * 0.5 * x[n - 1];
            }
            y[i__ - 1] = sum + sum;
/* L109: */
        }
        rfftb(&n, x, w, ifac);
        /* rftb: largest |rfftb result - reference|; x and y are restored */
        rftb = 0.0;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
            d__2 = rftb, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
            rftb = max(d__2,d__3);
            x[i__ - 1] = xh[i__ - 1];
            y[i__ - 1] = xh[i__ - 1];
/* L110: */
        }
        /* round trip: backward then forward on y, scaled by 1/n, against x */
        rfftb(&n, y, w, ifac);
        rfftf(&n, y, w, ifac);
        cf = 1.0 / fn;
        rftfb = 0.0;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
            d__2 = rftfb, d__3 = (d__1 = cf * y[i__ - 1] - x[i__ - 1], abs(
                    d__1));
            rftfb = max(d__2,d__3);
/* L111: */
        }

/*     TEST SUBROUTINES SINTI AND SINT */

        dt = pi / fn;
        i__2 = nm1;
        for (i__ = 1; i__ <= i__2; ++i__) {
            x[i__ - 1] = xh[i__ - 1];
/* L112: */
        }
        i__2 = nm1;
        /* reference sine transform of length n-1 by direct summation */
        for (i__ = 1; i__ <= i__2; ++i__) {
            y[i__ - 1] = 0.0;
            arg1 = (real) i__ * dt;
            i__3 = nm1;
            for (k = 1; k <= i__3; ++k) {
                y[i__ - 1] += x[k - 1] * sin((real) k * arg1);
/* L113: */
            }
            y[i__ - 1] += y[i__ - 1];
/* L114: */
        }
        sinti(&nm1, w, ifac);
        sint(&nm1, x, w, ifac);
        cf = 0.5 / fn;
        sintt = 0.0;
        i__2 = nm1;
        for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
            d__2 = sintt, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
            sintt = max(d__2,d__3);
            x[i__ - 1] = xh[i__ - 1];
            y[i__ - 1] = x[i__ - 1];
/* L115: */
        }
        sintt = cf * sintt;
        /* round trip: sint applied twice, scaled by cf, against the original */
        sint(&nm1, x, w, ifac);
        sint(&nm1, x, w, ifac);
        sintfb = 0.0;
        i__2 = nm1;
        for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
            d__2 = sintfb, d__3 = (d__1 = cf * x[i__ - 1] - y[i__ - 1], abs(
                    d__1));
            sintfb = max(d__2,d__3);
/* L116: */
        }

/*     TEST SUBROUTINES COSTI AND COST */

        i__2 = np1;
        for (i__ = 1; i__ <= i__2; ++i__) {
            x[i__ - 1] = xh[i__ - 1];
/* L117: */
        }
        i__2 = np1;
        /* reference cosine transform of length n+1 by direct summation */
        for (i__ = 1; i__ <= i__2; ++i__) {
            i__3 = i__ + 1;
            y[i__ - 1] = (x[0] + (real) pow_ii(&c_n1, &i__3) * x[n]) * 0.5;
            arg = (real) (i__ - 1) * dt;
            i__3 = n;
            for (k = 2; k <= i__3; ++k) {
                y[i__ - 1] += x[k - 1] * cos((real) (k - 1) * arg);
/* L118: */
            }
            y[i__ - 1] += y[i__ - 1];
/* L119: */
        }
        costi(&np1, w, ifac);
        cost(&np1, x, w, ifac);
        costt = 0.0;
        i__2 = np1;
        for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
            d__2 = costt, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
            costt = max(d__2,d__3);
            x[i__ - 1] = xh[i__ - 1];
            y[i__ - 1] = xh[i__ - 1];
/* L120: */
        }
        costt = cf * costt;
        /* round trip: cost applied twice, scaled by cf, against the original */
        cost(&np1, x, w, ifac);
        cost(&np1, x, w, ifac);
        costfb = 0.0;
        i__2 = np1;
        for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
            d__2 = costfb, d__3 = (d__1 = cf * x[i__ - 1] - y[i__ - 1], abs(
                    d__1));
            costfb = max(d__2,d__3);
/* L121: */
        }

/*     TEST SUBROUTINES SINQI,SINQF AND SINQB */

        cf = 0.25 / fn;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
            y[i__ - 1] = xh[i__ - 1];
/* L122: */
        }
        dt = pi / (fn + fn);
        i__2 = n;
        /* reference for sinqb by direct summation (odd wave numbers 2k-1) */
        for (i__ = 1; i__ <= i__2; ++i__) {
            x[i__ - 1] = 0.0;
            arg = dt * (real) i__;
            i__3 = n;
            for (k = 1; k <= i__3; ++k) {
                x[i__ - 1] += y[k - 1] * sin((real) (k + k - 1) * arg);
/* L123: */
            }
            x[i__ - 1] *= 4.0;
/* L124: */
        }
        sinqi(&n, w, ifac);
        sinqb(&n, y, w, ifac);
        sinqbt = 0.0;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
            d__2 = sinqbt, d__3 = (d__1 = y[i__ - 1] - x[i__ - 1], abs(d__1));
            sinqbt = max(d__2,d__3);
            x[i__ - 1] = xh[i__ - 1];
/* L125: */
        }
        sinqbt = cf * sinqbt;
        i__2 = n;
        /* reference for sinqf by direct summation */
        for (i__ = 1; i__ <= i__2; ++i__) {
            arg = (real) (i__ + i__ - 1) * dt;
            i__3 = i__ + 1;
            y[i__ - 1] = (real) pow_ii(&c_n1, &i__3) * 0.5 * x[n - 1];
            i__3 = nm1;
            for (k = 1; k <= i__3; ++k) {
                y[i__ - 1] += x[k - 1] * sin((real) k * arg);
/* L126: */
            }
            y[i__ - 1] += y[i__ - 1];
/* L127: */
        }
        sinqf(&n, x, w, ifac);
        sinqft = 0.0;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
            d__2 = sinqft, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
            sinqft = max(d__2,d__3);
            y[i__ - 1] = xh[i__ - 1];
            x[i__ - 1] = xh[i__ - 1];
/* L128: */
        }
        /* round trip: sinqf then sinqb on y, scaled by cf, against x */
        sinqf(&n, y, w, ifac);
        sinqb(&n, y, w, ifac);
        sinqfb = 0.0;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
            d__2 = sinqfb, d__3 = (d__1 = cf * y[i__ - 1] - x[i__ - 1], abs(
                    d__1));
            sinqfb = max(d__2,d__3);
/* L129: */
        }

/*     TEST SUBROUTINES COSQI,COSQF AND COSQB */

        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
            y[i__ - 1] = xh[i__ - 1];
/* L130: */
        }
        i__2 = n;
        /* reference for cosqb by direct summation (odd wave numbers 2k-1) */
        for (i__ = 1; i__ <= i__2; ++i__) {
            x[i__ - 1] = 0.0;
            arg = (real) (i__ - 1) * dt;
            i__3 = n;
            for (k = 1; k <= i__3; ++k) {
                x[i__ - 1] += y[k - 1] * cos((real) (k + k - 1) * arg);
/* L131: */
            }
            x[i__ - 1] *= 4.0;
/* L132: */
        }
        cosqi(&n, w, ifac);
        cosqb(&n, y, w, ifac);
        cosqbt = 0.0;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
            d__2 = cosqbt, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
            cosqbt = max(d__2,d__3);
            x[i__ - 1] = xh[i__ - 1];
/* L133: */
        }
        cosqbt = cf * cosqbt;
        i__2 = n;
        /* reference for cosqf by direct summation */
        for (i__ = 1; i__ <= i__2; ++i__) {
            y[i__ - 1] = x[0] * 0.5;
            arg = (real) (i__ + i__ - 1) * dt;
            i__3 = n;
            for (k = 2; k <= i__3; ++k) {
                y[i__ - 1] += x[k - 1] * cos((real) (k - 1) * arg);
/* L134: */
            }
            y[i__ - 1] += y[i__ - 1];
/* L135: */
        }
        cosqf(&n, x, w, ifac);
        cosqft = 0.0;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
            d__2 = cosqft, d__3 = (d__1 = y[i__ - 1] - x[i__ - 1], abs(d__1));
            cosqft = max(d__2,d__3);
            x[i__ - 1] = xh[i__ - 1];
            y[i__ - 1] = xh[i__ - 1];
/* L136: */
        }
        cosqft = cf * cosqft;
        /* round trip: cosqb then cosqf on x, scaled by cf, against y */
        cosqb(&n, x, w, ifac);
        cosqf(&n, x, w, ifac);
        cosqfb = 0.0;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
            d__2 = cosqfb, d__3 = (d__1 = cf * x[i__ - 1] - y[i__ - 1], abs(
                    d__1));
            cosqfb = max(d__2,d__3);
/* L137: */
        }

/*     TEST PROGRAMS EZFFTI,EZFFTF,EZFFTB */

        ezffti(&n, w, ifac);
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
            x[i__ - 1] = xh[i__ - 1];
/* L138: */
        }
        tpi = atan(1.0) * 8.0;
        dt = tpi / (real) n;
        ns2 = (n + 1) / 2;
        cf = 2.0 / (real) n;
        ns2m = ns2 - 1;
        if (ns2m <= 0) {
            goto L141;
        }
        i__2 = ns2m;
        /* reference cosine (a) and sine (b) coefficients by direct summation */
        for (k = 1; k <= i__2; ++k) {
            sum1 = 0.0;
            sum2 = 0.0;
            arg = (real) k * dt;
            i__3 = n;
            for (i__ = 1; i__ <= i__3; ++i__) {
                arg1 = (real) (i__ - 1) * arg;
                sum1 += x[i__ - 1] * cos(arg1);
                sum2 += x[i__ - 1] * sin(arg1);
/* L139: */
            }
            a[k - 1] = cf * sum1;
            b[k - 1] = cf * sum2;
/* L140: */
        }
L141:
        /* azero from the plain sum; for even n, a[ns2-1] from the
           alternating sum */
        nm1 = n - 1;
        sum1 = 0.0;
        sum2 = 0.0;
        i__2 = nm1;
        for (i__ = 1; i__ <= i__2; i__ += 2) {
            sum1 += x[i__ - 1];
            sum2 += x[i__];
/* L142: */
        }
        if (modn == 1) {
            sum1 += x[n - 1];
        }
        azero = cf * 0.5 * (sum1 + sum2);
        if (modn == 0) {
            a[ns2 - 1] = cf * 0.5 * (sum1 - sum2);
        }
        ezfftf(&n, x, &azeroh, ah, bh, w, ifac);
        /* dezf1: largest difference between ezfftf output (azeroh, ah, bh)
           and the reference (azero, a, b) */
        dezf1 = (d__1 = azeroh - azero, abs(d__1));
        if (modn == 0) {
/* Computing MAX */
            d__2 = dezf1, d__3 = (d__1 = a[ns2 - 1] - ah[ns2 - 1], abs(d__1));
            dezf1 = max(d__2,d__3);
        }
        if (ns2m <= 0) {
            goto L144;
        }
        i__2 = ns2m;
        for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
            d__3 = dezf1, d__4 = (d__1 = ah[i__ - 1] - a[i__ - 1], abs(d__1)),
                     d__3 = max(d__3,d__4), d__4 = (d__2 = bh[i__ - 1] - b[
                    i__ - 1], abs(d__2));
            dezf1 = max(d__3,d__4);
/* L143: */
        }
L144:
        ns2 = n / 2;
        if (modn == 0) {
            b[ns2 - 1] = 0.0;
        }
        i__2 = n;
        /* reference synthesis from (azero, a, b) by direct summation */
        for (i__ = 1; i__ <= i__2; ++i__) {
            sum = azero;
            arg1 = (real) (i__ - 1) * dt;
            i__3 = ns2;
            for (k = 1; k <= i__3; ++k) {
                arg2 = (real) k * arg1;
                sum = sum + a[k - 1] * cos(arg2) + b[k - 1] * sin(arg2);
/* L145: */
            }
            x[i__ - 1] = sum;
/* L146: */
        }
        ezfftb(&n, y, &azero, a, b, w, ifac);
        dezb1 = 0.0;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
            d__2 = dezb1, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
            dezb1 = max(d__2,d__3);
            x[i__ - 1] = xh[i__ - 1];
/* L147: */
        }
        /* round trip: ezfftf on x, ezfftb into y, compared with x */
        ezfftf(&n, x, &azero, a, b, w, ifac);
        ezfftb(&n, y, &azero, a, b, w, ifac);
        dezfb = 0.0;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
            d__2 = dezfb, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
            dezfb = max(d__2,d__3);
/* L148: */
        }

/*     TEST  CFFTI,CFFTF,CFFTB */

        i__2 = n;
        /* complex test sequence: cos(sqrt2*i) + i*sin(sqrt2*i*i) */
        for (i__ = 1; i__ <= i__2; ++i__) {
            i__3 = i__ - 1;
            d__1 = cos(sqrt2 * (real) i__);
            d__2 = sin(sqrt2 * (real) (i__ * i__));
            z__1.r = d__1, z__1.i = d__2;
            cx[i__3].r = z__1.r, cx[i__3].i = z__1.i;
/* L149: */
        }
        dt = (pi + pi) / fn;
        i__2 = n;
        /* reference forward DFT into cy by direct summation (negative
           exponent) */
        for (i__ = 1; i__ <= i__2; ++i__) {
            arg1 = -((real) (i__ - 1)) * dt;
            i__3 = i__ - 1;
            cy[i__3].r = 0.0, cy[i__3].i = 0.0;
            i__3 = n;
            for (k = 1; k <= i__3; ++k) {
                arg2 = (real) (k - 1) * arg1;
                i__4 = i__ - 1;
                i__5 = i__ - 1;
                d__1 = cos(arg2);
                d__2 = sin(arg2);
                z__3.r = d__1, z__3.i = d__2;
                i__6 = k - 1;
                z__2.r = z__3.r * cx[i__6].r - z__3.i * cx[i__6].i, z__2.i =
                        z__3.r * cx[i__6].i + z__3.i * cx[i__6].r;
                z__1.r = cy[i__5].r + z__2.r, z__1.i = cy[i__5].i + z__2.i;
                cy[i__4].r = z__1.r, cy[i__4].i = z__1.i;
/* L150: */
            }
/* L151: */
        }
        cffti(&n, w, ifac);
        cfftf(&n, cx, w, ifac);
        /* dcfftf: largest |cfftf result - reference|; cx is then divided
           by n and used as input for the backward test */
        dcfftf = 0.0;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
            i__3 = i__ - 1;
            i__4 = i__ - 1;
            z__1.r = cx[i__3].r - cy[i__4].r, z__1.i = cx[i__3].i - cy[i__4].i;
            d__1 = dcfftf, d__2 = z_abs(&z__1);
            dcfftf = max(d__1,d__2);
            i__3 = i__ - 1;
            i__4 = i__ - 1;
            z__1.r = cx[i__4].r / fn, z__1.i = cx[i__4].i / fn;
            cx[i__3].r = z__1.r, cx[i__3].i = z__1.i;
/* L152: */
        }
        dcfftf /= fn;
        i__2 = n;
        /* reference backward DFT into cy by direct summation (positive
           exponent) */
        for (i__ = 1; i__ <= i__2; ++i__) {
            arg1 = (real) (i__ - 1) * dt;
            i__3 = i__ - 1;
            cy[i__3].r = 0.0, cy[i__3].i = 0.0;
            i__3 = n;
            for (k = 1; k <= i__3; ++k) {
                arg2 = (real) (k - 1) * arg1;
                i__4 = i__ - 1;
                i__5 = i__ - 1;
                d__1 = cos(arg2);
                d__2 = sin(arg2);
                z__3.r = d__1, z__3.i = d__2;
                i__6 = k - 1;
                z__2.r = z__3.r * cx[i__6].r - z__3.i * cx[i__6].i, z__2.i =
                        z__3.r * cx[i__6].i + z__3.i * cx[i__6].r;
                z__1.r = cy[i__5].r + z__2.r, z__1.i = cy[i__5].i + z__2.i;
                cy[i__4].r = z__1.r, cy[i__4].i = z__1.i;
/* L153: */
            }
/* L154: */
        }
        cfftb(&n, cx, w, ifac);
        /* dcfftb: largest |cfftb result - reference|; cx is reloaded from cy */
        dcfftb = 0.0;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
            i__3 = i__ - 1;
            i__4 = i__ - 1;
            z__1.r = cx[i__3].r - cy[i__4].r, z__1.i = cx[i__3].i - cy[i__4].i;
            d__1 = dcfftb, d__2 = z_abs(&z__1);
            dcfftb = max(d__1,d__2);
            i__3 = i__ - 1;
            i__4 = i__ - 1;
            cx[i__3].r = cy[i__4].r, cx[i__3].i = cy[i__4].i;
/* L155: */
        }
        /* round trip: cfftf then cfftb on cx, scaled by 1/n, against cy */
        cf = 1.0 / fn;
        cfftf(&n, cx, w, ifac);
        cfftb(&n, cx, w, ifac);
        dcfb = 0.0;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
            i__3 = i__ - 1;
            z__2.r = cf * cx[i__3].r, z__2.i = cf * cx[i__3].i;
            i__4 = i__ - 1;
            z__1.r = z__2.r - cy[i__4].r, z__1.i = z__2.i - cy[i__4].i;
            d__1 = dcfb, d__2 = z_abs(&z__1);
            dcfb = max(d__1,d__2);
/* L156: */
        }
        /* emit one formatted record: n followed by the 19 error figures */
        s_wsfe(&io___58);
        do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&rftf, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&rftb, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&rftfb, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&sintt, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&sintfb, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&costt, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&costfb, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&sinqft, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&sinqbt, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&sinqfb, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&cosqft, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&cosqbt, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&cosqfb, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&dezf1, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&dezb1, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&dezfb, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&dcfftf, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&dcfftb, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&dcfb, (ftnlen)sizeof(doublereal));
        e_wsfe();
/* L157: */
    }




    return 0;
} /* MAIN__ */
/*
 * Length-n complex transform of `data` (n interleaved re/im pairs, updated
 * in place), computed as a convolution of length n2 using the tables that
 * bluestein_i() stored in `tstorage`.
 *
 * isign > 0 selects one transform direction, any other value the opposite
 * one; the two differ only in the conjugation applied in each step.
 */
void bluestein (size_t n, double *data, double *tstorage, int isign)
  {
  size_t n2=*((size_t *)tstorage);
  size_t m;
  double *bk   = tstorage+2;
  double *bkf  = tstorage+2+2*n;
  double *work = tstorage+2+2*(n+n2);
  double *akf  = tstorage+2+2*n+6*n2+16;

  /* initialize a_k and FFT it */
  if (isign>0)
    {
    for (m=0; m<2*n; m+=2)
      {
      const double dr=data[m], di=data[m+1];
      const double br=bk[m],   bi=bk[m+1];
      akf[m]   = dr*br - di*bi;
      akf[m+1] = dr*bi + di*br;
      }
    }
  else
    {
    for (m=0; m<2*n; m+=2)
      {
      const double dr=data[m], di=data[m+1];
      const double br=bk[m],   bi=bk[m+1];
      akf[m]   = dr*br + di*bi;
      akf[m+1] =-dr*bi + di*br;
      }
    }
  /* zero padding up to n2 complex values */
  for (m=2*n; m<2*n2; ++m)
    akf[m]=0;

  cfftf (n2,akf,work);

  /* do the convolution */
  if (isign>0)
    {
    for (m=0; m<2*n2; m+=2)
      {
      const double ar=akf[m], ai=akf[m+1];
      const double fr=bkf[m], fi=bkf[m+1];
      const double im = -ar*fi + ai*fr;
      akf[m  ] = ar*fr + ai*fi;
      akf[m+1] = im;
      }
    }
  else
    {
    for (m=0; m<2*n2; m+=2)
      {
      const double ar=akf[m], ai=akf[m+1];
      const double fr=bkf[m], fi=bkf[m+1];
      const double im = ar*fi + ai*fr;
      akf[m  ] = ar*fr - ai*fi;
      akf[m+1] = im;
      }
    }

  /* inverse FFT */
  cfftb (n2,akf,work);

  /* multiply by b_k* */
  if (isign>0)
    {
    for (m=0; m<2*n; m+=2)
      {
      const double br=bk[m],  bi=bk[m+1];
      const double ar=akf[m], ai=akf[m+1];
      data[m]   = br*ar - bi*ai;
      data[m+1] = bi*ar + br*ai;
      }
    }
  else
    {
    for (m=0; m<2*n; m+=2)
      {
      const double br=bk[m],  bi=bk[m+1];
      const double ar=akf[m], ai=akf[m+1];
      data[m]   = br*ar + bi*ai;
      data[m+1] =-bi*ar + br*ai;
      }
    }
  }
/* compare results with the regular fftpack */ void pffft_validate_N(int N, int cplx) { int Nfloat = N*(cplx?2:1); int Nbytes = Nfloat * sizeof(float); float *ref, *in, *out, *tmp, *tmp2; PFFFT_Setup *s = pffft_new_setup(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL); int pass; if (!s) { printf("Skipping N=%d, not supported\n", N); return; } ref = pffft_aligned_malloc(Nbytes); in = pffft_aligned_malloc(Nbytes); out = pffft_aligned_malloc(Nbytes); tmp = pffft_aligned_malloc(Nbytes); tmp2 = pffft_aligned_malloc(Nbytes); for (pass=0; pass < 2; ++pass) { float ref_max = 0; int k; //printf("N=%d pass=%d cplx=%d\n", N, pass, cplx); // compute reference solution with FFTPACK if (pass == 0) { float *wrk = malloc(2*Nbytes+15*sizeof(float)); for (k=0; k < Nfloat; ++k) { ref[k] = in[k] = frand()*2-1; out[k] = 1e30; } if (!cplx) { rffti(N, wrk); rfftf(N, ref, wrk); // use our ordering for real ffts instead of the one of fftpack { float refN=ref[N-1]; for (k=N-2; k >= 1; --k) ref[k+1] = ref[k]; ref[1] = refN; } } else { cffti(N, wrk); cfftf(N, ref, wrk); } free(wrk); } for (k = 0; k < Nfloat; ++k) ref_max = MAX(ref_max, fabs(ref[k])); // pass 0 : non canonical ordering of transform coefficients if (pass == 0) { // test forward transform, with different input / output pffft_transform(s, in, tmp, 0, PFFFT_FORWARD); memcpy(tmp2, tmp, Nbytes); memcpy(tmp, in, Nbytes); pffft_transform(s, tmp, tmp, 0, PFFFT_FORWARD); for (k = 0; k < Nfloat; ++k) { assert(tmp2[k] == tmp[k]); } // test reordering pffft_zreorder(s, tmp, out, PFFFT_FORWARD); pffft_zreorder(s, out, tmp, PFFFT_BACKWARD); for (k = 0; k < Nfloat; ++k) { assert(tmp2[k] == tmp[k]); } pffft_zreorder(s, tmp, out, PFFFT_FORWARD); } else { // pass 1 : canonical ordering of transform coeffs. 
pffft_transform_ordered(s, in, tmp, 0, PFFFT_FORWARD); memcpy(tmp2, tmp, Nbytes); memcpy(tmp, in, Nbytes); pffft_transform_ordered(s, tmp, tmp, 0, PFFFT_FORWARD); for (k = 0; k < Nfloat; ++k) { assert(tmp2[k] == tmp[k]); } memcpy(out, tmp, Nbytes); } { for (k=0; k < Nfloat; ++k) { if (!(fabs(ref[k] - out[k]) < 1e-3*ref_max)) { printf("%s forward PFFFT mismatch found for N=%d\n", (cplx?"CPLX":"REAL"), N); exit(1); } } if (pass == 0) pffft_transform(s, tmp, out, 0, PFFFT_BACKWARD); else pffft_transform_ordered(s, tmp, out, 0, PFFFT_BACKWARD); memcpy(tmp2, out, Nbytes); memcpy(out, tmp, Nbytes); if (pass == 0) pffft_transform(s, out, out, 0, PFFFT_BACKWARD); else pffft_transform_ordered(s, out, out, 0, PFFFT_BACKWARD); for (k = 0; k < Nfloat; ++k) { assert(tmp2[k] == out[k]); out[k] *= 1.f/N; } for (k = 0; k < Nfloat; ++k) { if (fabs(in[k] - out[k]) > 1e-3 * ref_max) { printf("pass=%d, %s IFFFT does not match for N=%d\n", pass, (cplx?"CPLX":"REAL"), N); break; exit(1); } } } // quick test of the circular convolution in fft domain { float conv_err = 0, conv_max = 0; pffft_zreorder(s, ref, tmp, PFFFT_FORWARD); memset(out, 0, Nbytes); pffft_zconvolve_accumulate(s, ref, ref, out, 1.0); pffft_zreorder(s, out, tmp2, PFFFT_FORWARD); for (k=0; k < Nfloat; k += 2) { float ar = tmp[k], ai=tmp[k+1]; if (cplx || k > 0) { tmp[k] = ar*ar - ai*ai; tmp[k+1] = 2*ar*ai; } else { tmp[0] = ar*ar; tmp[1] = ai*ai; } } for (k=0; k < Nfloat; ++k) { float d = fabs(tmp[k] - tmp2[k]), e = fabs(tmp[k]); if (d > conv_err) conv_err = d; if (e > conv_max) conv_max = e; } if (conv_err > 1e-5*conv_max) { printf("zconvolve error ? %g %g\n", conv_err, conv_max); exit(1); } } } printf("%s PFFFT is OK for N=%d\n", (cplx?"CPLX":"REAL"), N); fflush(stdout); pffft_destroy_setup(s); pffft_aligned_free(ref); pffft_aligned_free(in); pffft_aligned_free(out); pffft_aligned_free(tmp); pffft_aligned_free(tmp2); }
void benchmark_ffts(int N, int cplx) { int Nfloat = (cplx ? N*2 : N); int Nbytes = Nfloat * sizeof(float); float *X = pffft_aligned_malloc(Nbytes), *Y = pffft_aligned_malloc(Nbytes), *Z = pffft_aligned_malloc(Nbytes); double t0, t1, flops; int k; int max_iter = 5120000/N*4; #ifdef __arm__ max_iter /= 4; #endif int iter; for (k = 0; k < Nfloat; ++k) { X[k] = 0; //sqrtf(k+1); } // FFTPack benchmark { float *wrk = malloc(2*Nbytes + 15*sizeof(float)); int max_iter_ = max_iter/pffft_simd_size(); if (max_iter_ == 0) max_iter_ = 1; if (cplx) cffti(N, wrk); else rffti(N, wrk); t0 = uclock_sec(); for (iter = 0; iter < max_iter_; ++iter) { if (cplx) { cfftf(N, X, wrk); cfftb(N, X, wrk); } else { rfftf(N, X, wrk); rfftb(N, X, wrk); } } t1 = uclock_sec(); free(wrk); flops = (max_iter_*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html show_output("FFTPack", N, cplx, flops, t0, t1, max_iter_); } #ifdef HAVE_VECLIB int log2N = (int)(log(N)/log(2) + 0.5f); if (N == (1<<log2N)) { FFTSetup setup; setup = vDSP_create_fftsetup(log2N, FFT_RADIX2); DSPSplitComplex zsamples; zsamples.realp = &X[0]; zsamples.imagp = &X[Nfloat/2]; t0 = uclock_sec(); for (iter = 0; iter < max_iter; ++iter) { if (cplx) { vDSP_fft_zip(setup, &zsamples, 1, log2N, kFFTDirection_Forward); vDSP_fft_zip(setup, &zsamples, 1, log2N, kFFTDirection_Inverse); } else { vDSP_fft_zrip(setup, &zsamples, 1, log2N, kFFTDirection_Forward); vDSP_fft_zrip(setup, &zsamples, 1, log2N, kFFTDirection_Inverse); } } t1 = uclock_sec(); vDSP_destroy_fftsetup(setup); flops = (max_iter*2) * ((cplx ? 
5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html show_output("vDSP", N, cplx, flops, t0, t1, max_iter); } else { show_output("vDSP", N, cplx, -1, -1, -1, -1); } #endif #ifdef HAVE_FFTW { fftwf_plan planf, planb; fftw_complex *in = (fftw_complex*) fftwf_malloc(sizeof(fftw_complex) * N); fftw_complex *out = (fftw_complex*) fftwf_malloc(sizeof(fftw_complex) * N); memset(in, 0, sizeof(fftw_complex) * N); int flags = (N < 40000 ? FFTW_MEASURE : FFTW_ESTIMATE); // measure takes a lot of time on largest ffts //int flags = FFTW_ESTIMATE; if (cplx) { planf = fftwf_plan_dft_1d(N, (fftwf_complex*)in, (fftwf_complex*)out, FFTW_FORWARD, flags); planb = fftwf_plan_dft_1d(N, (fftwf_complex*)in, (fftwf_complex*)out, FFTW_BACKWARD, flags); } else { planf = fftwf_plan_dft_r2c_1d(N, (float*)in, (fftwf_complex*)out, flags); planb = fftwf_plan_dft_c2r_1d(N, (fftwf_complex*)in, (float*)out, flags); } t0 = uclock_sec(); for (iter = 0; iter < max_iter; ++iter) { fftwf_execute(planf); fftwf_execute(planb); } t1 = uclock_sec(); fftwf_destroy_plan(planf); fftwf_destroy_plan(planb); fftwf_free(in); fftwf_free(out); flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html show_output((flags == FFTW_MEASURE ? "FFTW (meas.)" : " FFTW (estim)"), N, cplx, flops, t0, t1, max_iter); } #endif // PFFFT benchmark { PFFFT_Setup *s = pffft_new_setup(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL); if (s) { t0 = uclock_sec(); for (iter = 0; iter < max_iter; ++iter) { pffft_transform(s, X, Z, Y, PFFFT_FORWARD); pffft_transform(s, X, Z, Y, PFFFT_BACKWARD); } t1 = uclock_sec(); pffft_destroy_setup(s); flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html show_output("PFFFT", N, cplx, flops, t0, t1, max_iter); } } if (!array_output_format) { printf("--\n"); } pffft_aligned_free(X); pffft_aligned_free(Y); pffft_aligned_free(Z); }