Esempio n. 1
0
void complex_plan_forward (complex_plan plan, double *data)
  {
  if (plan->bluestein)
    bluestein (plan->length, data, plan->work, -1);
  else
    cfftf (plan->length, data, plan->work);
  }
Esempio n. 2
0
void faad_mdct(mdct_info *mdct, real_t *X_in, real_t *X_out)
{
    uint16_t k;

    complex_t x;
    ALIGN complex_t Z1[512];
    complex_t *sincos = mdct->sincos;

    uint16_t N  = mdct->N;
    uint16_t N2 = N >> 1;
    uint16_t N4 = N >> 2;
    uint16_t N8 = N >> 3;

#ifndef FIXED_POINT
	real_t scale = REAL_CONST(N);
#else
	real_t scale = REAL_CONST(4.0/N);
#endif

    /* pre-FFT complex multiplication */
    for (k = 0; k < N8; k++)
    {
        uint16_t n = k << 1;
        RE(x) = X_in[N - N4 - 1 - n] + X_in[N - N4 +     n];
        IM(x) = X_in[    N4 +     n] - X_in[    N4 - 1 - n];

        ComplexMult(&RE(Z1[k]), &IM(Z1[k]),
            RE(x), IM(x), RE(sincos[k]), IM(sincos[k]));

        RE(Z1[k]) = MUL_R(RE(Z1[k]), scale);
        IM(Z1[k]) = MUL_R(IM(Z1[k]), scale);

        RE(x) =  X_in[N2 - 1 - n] - X_in[        n];
        IM(x) =  X_in[N2 +     n] + X_in[N - 1 - n];

        ComplexMult(&RE(Z1[k + N8]), &IM(Z1[k + N8]),
            RE(x), IM(x), RE(sincos[k + N8]), IM(sincos[k + N8]));

        RE(Z1[k + N8]) = MUL_R(RE(Z1[k + N8]), scale);
        IM(Z1[k + N8]) = MUL_R(IM(Z1[k + N8]), scale);
    }

    /* complex FFT, any non-scaling FFT can be used here  */
    cfftf(mdct->cfft, Z1);

    /* post-FFT complex multiplication */
    for (k = 0; k < N4; k++)
    {
        uint16_t n = k << 1;
        ComplexMult(&RE(x), &IM(x),
            RE(Z1[k]), IM(Z1[k]), RE(sincos[k]), IM(sincos[k]));

        X_out[         n] = -RE(x);
        X_out[N2 - 1 - n] =  IM(x);
        X_out[N2 +     n] = -IM(x);
        X_out[N  - 1 - n] =  RE(x);
    }
}
Esempio n. 3
0
PyObject *
fftpack_cfftf(PyObject *NPY_UNUSED(self), PyObject *args)
{
    PyObject *op1, *op2;
    PyArrayObject *data;
    PyArray_Descr *descr;
    double *wsave, *dptr;
    npy_intp nsave;
    int npts, nrepeats, i;

    if(!PyArg_ParseTuple(args, "OO", &op1, &op2)) {
        return NULL;
    }
    data = (PyArrayObject *)PyArray_CopyFromObject(op1,
            NPY_CDOUBLE, 1, 0);
    if (data == NULL) {
        return NULL;
    }
    descr = PyArray_DescrFromType(NPY_DOUBLE);
    if (PyArray_AsCArray(&op2, (void *)&wsave, &nsave, 1, descr) == -1) {
        goto fail;
    }
    if (data == NULL) {
        goto fail;
    }

    npts = PyArray_DIM(data, PyArray_NDIM(data) - 1);
    if (nsave != npts*4 + 15) {
        PyErr_SetString(ErrorObject, "invalid work array for fft size");
        goto fail;
    }

    nrepeats = PyArray_SIZE(data)/npts;
    dptr = (double *)PyArray_DATA(data);
    NPY_SIGINT_ON;
    for (i = 0; i < nrepeats; i++) {
        cfftf(npts, dptr, wsave);
        dptr += npts*2;
    }
    NPY_SIGINT_OFF;
    PyArray_Free(op2, (char *)wsave);
    return (PyObject *)data;

fail:
    PyArray_Free(op2, (char *)wsave);
    Py_DECREF(data);
    return NULL;
}
Esempio n. 4
0
void bluestein_i (size_t n, double **tstorage, size_t *worksize)
  {
  static const double pi=3.14159265358979323846;
  size_t n2=good_size(n*2-1);
  size_t m, coeff;
  double angle, xn2;
  double *bk, *bkf, *work;
  double pibyn=pi/n;
  *worksize=2+2*n+8*n2+16;
  *tstorage = RALLOC(double,2+2*n+8*n2+16);
  ((size_t *)(*tstorage))[0]=n2;
  bk  = *tstorage+2;
  bkf = *tstorage+2+2*n;
  work= *tstorage+2+2*(n+n2);

/* initialize b_k */
  bk[0] = 1;
  bk[1] = 0;

  coeff=0;
  for (m=1; m<n; ++m)
    {
    coeff+=2*m-1;
    if (coeff>=2*n) coeff-=2*n;
    angle = pibyn*coeff;
    bk[2*m] = cos(angle);
    bk[2*m+1] = sin(angle);
    }

/* initialize the zero-padded, Fourier transformed b_k. Add normalisation. */
  xn2 = 1./n2;
  bkf[0] = bk[0]*xn2;
  bkf[1] = bk[1]*xn2;
  for (m=2; m<2*n; m+=2)
    {
    bkf[m]   = bkf[2*n2-m]   = bk[m]   *xn2;
    bkf[m+1] = bkf[2*n2-m+1] = bk[m+1] *xn2;
    }
  for (m=2*n;m<=(2*n2-2*n+1);++m)
    bkf[m]=0.;
  cffti (n2,work);
  cfftf (n2,bkf,work);
  }
Esempio n. 5
0
void faad_mdct(mdct_info *mdct, real_t *X_in, real_t *X_out)
{
    uint16_t k;

    complex_t x;
    ALIGN complex_t Z1[512];
    complex_t *sincos = mdct->sincos;

    uint16_t N  = mdct->N;
    uint16_t N2 = N >> 1;
    uint16_t N4 = N >> 2;
    uint16_t N8 = N >> 3;

#ifndef FIXED_POINT
	real_t scale = REAL_CONST(N);
#else
	real_t scale = REAL_CONST(4.0/N);
#endif

#ifdef ALLOW_SMALL_FRAMELENGTH
#ifdef FIXED_POINT
    /* detect non-power of 2 */
    if (N & (N-1))
    {
        /* adjust scale for non-power of 2 MDCT */
        /* *= sqrt(2048/1920) */
        scale = MUL_C(scale, COEF_CONST(1.0327955589886444));
    }
#endif
#endif

    /* pre-FFT complex multiplication */
    for (k = 0; k < N8; k++)
    {
        uint16_t n = k << 1;
        RE(x) = X_in[N - N4 - 1 - n] + X_in[N - N4 +     n];
        IM(x) = X_in[    N4 +     n] - X_in[    N4 - 1 - n];

        ComplexMult(&RE(Z1[k]), &IM(Z1[k]),
            RE(x), IM(x), RE(sincos[k]), IM(sincos[k]));

        RE(Z1[k]) = MUL_R(RE(Z1[k]), scale);
        IM(Z1[k]) = MUL_R(IM(Z1[k]), scale);

        RE(x) =  X_in[N2 - 1 - n] - X_in[        n];
        IM(x) =  X_in[N2 +     n] + X_in[N - 1 - n];

        ComplexMult(&RE(Z1[k + N8]), &IM(Z1[k + N8]),
            RE(x), IM(x), RE(sincos[k + N8]), IM(sincos[k + N8]));

        RE(Z1[k + N8]) = MUL_R(RE(Z1[k + N8]), scale);
        IM(Z1[k + N8]) = MUL_R(IM(Z1[k + N8]), scale);
    }

    /* complex FFT, any non-scaling FFT can be used here  */
    cfftf(mdct->cfft, Z1);

    /* post-FFT complex multiplication */
    for (k = 0; k < N4; k++)
    {
        uint16_t n = k << 1;
        ComplexMult(&RE(x), &IM(x),
            RE(Z1[k]), IM(Z1[k]), RE(sincos[k]), IM(sincos[k]));

        X_out[         n] = -RE(x);
        X_out[N2 - 1 - n] =  IM(x);
        X_out[N2 +     n] = -IM(x);
        X_out[N  - 1 - n] =  RE(x);
    }
}
Esempio n. 6
0
int cfftf(int *n, doublecomplex *c, double *wsave) {
  // Casting (doublecomplex*) to (double*) is probably OK.
  return cfftf(n, (double *)c, wsave);
}
Esempio n. 7
0
/* Main program */ int MAIN__(void)
{
    /* Initialized data */

    static integer nd[10] = { 120,54,49,32,4,3,2 };

    /* Format strings */
    static char fmt_1001[] = "(\0020N\002,i5,\002 RFFTF  \002,e10.3,\002 RFF"
	    "TB  \002,e10.3,\002 RFFTFB \002,e10.3,\002 SINT   \002,e10.3,"
	    "\002 SINTFB \002,e10.3,\002 COST   \002,e10.3/7x,\002 COSTFB "
	    "\002,e10.3,\002 SINQF  \002,e10.3,\002 SINQB  \002,e10.3,\002 SI"
	    "NQFB \002,e10.3,\002 COSQF  \002,e10.3,\002 COSQB  \002,e10.3/7x,"
	    "\002 COSQFB \002,e10.3,\002 DEZF   \002,e10.3,\002 DEZB   \002,e"
	    "10.3,\002 DEZFB  \002,e10.3,\002 CFFTF  \002,e10.3,\002 CFFTB "
	    " \002,e10.3/7x,\002 CFFTFB \002,e10.3)";

    /* System generated locals */
    integer i__1, i__2, i__3, i__4, i__5, i__6;
    doublereal d__1, d__2, d__3, d__4;
    doublecomplex z__1, z__2, z__3;

    /* Builtin functions */
    double sqrt(doublereal), sin(doublereal), cos(doublereal);
    integer pow_ii(integer *, integer *);
    double atan(doublereal), z_abs(doublecomplex *);
    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void);

    /* Local variables */
    doublereal a[100], b[100];
    integer i__, j, k, n;
    doublereal w[2000], x[200], y[200], ah[100], bh[100], cf, fn, dt, pi;
    doublecomplex cx[200], cy[200];
    doublereal xh[200];
    integer nz, nm1, np1, ns2;
    doublereal arg, tfn, tpi;
    integer nns;
    doublereal sum, arg1, arg2;
    integer ns2m;
    doublereal sum1, sum2, dcfb;
    integer ifac[64], modn;
    doublereal rftb, rftf;
    extern /* Subroutine */ void cost(integer *, doublereal *, doublereal *, 
	    integer *), sint(integer *, doublereal *, doublereal *, integer *
	    );
    doublereal dezb1, dezf1, sqrt2;
    extern /* Subroutine */ void cfftb(integer *, doublecomplex *, doublereal 
	    *, integer *), cfftf(integer *, doublecomplex *, doublereal *, 
	    integer *);
    doublereal dezfb;
    extern /* Subroutine */ void cffti(integer *, doublereal *, integer *), 
	    rfftb(integer *, doublereal *, doublereal *, integer *);
    doublereal rftfb;
    extern /* Subroutine */ void rfftf(integer *, doublereal *, doublereal *, 
	    integer *), cosqb(integer *, doublereal *, doublereal *, integer 
	    *), rffti(integer *, doublereal *, integer *), cosqf(integer *, 
	    doublereal *, doublereal *, integer *), sinqb(integer *, 
	    doublereal *, doublereal *, integer *), cosqi(integer *, 
	    doublereal *, integer *), sinqf(integer *, doublereal *, 
	    doublereal *, integer *), costi(integer *, doublereal *, integer 
	    *);
    doublereal azero;
    extern /* Subroutine */ void sinqi(integer *, doublereal *, integer *), 
	    sinti(integer *, doublereal *, integer *);
    doublereal costt, sintt, dcfftb, dcfftf, cosqfb, costfb;
    extern /* Subroutine */ void ezfftb(integer *, doublereal *, doublereal *,
	     doublereal *, doublereal *, doublereal *, integer *);
    doublereal sinqfb;
    extern /* Subroutine */ void ezfftf(integer *, doublereal *, doublereal *,
	     doublereal *, doublereal *, doublereal *, integer *);
    doublereal sintfb;
    extern /* Subroutine */ void ezffti(integer *, doublereal *, integer *);
    doublereal azeroh, cosqbt, cosqft, sinqbt, sinqft;

    /* Fortran I/O blocks */
    static cilist io___58 = { 0, 6, 0, fmt_1001, 0 };



/*     * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

/*                       VERSION 4  APRIL 1985 */

/*                         A TEST DRIVER FOR */
/*          A PACKAGE OF FORTRAN SUBPROGRAMS FOR THE FAST FOURIER */
/*           TRANSFORM OF PERIODIC AND OTHER SYMMETRIC SEQUENCES */

/*                              BY */

/*                       PAUL N SWARZTRAUBER */

/*       NATIONAL CENTER FOR ATMOSPHERIC RESEARCH  BOULDER,COLORADO 80307 */

/*        WHICH IS SPONSORED BY THE NATIONAL SCIENCE FOUNDATION */

/*     * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */


/*             THIS PROGRAM TESTS THE PACKAGE OF FAST FOURIER */
/*     TRANSFORMS FOR BOTH COMPLEX AND REAL PERIODIC SEQUENCES AND */
/*     CERTIAN OTHER SYMMETRIC SEQUENCES THAT ARE LISTED BELOW. */

/*     1.   RFFTI     INITIALIZE  RFFTF AND RFFTB */
/*     2.   RFFTF     FORWARD TRANSFORM OF A REAL PERIODIC SEQUENCE */
/*     3.   RFFTB     BACKWARD TRANSFORM OF A REAL COEFFICIENT ARRAY */

/*     4.   EZFFTI    INITIALIZE EZFFTF AND EZFFTB */
/*     5.   EZFFTF    A SIMPLIFIED REAL PERIODIC FORWARD TRANSFORM */
/*     6.   EZFFTB    A SIMPLIFIED REAL PERIODIC BACKWARD TRANSFORM */

/*     7.   SINTI     INITIALIZE SINT */
/*     8.   SINT      SINE TRANSFORM OF A REAL ODD SEQUENCE */

/*     9.   COSTI     INITIALIZE COST */
/*     10.  COST      COSINE TRANSFORM OF A REAL EVEN SEQUENCE */

/*     11.  SINQI     INITIALIZE SINQF AND SINQB */
/*     12.  SINQF     FORWARD SINE TRANSFORM WITH ODD WAVE NUMBERS */
/*     13.  SINQB     UNNORMALIZED INVERSE OF SINQF */

/*     14.  COSQI     INITIALIZE COSQF AND COSQB */
/*     15.  COSQF     FORWARD COSINE TRANSFORM WITH ODD WAVE NUMBERS */
/*     16.  COSQB     UNNORMALIZED INVERSE OF COSQF */

/*     17.  CFFTI     INITIALIZE CFFTF AND CFFTB */
/*     18.  CFFTF     FORWARD TRANSFORM OF A COMPLEX PERIODIC SEQUENCE */
/*     19.  CFFTB     UNNORMALIZED INVERSE OF CFFTF */


    sqrt2 = sqrt(2.0);
    nns = 7;
    i__1 = nns;
    for (nz = 1; nz <= i__1; ++nz) {
	n = nd[nz - 1];
	modn = n % 2;
	fn = (real) n;
	tfn = fn + fn;
	np1 = n + 1;
	nm1 = n - 1;
	i__2 = np1;
	for (j = 1; j <= i__2; ++j) {
	    x[j - 1] = sin((real) j * sqrt2);
	    y[j - 1] = x[j - 1];
	    xh[j - 1] = x[j - 1];
/* L101: */
	}

/*     TEST SUBROUTINES RFFTI,RFFTF AND RFFTB */

	rffti(&n, w, ifac);
	pi = 3.141592653589793238462643383279502884197169399375108209749445923;
	dt = (pi + pi) / fn;
	ns2 = (n + 1) / 2;
	if (ns2 < 2) {
	    goto L104;
	}
	i__2 = ns2;
	for (k = 2; k <= i__2; ++k) {
	    sum1 = 0.0;
	    sum2 = 0.0;
	    arg = (real) (k - 1) * dt;
	    i__3 = n;
	    for (i__ = 1; i__ <= i__3; ++i__) {
		arg1 = (real) (i__ - 1) * arg;
		sum1 += x[i__ - 1] * cos(arg1);
		sum2 += x[i__ - 1] * sin(arg1);
/* L102: */
	    }
	    y[(k << 1) - 3] = sum1;
	    y[(k << 1) - 2] = -sum2;
/* L103: */
	}
L104:
	sum1 = 0.0;
	sum2 = 0.0;
	i__2 = nm1;
	for (i__ = 1; i__ <= i__2; i__ += 2) {
	    sum1 += x[i__ - 1];
	    sum2 += x[i__];
/* L105: */
	}
	if (modn == 1) {
	    sum1 += x[n - 1];
	}
	y[0] = sum1 + sum2;
	if (modn == 0) {
	    y[n - 1] = sum1 - sum2;
	}
	rfftf(&n, x, w, ifac);
	rftf = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = rftf, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
	    rftf = max(d__2,d__3);
	    x[i__ - 1] = xh[i__ - 1];
/* L106: */
	}
	rftf /= fn;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    sum = x[0] * 0.5;
	    arg = (real) (i__ - 1) * dt;
	    if (ns2 < 2) {
		goto L108;
	    }
	    i__3 = ns2;
	    for (k = 2; k <= i__3; ++k) {
		arg1 = (real) (k - 1) * arg;
		sum = sum + x[(k << 1) - 3] * cos(arg1) - x[(k << 1) - 2] * 
			sin(arg1);
/* L107: */
	    }
L108:
	    if (modn == 0) {
		i__3 = i__ - 1;
		sum += (real) pow_ii(&c_n1, &i__3) * 0.5 * x[n - 1];
	    }
	    y[i__ - 1] = sum + sum;
/* L109: */
	}
	rfftb(&n, x, w, ifac);
	rftb = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = rftb, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
	    rftb = max(d__2,d__3);
	    x[i__ - 1] = xh[i__ - 1];
	    y[i__ - 1] = xh[i__ - 1];
/* L110: */
	}
	rfftb(&n, y, w, ifac);
	rfftf(&n, y, w, ifac);
	cf = 1.0 / fn;
	rftfb = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = rftfb, d__3 = (d__1 = cf * y[i__ - 1] - x[i__ - 1], abs(
		    d__1));
	    rftfb = max(d__2,d__3);
/* L111: */
	}

/*     TEST SUBROUTINES SINTI AND SINT */

	dt = pi / fn;
	i__2 = nm1;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    x[i__ - 1] = xh[i__ - 1];
/* L112: */
	}
	i__2 = nm1;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    y[i__ - 1] = 0.0;
	    arg1 = (real) i__ * dt;
	    i__3 = nm1;
	    for (k = 1; k <= i__3; ++k) {
		y[i__ - 1] += x[k - 1] * sin((real) k * arg1);
/* L113: */
	    }
	    y[i__ - 1] += y[i__ - 1];
/* L114: */
	}
	sinti(&nm1, w, ifac);
	sint(&nm1, x, w, ifac);
	cf = 0.5 / fn;
	sintt = 0.0;
	i__2 = nm1;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = sintt, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
	    sintt = max(d__2,d__3);
	    x[i__ - 1] = xh[i__ - 1];
	    y[i__ - 1] = x[i__ - 1];
/* L115: */
	}
	sintt = cf * sintt;
	sint(&nm1, x, w, ifac);
	sint(&nm1, x, w, ifac);
	sintfb = 0.0;
	i__2 = nm1;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = sintfb, d__3 = (d__1 = cf * x[i__ - 1] - y[i__ - 1], abs(
		    d__1));
	    sintfb = max(d__2,d__3);
/* L116: */
	}

/*     TEST SUBROUTINES COSTI AND COST */

	i__2 = np1;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    x[i__ - 1] = xh[i__ - 1];
/* L117: */
	}
	i__2 = np1;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    i__3 = i__ + 1;
	    y[i__ - 1] = (x[0] + (real) pow_ii(&c_n1, &i__3) * x[n]) * 0.5;
	    arg = (real) (i__ - 1) * dt;
	    i__3 = n;
	    for (k = 2; k <= i__3; ++k) {
		y[i__ - 1] += x[k - 1] * cos((real) (k - 1) * arg);
/* L118: */
	    }
	    y[i__ - 1] += y[i__ - 1];
/* L119: */
	}
	costi(&np1, w, ifac);
	cost(&np1, x, w, ifac);
	costt = 0.0;
	i__2 = np1;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = costt, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
	    costt = max(d__2,d__3);
	    x[i__ - 1] = xh[i__ - 1];
	    y[i__ - 1] = xh[i__ - 1];
/* L120: */
	}
	costt = cf * costt;
	cost(&np1, x, w, ifac);
	cost(&np1, x, w, ifac);
	costfb = 0.0;
	i__2 = np1;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = costfb, d__3 = (d__1 = cf * x[i__ - 1] - y[i__ - 1], abs(
		    d__1));
	    costfb = max(d__2,d__3);
/* L121: */
	}

/*     TEST SUBROUTINES SINQI,SINQF AND SINQB */

	cf = 0.25 / fn;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    y[i__ - 1] = xh[i__ - 1];
/* L122: */
	}
	dt = pi / (fn + fn);
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    x[i__ - 1] = 0.0;
	    arg = dt * (real) i__;
	    i__3 = n;
	    for (k = 1; k <= i__3; ++k) {
		x[i__ - 1] += y[k - 1] * sin((real) (k + k - 1) * arg);
/* L123: */
	    }
	    x[i__ - 1] *= 4.0;
/* L124: */
	}
	sinqi(&n, w, ifac);
	sinqb(&n, y, w, ifac);
	sinqbt = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = sinqbt, d__3 = (d__1 = y[i__ - 1] - x[i__ - 1], abs(d__1));
	    sinqbt = max(d__2,d__3);
	    x[i__ - 1] = xh[i__ - 1];
/* L125: */
	}
	sinqbt = cf * sinqbt;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    arg = (real) (i__ + i__ - 1) * dt;
	    i__3 = i__ + 1;
	    y[i__ - 1] = (real) pow_ii(&c_n1, &i__3) * 0.5 * x[n - 1];
	    i__3 = nm1;
	    for (k = 1; k <= i__3; ++k) {
		y[i__ - 1] += x[k - 1] * sin((real) k * arg);
/* L126: */
	    }
	    y[i__ - 1] += y[i__ - 1];
/* L127: */
	}
	sinqf(&n, x, w, ifac);
	sinqft = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = sinqft, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
	    sinqft = max(d__2,d__3);
	    y[i__ - 1] = xh[i__ - 1];
	    x[i__ - 1] = xh[i__ - 1];
/* L128: */
	}
	sinqf(&n, y, w, ifac);
	sinqb(&n, y, w, ifac);
	sinqfb = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = sinqfb, d__3 = (d__1 = cf * y[i__ - 1] - x[i__ - 1], abs(
		    d__1));
	    sinqfb = max(d__2,d__3);
/* L129: */
	}

/*     TEST SUBROUTINES COSQI,COSQF AND COSQB */

	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    y[i__ - 1] = xh[i__ - 1];
/* L130: */
	}
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    x[i__ - 1] = 0.0;
	    arg = (real) (i__ - 1) * dt;
	    i__3 = n;
	    for (k = 1; k <= i__3; ++k) {
		x[i__ - 1] += y[k - 1] * cos((real) (k + k - 1) * arg);
/* L131: */
	    }
	    x[i__ - 1] *= 4.0;
/* L132: */
	}
	cosqi(&n, w, ifac);
	cosqb(&n, y, w, ifac);
	cosqbt = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = cosqbt, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
	    cosqbt = max(d__2,d__3);
	    x[i__ - 1] = xh[i__ - 1];
/* L133: */
	}
	cosqbt = cf * cosqbt;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    y[i__ - 1] = x[0] * 0.5;
	    arg = (real) (i__ + i__ - 1) * dt;
	    i__3 = n;
	    for (k = 2; k <= i__3; ++k) {
		y[i__ - 1] += x[k - 1] * cos((real) (k - 1) * arg);
/* L134: */
	    }
	    y[i__ - 1] += y[i__ - 1];
/* L135: */
	}
	cosqf(&n, x, w, ifac);
	cosqft = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = cosqft, d__3 = (d__1 = y[i__ - 1] - x[i__ - 1], abs(d__1));
	    cosqft = max(d__2,d__3);
	    x[i__ - 1] = xh[i__ - 1];
	    y[i__ - 1] = xh[i__ - 1];
/* L136: */
	}
	cosqft = cf * cosqft;
	cosqb(&n, x, w, ifac);
	cosqf(&n, x, w, ifac);
	cosqfb = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = cosqfb, d__3 = (d__1 = cf * x[i__ - 1] - y[i__ - 1], abs(
		    d__1));
	    cosqfb = max(d__2,d__3);
/* L137: */
	}

/*     TEST PROGRAMS EZFFTI,EZFFTF,EZFFTB */

	ezffti(&n, w, ifac);
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    x[i__ - 1] = xh[i__ - 1];
/* L138: */
	}
	tpi = atan(1.0) * 8.0;
	dt = tpi / (real) n;
	ns2 = (n + 1) / 2;
	cf = 2.0 / (real) n;
	ns2m = ns2 - 1;
	if (ns2m <= 0) {
	    goto L141;
	}
	i__2 = ns2m;
	for (k = 1; k <= i__2; ++k) {
	    sum1 = 0.0;
	    sum2 = 0.0;
	    arg = (real) k * dt;
	    i__3 = n;
	    for (i__ = 1; i__ <= i__3; ++i__) {
		arg1 = (real) (i__ - 1) * arg;
		sum1 += x[i__ - 1] * cos(arg1);
		sum2 += x[i__ - 1] * sin(arg1);
/* L139: */
	    }
	    a[k - 1] = cf * sum1;
	    b[k - 1] = cf * sum2;
/* L140: */
	}
L141:
	nm1 = n - 1;
	sum1 = 0.0;
	sum2 = 0.0;
	i__2 = nm1;
	for (i__ = 1; i__ <= i__2; i__ += 2) {
	    sum1 += x[i__ - 1];
	    sum2 += x[i__];
/* L142: */
	}
	if (modn == 1) {
	    sum1 += x[n - 1];
	}
	azero = cf * 0.5 * (sum1 + sum2);
	if (modn == 0) {
	    a[ns2 - 1] = cf * 0.5 * (sum1 - sum2);
	}
	ezfftf(&n, x, &azeroh, ah, bh, w, ifac);
	dezf1 = (d__1 = azeroh - azero, abs(d__1));
	if (modn == 0) {
/* Computing MAX */
	    d__2 = dezf1, d__3 = (d__1 = a[ns2 - 1] - ah[ns2 - 1], abs(d__1));
	    dezf1 = max(d__2,d__3);
	}
	if (ns2m <= 0) {
	    goto L144;
	}
	i__2 = ns2m;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__3 = dezf1, d__4 = (d__1 = ah[i__ - 1] - a[i__ - 1], abs(d__1)),
		     d__3 = max(d__3,d__4), d__4 = (d__2 = bh[i__ - 1] - b[
		    i__ - 1], abs(d__2));
	    dezf1 = max(d__3,d__4);
/* L143: */
	}
L144:
	ns2 = n / 2;
	if (modn == 0) {
	    b[ns2 - 1] = 0.0;
	}
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    sum = azero;
	    arg1 = (real) (i__ - 1) * dt;
	    i__3 = ns2;
	    for (k = 1; k <= i__3; ++k) {
		arg2 = (real) k * arg1;
		sum = sum + a[k - 1] * cos(arg2) + b[k - 1] * sin(arg2);
/* L145: */
	    }
	    x[i__ - 1] = sum;
/* L146: */
	}
	ezfftb(&n, y, &azero, a, b, w, ifac);
	dezb1 = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = dezb1, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
	    dezb1 = max(d__2,d__3);
	    x[i__ - 1] = xh[i__ - 1];
/* L147: */
	}
	ezfftf(&n, x, &azero, a, b, w, ifac);
	ezfftb(&n, y, &azero, a, b, w, ifac);
	dezfb = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = dezfb, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
	    dezfb = max(d__2,d__3);
/* L148: */
	}

/*     TEST  CFFTI,CFFTF,CFFTB */

	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    i__3 = i__ - 1;
	    d__1 = cos(sqrt2 * (real) i__);
	    d__2 = sin(sqrt2 * (real) (i__ * i__));
	    z__1.r = d__1, z__1.i = d__2;
	    cx[i__3].r = z__1.r, cx[i__3].i = z__1.i;
/* L149: */
	}
	dt = (pi + pi) / fn;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    arg1 = -((real) (i__ - 1)) * dt;
	    i__3 = i__ - 1;
	    cy[i__3].r = 0.0, cy[i__3].i = 0.0;
	    i__3 = n;
	    for (k = 1; k <= i__3; ++k) {
		arg2 = (real) (k - 1) * arg1;
		i__4 = i__ - 1;
		i__5 = i__ - 1;
		d__1 = cos(arg2);
		d__2 = sin(arg2);
		z__3.r = d__1, z__3.i = d__2;
		i__6 = k - 1;
		z__2.r = z__3.r * cx[i__6].r - z__3.i * cx[i__6].i, z__2.i = 
			z__3.r * cx[i__6].i + z__3.i * cx[i__6].r;
		z__1.r = cy[i__5].r + z__2.r, z__1.i = cy[i__5].i + z__2.i;
		cy[i__4].r = z__1.r, cy[i__4].i = z__1.i;
/* L150: */
	    }
/* L151: */
	}
	cffti(&n, w, ifac);
	cfftf(&n, cx, w, ifac);
	dcfftf = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    i__3 = i__ - 1;
	    i__4 = i__ - 1;
	    z__1.r = cx[i__3].r - cy[i__4].r, z__1.i = cx[i__3].i - cy[i__4]
		    .i;
	    d__1 = dcfftf, d__2 = z_abs(&z__1);
	    dcfftf = max(d__1,d__2);
	    i__3 = i__ - 1;
	    i__4 = i__ - 1;
	    z__1.r = cx[i__4].r / fn, z__1.i = cx[i__4].i / fn;
	    cx[i__3].r = z__1.r, cx[i__3].i = z__1.i;
/* L152: */
	}
	dcfftf /= fn;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    arg1 = (real) (i__ - 1) * dt;
	    i__3 = i__ - 1;
	    cy[i__3].r = 0.0, cy[i__3].i = 0.0;
	    i__3 = n;
	    for (k = 1; k <= i__3; ++k) {
		arg2 = (real) (k - 1) * arg1;
		i__4 = i__ - 1;
		i__5 = i__ - 1;
		d__1 = cos(arg2);
		d__2 = sin(arg2);
		z__3.r = d__1, z__3.i = d__2;
		i__6 = k - 1;
		z__2.r = z__3.r * cx[i__6].r - z__3.i * cx[i__6].i, z__2.i = 
			z__3.r * cx[i__6].i + z__3.i * cx[i__6].r;
		z__1.r = cy[i__5].r + z__2.r, z__1.i = cy[i__5].i + z__2.i;
		cy[i__4].r = z__1.r, cy[i__4].i = z__1.i;
/* L153: */
	    }
/* L154: */
	}
	cfftb(&n, cx, w, ifac);
	dcfftb = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    i__3 = i__ - 1;
	    i__4 = i__ - 1;
	    z__1.r = cx[i__3].r - cy[i__4].r, z__1.i = cx[i__3].i - cy[i__4]
		    .i;
	    d__1 = dcfftb, d__2 = z_abs(&z__1);
	    dcfftb = max(d__1,d__2);
	    i__3 = i__ - 1;
	    i__4 = i__ - 1;
	    cx[i__3].r = cy[i__4].r, cx[i__3].i = cy[i__4].i;
/* L155: */
	}
	cf = 1.0 / fn;
	cfftf(&n, cx, w, ifac);
	cfftb(&n, cx, w, ifac);
	dcfb = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    i__3 = i__ - 1;
	    z__2.r = cf * cx[i__3].r, z__2.i = cf * cx[i__3].i;
	    i__4 = i__ - 1;
	    z__1.r = z__2.r - cy[i__4].r, z__1.i = z__2.i - cy[i__4].i;
	    d__1 = dcfb, d__2 = z_abs(&z__1);
	    dcfb = max(d__1,d__2);
/* L156: */
	}
	s_wsfe(&io___58);
	do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
	do_fio(&c__1, (char *)&rftf, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&rftb, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&rftfb, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&sintt, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&sintfb, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&costt, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&costfb, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&sinqft, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&sinqbt, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&sinqfb, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&cosqft, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&cosqbt, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&cosqfb, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&dezf1, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&dezb1, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&dezfb, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&dcfftf, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&dcfftb, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&dcfb, (ftnlen)sizeof(doublereal));
	e_wsfe();
/* L157: */
    }




    return 0;
} /* MAIN__ */
Esempio n. 8
0
void bluestein (size_t n, double *data, double *tstorage, int isign)
  {
  size_t n2=*((size_t *)tstorage);
  size_t m;
  double *bk, *bkf, *akf, *work;
  bk  = tstorage+2;
  bkf = tstorage+2+2*n;
  work= tstorage+2+2*(n+n2);
  akf = tstorage+2+2*n+6*n2+16;

/* initialize a_k and FFT it */
  if (isign>0)
    for (m=0; m<2*n; m+=2)
      {
      akf[m]   = data[m]*bk[m]   - data[m+1]*bk[m+1];
      akf[m+1] = data[m]*bk[m+1] + data[m+1]*bk[m];
      }
  else
    for (m=0; m<2*n; m+=2)
      {
      akf[m]   = data[m]*bk[m]   + data[m+1]*bk[m+1];
      akf[m+1] =-data[m]*bk[m+1] + data[m+1]*bk[m];
      }
  for (m=2*n; m<2*n2; ++m)
    akf[m]=0;

  cfftf (n2,akf,work);

/* do the convolution */
  if (isign>0)
    for (m=0; m<2*n2; m+=2)
      {
      double im = -akf[m]*bkf[m+1] + akf[m+1]*bkf[m];
      akf[m  ]  =  akf[m]*bkf[m]   + akf[m+1]*bkf[m+1];
      akf[m+1]  = im;
      }
  else
    for (m=0; m<2*n2; m+=2)
      {
      double im = akf[m]*bkf[m+1] + akf[m+1]*bkf[m];
      akf[m  ]  = akf[m]*bkf[m]   - akf[m+1]*bkf[m+1];
      akf[m+1]  = im;
      }


/* inverse FFT */
  cfftb (n2,akf,work);

/* multiply by b_k* */
  if (isign>0)
    for (m=0; m<2*n; m+=2)
      {
      data[m]   = bk[m]  *akf[m] - bk[m+1]*akf[m+1];
      data[m+1] = bk[m+1]*akf[m] + bk[m]  *akf[m+1];
      }
  else
    for (m=0; m<2*n; m+=2)
      {
      data[m]   = bk[m]  *akf[m] + bk[m+1]*akf[m+1];
      data[m+1] =-bk[m+1]*akf[m] + bk[m]  *akf[m+1];
      }
  }
Esempio n. 9
0
/* compare results with the regular fftpack */
void pffft_validate_N(int N, int cplx) {
  int Nfloat = N*(cplx?2:1);
  int Nbytes = Nfloat * sizeof(float);
  float *ref, *in, *out, *tmp, *tmp2;
  PFFFT_Setup *s = pffft_new_setup(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL);
  int pass;

  if (!s) { printf("Skipping N=%d, not supported\n", N); return; }
  ref = pffft_aligned_malloc(Nbytes);
  in = pffft_aligned_malloc(Nbytes);
  out = pffft_aligned_malloc(Nbytes);
  tmp = pffft_aligned_malloc(Nbytes);
  tmp2 = pffft_aligned_malloc(Nbytes);

  for (pass=0; pass < 2; ++pass) {
    float ref_max = 0;
    int k;
    //printf("N=%d pass=%d cplx=%d\n", N, pass, cplx);
    // compute reference solution with FFTPACK
    if (pass == 0) {
      float *wrk = malloc(2*Nbytes+15*sizeof(float));
      for (k=0; k < Nfloat; ++k) {
        ref[k] = in[k] = frand()*2-1; 
        out[k] = 1e30;
      }
      if (!cplx) {
        rffti(N, wrk);
        rfftf(N, ref, wrk);
        // use our ordering for real ffts instead of the one of fftpack
        {
          float refN=ref[N-1];
          for (k=N-2; k >= 1; --k) ref[k+1] = ref[k]; 
          ref[1] = refN;
        }
      } else {
        cffti(N, wrk);
        cfftf(N, ref, wrk);
      }
      free(wrk);
    }

    for (k = 0; k < Nfloat; ++k) ref_max = MAX(ref_max, fabs(ref[k]));

      
    // pass 0 : non canonical ordering of transform coefficients  
    if (pass == 0) {
      // test forward transform, with different input / output
      pffft_transform(s, in, tmp, 0, PFFFT_FORWARD);
      memcpy(tmp2, tmp, Nbytes);
      memcpy(tmp, in, Nbytes);
      pffft_transform(s, tmp, tmp, 0, PFFFT_FORWARD);
      for (k = 0; k < Nfloat; ++k) {
        assert(tmp2[k] == tmp[k]);
      }

      // test reordering
      pffft_zreorder(s, tmp, out, PFFFT_FORWARD);
      pffft_zreorder(s, out, tmp, PFFFT_BACKWARD);
      for (k = 0; k < Nfloat; ++k) {
        assert(tmp2[k] == tmp[k]);
      }
      pffft_zreorder(s, tmp, out, PFFFT_FORWARD);
    } else {
      // pass 1 : canonical ordering of transform coeffs.
      pffft_transform_ordered(s, in, tmp, 0, PFFFT_FORWARD);
      memcpy(tmp2, tmp, Nbytes);
      memcpy(tmp, in, Nbytes);
      pffft_transform_ordered(s, tmp, tmp, 0, PFFFT_FORWARD);
      for (k = 0; k < Nfloat; ++k) {
        assert(tmp2[k] == tmp[k]);
      }
      memcpy(out, tmp, Nbytes);
    }

    {
      for (k=0; k < Nfloat; ++k) {
        if (!(fabs(ref[k] - out[k]) < 1e-3*ref_max)) {
          printf("%s forward PFFFT mismatch found for N=%d\n", (cplx?"CPLX":"REAL"), N);
          exit(1);
        }
      }
        
      if (pass == 0) pffft_transform(s, tmp, out, 0, PFFFT_BACKWARD);
      else   pffft_transform_ordered(s, tmp, out, 0, PFFFT_BACKWARD);
      memcpy(tmp2, out, Nbytes);
      memcpy(out, tmp, Nbytes);
      if (pass == 0) pffft_transform(s, out, out, 0, PFFFT_BACKWARD);
      else   pffft_transform_ordered(s, out, out, 0, PFFFT_BACKWARD);
      for (k = 0; k < Nfloat; ++k) {
        assert(tmp2[k] == out[k]);
        out[k] *= 1.f/N;
      }
      for (k = 0; k < Nfloat; ++k) {
        if (fabs(in[k] - out[k]) > 1e-3 * ref_max) {
          printf("pass=%d, %s IFFFT does not match for N=%d\n", pass, (cplx?"CPLX":"REAL"), N); break;
          exit(1);
        }
      }
    }

    // quick test of the circular convolution in fft domain
    {
      float conv_err = 0, conv_max = 0;

      pffft_zreorder(s, ref, tmp, PFFFT_FORWARD);
      memset(out, 0, Nbytes);
      pffft_zconvolve_accumulate(s, ref, ref, out, 1.0);
      pffft_zreorder(s, out, tmp2, PFFFT_FORWARD);
      
      for (k=0; k < Nfloat; k += 2) {
        float ar = tmp[k], ai=tmp[k+1];
        if (cplx || k > 0) {
          tmp[k] = ar*ar - ai*ai;
          tmp[k+1] = 2*ar*ai;
        } else {
          tmp[0] = ar*ar;
          tmp[1] = ai*ai;
        }
      }
      
      for (k=0; k < Nfloat; ++k) {
        float d = fabs(tmp[k] - tmp2[k]), e = fabs(tmp[k]);
        if (d > conv_err) conv_err = d;
        if (e > conv_max) conv_max = e;
      }
      if (conv_err > 1e-5*conv_max) {
        printf("zconvolve error ? %g %g\n", conv_err, conv_max); exit(1);
      }
    }

  }

  printf("%s PFFFT is OK for N=%d\n", (cplx?"CPLX":"REAL"), N); fflush(stdout);
  
  pffft_destroy_setup(s);
  pffft_aligned_free(ref);
  pffft_aligned_free(in);
  pffft_aligned_free(out);
  pffft_aligned_free(tmp);
  pffft_aligned_free(tmp2);
}
Esempio n. 10
0
void benchmark_ffts(int N, int cplx) {
  int Nfloat = (cplx ? N*2 : N);
  int Nbytes = Nfloat * sizeof(float);
  float *X = pffft_aligned_malloc(Nbytes), *Y = pffft_aligned_malloc(Nbytes), *Z = pffft_aligned_malloc(Nbytes);

  double t0, t1, flops;

  int k;
  int max_iter = 5120000/N*4;
#ifdef __arm__
  max_iter /= 4;
#endif
  int iter;

  for (k = 0; k < Nfloat; ++k) {
    X[k] = 0; //sqrtf(k+1);
  }

  // FFTPack benchmark
  {
    float *wrk = malloc(2*Nbytes + 15*sizeof(float));
    int max_iter_ = max_iter/pffft_simd_size(); if (max_iter_ == 0) max_iter_ = 1;
    if (cplx) cffti(N, wrk);
    else      rffti(N, wrk);
    t0 = uclock_sec();  
    
    for (iter = 0; iter < max_iter_; ++iter) {
      if (cplx) {
        cfftf(N, X, wrk);
        cfftb(N, X, wrk);
      } else {
        rfftf(N, X, wrk);
        rfftb(N, X, wrk);
      }
    }
    t1 = uclock_sec();
    free(wrk);
    
    flops = (max_iter_*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html
    show_output("FFTPack", N, cplx, flops, t0, t1, max_iter_);
  }

#ifdef HAVE_VECLIB
  int log2N = (int)(log(N)/log(2) + 0.5f);
  if (N == (1<<log2N)) {
    FFTSetup setup;

    setup = vDSP_create_fftsetup(log2N, FFT_RADIX2);
    DSPSplitComplex zsamples;
    zsamples.realp = &X[0];
    zsamples.imagp = &X[Nfloat/2];
    t0 = uclock_sec();  
    for (iter = 0; iter < max_iter; ++iter) {
      if (cplx) {
        vDSP_fft_zip(setup, &zsamples, 1, log2N, kFFTDirection_Forward);
        vDSP_fft_zip(setup, &zsamples, 1, log2N, kFFTDirection_Inverse);
      } else {
        vDSP_fft_zrip(setup, &zsamples, 1, log2N, kFFTDirection_Forward); 
        vDSP_fft_zrip(setup, &zsamples, 1, log2N, kFFTDirection_Inverse);
      }
    }
    t1 = uclock_sec();
    vDSP_destroy_fftsetup(setup);

    flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html
    show_output("vDSP", N, cplx, flops, t0, t1, max_iter);
  } else {
    show_output("vDSP", N, cplx, -1, -1, -1, -1);
  }
#endif
  
#ifdef HAVE_FFTW
  {
    fftwf_plan planf, planb;
    fftw_complex *in = (fftw_complex*) fftwf_malloc(sizeof(fftw_complex) * N);
    fftw_complex *out = (fftw_complex*) fftwf_malloc(sizeof(fftw_complex) * N);
    memset(in, 0, sizeof(fftw_complex) * N);
    int flags = (N < 40000 ? FFTW_MEASURE : FFTW_ESTIMATE);  // measure takes a lot of time on largest ffts
    //int flags = FFTW_ESTIMATE;
    if (cplx) {
      planf = fftwf_plan_dft_1d(N, (fftwf_complex*)in, (fftwf_complex*)out, FFTW_FORWARD, flags);
      planb = fftwf_plan_dft_1d(N, (fftwf_complex*)in, (fftwf_complex*)out, FFTW_BACKWARD, flags);
    } else {
      planf = fftwf_plan_dft_r2c_1d(N, (float*)in, (fftwf_complex*)out, flags);
      planb = fftwf_plan_dft_c2r_1d(N, (fftwf_complex*)in, (float*)out, flags);
    }

    t0 = uclock_sec();  
    for (iter = 0; iter < max_iter; ++iter) {
      fftwf_execute(planf);
      fftwf_execute(planb);
    }
    t1 = uclock_sec();

    fftwf_destroy_plan(planf);
    fftwf_destroy_plan(planb);
    fftwf_free(in); fftwf_free(out);

    flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html
    show_output((flags == FFTW_MEASURE ? "FFTW (meas.)" : " FFTW (estim)"), N, cplx, flops, t0, t1, max_iter);
  }
#endif  

  // PFFFT benchmark
  {
    PFFFT_Setup *s = pffft_new_setup(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL);
    if (s) {
      t0 = uclock_sec();  
      for (iter = 0; iter < max_iter; ++iter) {
        pffft_transform(s, X, Z, Y, PFFFT_FORWARD);
        pffft_transform(s, X, Z, Y, PFFFT_BACKWARD);
      }
      t1 = uclock_sec();
      pffft_destroy_setup(s);
    
      flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html
      show_output("PFFFT", N, cplx, flops, t0, t1, max_iter);
    }
  }

  if (!array_output_format) {
    printf("--\n");
  }

  pffft_aligned_free(X);
  pffft_aligned_free(Y);
  pffft_aligned_free(Z);
}