Example #1
0
mdct_info *faad_mdct_init(uint16_t N)
{
    mdct_info *mdct = (mdct_info*)faad_malloc(sizeof(mdct_info));

    assert(N % 8 == 0);

    mdct->N = N;

    /* NOTE: For "small framelengths" in FIXED_POINT the coefficients need to be
     * scaled by sqrt("(nearest power of 2) > N" / N) */

    /* RE(mdct->sincos[k]) = scale*(real_t)(cos(2.0*M_PI*(k+1./8.) / (real_t)N));
     * IM(mdct->sincos[k]) = scale*(real_t)(sin(2.0*M_PI*(k+1./8.) / (real_t)N)); */
    /* scale is 1 for fixed point, sqrt(N) for floating point */
    switch (N)
    {
    case 2048:
        mdct->sincos = (complex_t*)mdct_tab_2048;
        break;
    case 256:
        mdct->sincos = (complex_t*)mdct_tab_256;
        break;
#ifdef LD_DEC
    case 1024:
        mdct->sincos = (complex_t*)mdct_tab_1024;
        break;
#endif
#ifdef ALLOW_SMALL_FRAMELENGTH
    case 1920:
        mdct->sincos = (complex_t*)mdct_tab_1920;
        break;
    case 240:
        mdct->sincos = (complex_t*)mdct_tab_240;
        break;
#ifdef LD_DEC
    case 960:
        mdct->sincos = (complex_t*)mdct_tab_960;
        break;
#endif
#endif
#ifdef SSR_DEC
    case 512:
        mdct->sincos = (complex_t*)mdct_tab_512;
        break;
    case 64:
        mdct->sincos = (complex_t*)mdct_tab_64;
        break;
#endif
    }

    /* initialise fft */
    mdct->cfft = cffti(N/4);

#ifdef PROFILE
    mdct->cycles = 0;
    mdct->fft_cycles = 0;
#endif

    return mdct;
}
Example #2
0
mdct_info *faad_mdct_init(uint16_t N)
{
    uint16_t k;
#ifdef FIXED_POINT
    uint16_t N_idx;
    real_t cangle, sangle, c, s, cold;
#endif
	real_t scale;

    mdct_info *mdct = (mdct_info*)faad_malloc(sizeof(mdct_info));

    assert(N % 8 == 0);

    mdct->N = N;
    mdct->sincos = (complex_t*)faad_malloc(N/4*sizeof(complex_t));

#ifdef FIXED_POINT
    N_idx = map_N_to_idx(N);

    scale = const_tab[N_idx][0];
    cangle = const_tab[N_idx][1];
    sangle = const_tab[N_idx][2];
    c = const_tab[N_idx][3];
    s = const_tab[N_idx][4];
#else
    scale = (real_t)sqrt(2.0 / (real_t)N);
#endif

    /* (co)sine table build using recurrence relations */
    /* this can also be done using static table lookup or */
    /* some form of interpolation */
    for (k = 0; k < N/4; k++)
    {
#ifdef FIXED_POINT
        RE(mdct->sincos[k]) = c; //MUL_C_C(c,scale);
        IM(mdct->sincos[k]) = s; //MUL_C_C(s,scale);

        cold = c;
        c = MUL_F(c,cangle) - MUL_F(s,sangle);
        s = MUL_F(s,cangle) + MUL_F(cold,sangle);
#else
        /* no recurrence, just sines */
        RE(mdct->sincos[k]) = scale*(real_t)(cos(2.0*M_PI*(k+1./8.) / (real_t)N));
        IM(mdct->sincos[k]) = scale*(real_t)(sin(2.0*M_PI*(k+1./8.) / (real_t)N));
#endif
    }

    /* initialise fft */
    mdct->cfft = cffti(N/4);

#ifdef PROFILE
    mdct->cycles = 0;
    mdct->fft_cycles = 0;
#endif

    return mdct;
}
Example #3
0
complex_plan make_complex_plan (int length)
  {
  complex_plan plan = (complex_plan) malloc(sizeof(complex_plan_i));
  int pfsum = prime_factor_sum(length);
  double comp1 = length*pfsum;
  double comp2 = 2*3*length*log(3.*length);
  plan->length=length;
  plan->bluestein = (comp2<comp1);
  if (plan->bluestein)
    bluestein_i (length,&(plan->work));
  else
    {
    plan->work=(double *)malloc((4*length+15)*sizeof(double));
    cffti(length, plan->work);
    }
  return plan;
  }
Example #4
0
complex_plan make_complex_plan (size_t length)
  {
  complex_plan plan = RALLOC(complex_plan_i,1);
  size_t pfsum = prime_factor_sum(length);
  double comp1 = length*pfsum;
  double comp2 = 2*3*length*log(3.*length);
  comp2*=3.; /* fudge factor that appears to give good overall performance */
  plan->length=length;
  plan->bluestein = (comp2<comp1);
  if (plan->bluestein)
    bluestein_i (length,&(plan->work));
  else
    {
    plan->work=RALLOC(double,4*length+15);
    cffti(length, plan->work);
    }
  return plan;
  }
Example #5
0
void bluestein_i (size_t n, double **tstorage, size_t *worksize)
  {
  static const double pi=3.14159265358979323846;
  size_t n2=good_size(n*2-1);
  size_t m, coeff;
  double angle, xn2;
  double *bk, *bkf, *work;
  double pibyn=pi/n;
  *worksize=2+2*n+8*n2+16;
  *tstorage = RALLOC(double,2+2*n+8*n2+16);
  ((size_t *)(*tstorage))[0]=n2;
  bk  = *tstorage+2;
  bkf = *tstorage+2+2*n;
  work= *tstorage+2+2*(n+n2);

/* initialize b_k */
  bk[0] = 1;
  bk[1] = 0;

  coeff=0;
  for (m=1; m<n; ++m)
    {
    coeff+=2*m-1;
    if (coeff>=2*n) coeff-=2*n;
    angle = pibyn*coeff;
    bk[2*m] = cos(angle);
    bk[2*m+1] = sin(angle);
    }

/* initialize the zero-padded, Fourier transformed b_k. Add normalisation. */
  xn2 = 1./n2;
  bkf[0] = bk[0]*xn2;
  bkf[1] = bk[1]*xn2;
  for (m=2; m<2*n; m+=2)
    {
    bkf[m]   = bkf[2*n2-m]   = bk[m]   *xn2;
    bkf[m+1] = bkf[2*n2-m+1] = bk[m+1] *xn2;
    }
  for (m=2*n;m<=(2*n2-2*n+1);++m)
    bkf[m]=0.;
  cffti (n2,work);
  cfftf (n2,bkf,work);
  }
Example #6
0
static PyObject *
fftpack_cffti(PyObject *NPY_UNUSED(self), PyObject *args)
{
    PyArrayObject *op;
    npy_intp dim;
    long n;

    if (!PyArg_ParseTuple(args, "l", &n)) {
        return NULL;
    }
    /*Magic size needed by cffti*/
    dim = 4*n + 15;
    /*Create a 1 dimensional array of dimensions of type double*/
    op = (PyArrayObject *)PyArray_SimpleNew(1, &dim, NPY_DOUBLE);
    if (op == NULL) {
        return NULL;
    }

    NPY_SIGINT_ON;
    cffti(n, (double *)PyArray_DATA((PyArrayObject*)op));
    NPY_SIGINT_OFF;

    return (PyObject *)op;
}
Example #7
0
Datum 
fft_main(PG_FUNCTION_ARGS)
{
  int i,n;
  double sgn;
  double *w;
  double wtime;
  double *x,*y,*z;
  int32 arg = PG_GETARG_INT32(0);

  timestamp();

  ereport(INFO,(errmsg("  Number of processors available = %d\n", omp_get_num_procs())));
  ereport(INFO,(errmsg("  Number of threads =              %d\n", omp_get_max_threads())));

  //Prepare for tests.
  ereport(INFO,(errmsg("             N      Time\n")));

  n = 4;
  w = (double *) malloc(    n * sizeof(double));
  x = (double *) malloc(2 * n * sizeof(double));
  y = (double *) malloc(2 * n * sizeof(double));
  z = (double *) malloc(2 * n * sizeof(double));

  //初始化数据
  x[0]=1.0; x[1]=0.0;
  x[2]=2.0; x[3]=0.0;
  x[4]=4.0; x[5]=0.0;
  x[6]=3.0; x[7]=0.0;

  ereport(INFO,(errmsg("x=")));
  for(i=0; i<2*n; i++){
    ereport(INFO,(errmsg("%f,",x[i])));
  }

  //Initialize the sine and cosine tables.
  cffti(n, w);

  wtime = omp_get_wtime();

  //Transform forward
  sgn = + 1.0;

  //fft计算
  cfft2( n, x, y, w, sgn );
    
  //输出结果
  ereport(INFO,(errmsg("y=")));
  for(i=0; i<2*n; i++){
    ereport(INFO,(errmsg("%f,",y[i])));
  }

  //元素个数
  ereport(INFO,(errmsg("  %12d", n)));
  //运行时间
  wtime = omp_get_wtime() - wtime;
  ereport(INFO,(errmsg("  %12e\n", wtime)));

  free(w);
  free(x);
  free(y);

  //Terminate.
  ereport(INFO,(errmsg("  Normal end of execution.\n")));
  timestamp();

  PG_RETURN_INT32(arg);
}
Example #8
0
int main()
{
/* 
   SSE version of cfft2 - uses INTEL intrinsics
       W. Petersen, SAM. Math. ETHZ 2 May, 2002 
*/
   int first,i,icase,it,n;
   float seed,error,fnm1,sign,z0,z1,ggl();
   float t1,ln2,mflops;
   void cffti(),cfft2();

   first = 1;
   seed  = 331.0;
   for(icase=0;icase<2;icase++){
   if(first){
      for(i=0;i<2*N;i+=2){
         z0 = ggl(&seed);     /* real part of array */
         z1 = ggl(&seed);     /* imaginary part of array */
         x[i] = z0;
         z[i] = z0;           /* copy of initial real data */
         x[i+1] = z1;
         z[i+1] = z1;         /* copy of initial imag. data */
      }
   } else {
      for(i=0;i<2*N;i+=2){
         z0 = 0;              /* real part of array */
         z1 = 0;              /* imaginary part of array */
         x[i] = z0;
         z[i] = z0;           /* copy of initial real data */
         x[i+1] = z1;
         z[i+1] = z1;         /* copy of initial imag. data */
      }
   }
/* initialize sine/cosine tables */
   n = N;
   cffti(n,w);
/* transform forward, back */
   if(first){
      sign = 1.0;
      cfft2(n,x,y,w,sign);
      sign = -1.0;
      cfft2(n,y,x,w,sign);
/* results should be same as initial multiplied by N */
      fnm1 = 1.0/((float) n);
      error = 0.0;
      for(i=0;i<2*N;i+=2){
         error += (z[i] - fnm1*x[i])*(z[i] - fnm1*x[i]) +
                  (z[i+1] - fnm1*x[i+1])*(z[i+1] - fnm1*x[i+1]);
      }
      error = sqrt(fnm1*error);
      printf(" for n=%d, fwd/bck error=%e\n",N,error);
      first = 0;
   } else {
      unsigned j = 0;
      for(it=0;it<20000;it++){
         sign = +1.0;
         cfft2(n,x,y,w,sign);
         sign = -1.0;
         cfft2(n,y,x,w,sign);
      }
      printf(" for n=%d\n",n);
      for (i = 0; i<N; ++i) {
        printf("%g  ", w[i]);
        j++;
        if (j == 4) {
          printf("\n");
          j = 0;
        }
      }
   }
   }
   return 0;
}
Example #9
0
main()
{
    /*
       Example of Apple Altivec coded binary radix FFT
       using intrinsics from Petersen and Arbenz "Intro.
       to Parallel Computing," Section 3.6

       This is an expanded version of a generic work-space
       FFT: steps are in-line. cfft2(n,x,y,w,sign) takes complex
       n-array "x" (Fortran real,aimag,real,aimag,... order)
       and writes its DFT in "y". Both input "x" and the
       original contents of "y" are destroyed. Initialization
       for array "w" (size n/2 complex of twiddle factors
       (exp(twopi*i*k/n), for k=0..n/2-1)) is computed once
       by cffti(n,w).

                          WPP, SAM. Math. ETHZ, 1 June, 2002
    */

    int first,i,icase,it,ln2,n;
    int nits=1000000;
    static float seed = 331.0;
    float error,fnm1,sign,z0,z1,ggl();
    float *x,*y,*z,*w;
    double t1,mflops;
    /* allocate storage for x,y,z,w on 4-word bndr. */
    x = (float *) malloc(8*N);
    y = (float *) malloc(8*N);
    z = (float *) malloc(8*N);
    w = (float *) malloc(4*N);
    n     = 2;
    for(ln2=1; ln2<21; ln2++) {
        first = 1;
        for(icase=0; icase<2; icase++) {
            if(first) {
                for(i=0; i<2*n; i+=2) {
                    z0 = ggl(&seed);     /* real part of array */
                    z1 = ggl(&seed);     /* imaginary part of array */
                    x[i] = z0;
                    z[i] = z0;           /* copy of initial real data */
                    x[i+1] = z1;
                    z[i+1] = z1;         /* copy of initial imag. data */
                }
            } else {
                for(i=0; i<2*n; i+=2) {
                    z0 = 0;              /* real part of array */
                    z1 = 0;              /* imaginary part of array */
                    x[i] = z0;
                    z[i] = z0;           /* copy of initial real data */
                    x[i+1] = z1;
                    z[i+1] = z1;         /* copy of initial imag. data */
                }
            }
            /* initialize sine/cosine tables */
            cffti(n,w);
            /* transform forward, back */
            if(first) {
                sign = 1.0;
                cfft2(n,x,y,w,sign);
                sign = -1.0;
                cfft2(n,y,x,w,sign);
                /* results should be same as initial multiplied by n */
                fnm1 = 1.0/((float) n);
                error = 0.0;
                for(i=0; i<2*n; i+=2) {
                    error += (z[i] - fnm1*x[i])*(z[i] - fnm1*x[i]) +
                             (z[i+1] - fnm1*x[i+1])*(z[i+1] - fnm1*x[i+1]);
                }
                error = sqrt(fnm1*error);
                printf(" for n=%d, fwd/bck error=%e\n",n,error);
                first = 0;
            } else {
                for(it=0; it<nits; it++) {
                    sign = +1.0;
                    cfft2(n,x,y,w,sign);
                    sign = -1.0;
                    cfft2(n,y,x,w,sign);
                }
            }
        }
        if((ln2%4)==0) nits /= 10;
        n *= 2;
    }
    return 0;
}
Example #10
0
/* Main program */ int MAIN__(void)
{
    /* Initialized data */

    static integer nd[10] = { 120,54,49,32,4,3,2 };

    /* Format strings */
    static char fmt_1001[] = "(\0020N\002,i5,\002 RFFTF  \002,e10.3,\002 RFF"
	    "TB  \002,e10.3,\002 RFFTFB \002,e10.3,\002 SINT   \002,e10.3,"
	    "\002 SINTFB \002,e10.3,\002 COST   \002,e10.3/7x,\002 COSTFB "
	    "\002,e10.3,\002 SINQF  \002,e10.3,\002 SINQB  \002,e10.3,\002 SI"
	    "NQFB \002,e10.3,\002 COSQF  \002,e10.3,\002 COSQB  \002,e10.3/7x,"
	    "\002 COSQFB \002,e10.3,\002 DEZF   \002,e10.3,\002 DEZB   \002,e"
	    "10.3,\002 DEZFB  \002,e10.3,\002 CFFTF  \002,e10.3,\002 CFFTB "
	    " \002,e10.3/7x,\002 CFFTFB \002,e10.3)";

    /* System generated locals */
    integer i__1, i__2, i__3, i__4, i__5, i__6;
    doublereal d__1, d__2, d__3, d__4;
    doublecomplex z__1, z__2, z__3;

    /* Builtin functions */
    double sqrt(doublereal), sin(doublereal), cos(doublereal);
    integer pow_ii(integer *, integer *);
    double atan(doublereal), z_abs(doublecomplex *);
    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void);

    /* Local variables */
    doublereal a[100], b[100];
    integer i__, j, k, n;
    doublereal w[2000], x[200], y[200], ah[100], bh[100], cf, fn, dt, pi;
    doublecomplex cx[200], cy[200];
    doublereal xh[200];
    integer nz, nm1, np1, ns2;
    doublereal arg, tfn, tpi;
    integer nns;
    doublereal sum, arg1, arg2;
    integer ns2m;
    doublereal sum1, sum2, dcfb;
    integer ifac[64], modn;
    doublereal rftb, rftf;
    extern /* Subroutine */ void cost(integer *, doublereal *, doublereal *, 
	    integer *), sint(integer *, doublereal *, doublereal *, integer *
	    );
    doublereal dezb1, dezf1, sqrt2;
    extern /* Subroutine */ void cfftb(integer *, doublecomplex *, doublereal 
	    *, integer *), cfftf(integer *, doublecomplex *, doublereal *, 
	    integer *);
    doublereal dezfb;
    extern /* Subroutine */ void cffti(integer *, doublereal *, integer *), 
	    rfftb(integer *, doublereal *, doublereal *, integer *);
    doublereal rftfb;
    extern /* Subroutine */ void rfftf(integer *, doublereal *, doublereal *, 
	    integer *), cosqb(integer *, doublereal *, doublereal *, integer 
	    *), rffti(integer *, doublereal *, integer *), cosqf(integer *, 
	    doublereal *, doublereal *, integer *), sinqb(integer *, 
	    doublereal *, doublereal *, integer *), cosqi(integer *, 
	    doublereal *, integer *), sinqf(integer *, doublereal *, 
	    doublereal *, integer *), costi(integer *, doublereal *, integer 
	    *);
    doublereal azero;
    extern /* Subroutine */ void sinqi(integer *, doublereal *, integer *), 
	    sinti(integer *, doublereal *, integer *);
    doublereal costt, sintt, dcfftb, dcfftf, cosqfb, costfb;
    extern /* Subroutine */ void ezfftb(integer *, doublereal *, doublereal *,
	     doublereal *, doublereal *, doublereal *, integer *);
    doublereal sinqfb;
    extern /* Subroutine */ void ezfftf(integer *, doublereal *, doublereal *,
	     doublereal *, doublereal *, doublereal *, integer *);
    doublereal sintfb;
    extern /* Subroutine */ void ezffti(integer *, doublereal *, integer *);
    doublereal azeroh, cosqbt, cosqft, sinqbt, sinqft;

    /* Fortran I/O blocks */
    static cilist io___58 = { 0, 6, 0, fmt_1001, 0 };



/*     * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

/*                       VERSION 4  APRIL 1985 */

/*                         A TEST DRIVER FOR */
/*          A PACKAGE OF FORTRAN SUBPROGRAMS FOR THE FAST FOURIER */
/*           TRANSFORM OF PERIODIC AND OTHER SYMMETRIC SEQUENCES */

/*                              BY */

/*                       PAUL N SWARZTRAUBER */

/*       NATIONAL CENTER FOR ATMOSPHERIC RESEARCH  BOULDER,COLORADO 80307 */

/*        WHICH IS SPONSORED BY THE NATIONAL SCIENCE FOUNDATION */

/*     * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */


/*             THIS PROGRAM TESTS THE PACKAGE OF FAST FOURIER */
/*     TRANSFORMS FOR BOTH COMPLEX AND REAL PERIODIC SEQUENCES AND */
/*     CERTIAN OTHER SYMMETRIC SEQUENCES THAT ARE LISTED BELOW. */

/*     1.   RFFTI     INITIALIZE  RFFTF AND RFFTB */
/*     2.   RFFTF     FORWARD TRANSFORM OF A REAL PERIODIC SEQUENCE */
/*     3.   RFFTB     BACKWARD TRANSFORM OF A REAL COEFFICIENT ARRAY */

/*     4.   EZFFTI    INITIALIZE EZFFTF AND EZFFTB */
/*     5.   EZFFTF    A SIMPLIFIED REAL PERIODIC FORWARD TRANSFORM */
/*     6.   EZFFTB    A SIMPLIFIED REAL PERIODIC BACKWARD TRANSFORM */

/*     7.   SINTI     INITIALIZE SINT */
/*     8.   SINT      SINE TRANSFORM OF A REAL ODD SEQUENCE */

/*     9.   COSTI     INITIALIZE COST */
/*     10.  COST      COSINE TRANSFORM OF A REAL EVEN SEQUENCE */

/*     11.  SINQI     INITIALIZE SINQF AND SINQB */
/*     12.  SINQF     FORWARD SINE TRANSFORM WITH ODD WAVE NUMBERS */
/*     13.  SINQB     UNNORMALIZED INVERSE OF SINQF */

/*     14.  COSQI     INITIALIZE COSQF AND COSQB */
/*     15.  COSQF     FORWARD COSINE TRANSFORM WITH ODD WAVE NUMBERS */
/*     16.  COSQB     UNNORMALIZED INVERSE OF COSQF */

/*     17.  CFFTI     INITIALIZE CFFTF AND CFFTB */
/*     18.  CFFTF     FORWARD TRANSFORM OF A COMPLEX PERIODIC SEQUENCE */
/*     19.  CFFTB     UNNORMALIZED INVERSE OF CFFTF */


    sqrt2 = sqrt(2.0);
    nns = 7;
    i__1 = nns;
    for (nz = 1; nz <= i__1; ++nz) {
	n = nd[nz - 1];
	modn = n % 2;
	fn = (real) n;
	tfn = fn + fn;
	np1 = n + 1;
	nm1 = n - 1;
	i__2 = np1;
	for (j = 1; j <= i__2; ++j) {
	    x[j - 1] = sin((real) j * sqrt2);
	    y[j - 1] = x[j - 1];
	    xh[j - 1] = x[j - 1];
/* L101: */
	}

/*     TEST SUBROUTINES RFFTI,RFFTF AND RFFTB */

	rffti(&n, w, ifac);
	pi = 3.141592653589793238462643383279502884197169399375108209749445923;
	dt = (pi + pi) / fn;
	ns2 = (n + 1) / 2;
	if (ns2 < 2) {
	    goto L104;
	}
	i__2 = ns2;
	for (k = 2; k <= i__2; ++k) {
	    sum1 = 0.0;
	    sum2 = 0.0;
	    arg = (real) (k - 1) * dt;
	    i__3 = n;
	    for (i__ = 1; i__ <= i__3; ++i__) {
		arg1 = (real) (i__ - 1) * arg;
		sum1 += x[i__ - 1] * cos(arg1);
		sum2 += x[i__ - 1] * sin(arg1);
/* L102: */
	    }
	    y[(k << 1) - 3] = sum1;
	    y[(k << 1) - 2] = -sum2;
/* L103: */
	}
L104:
	sum1 = 0.0;
	sum2 = 0.0;
	i__2 = nm1;
	for (i__ = 1; i__ <= i__2; i__ += 2) {
	    sum1 += x[i__ - 1];
	    sum2 += x[i__];
/* L105: */
	}
	if (modn == 1) {
	    sum1 += x[n - 1];
	}
	y[0] = sum1 + sum2;
	if (modn == 0) {
	    y[n - 1] = sum1 - sum2;
	}
	rfftf(&n, x, w, ifac);
	rftf = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = rftf, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
	    rftf = max(d__2,d__3);
	    x[i__ - 1] = xh[i__ - 1];
/* L106: */
	}
	rftf /= fn;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    sum = x[0] * 0.5;
	    arg = (real) (i__ - 1) * dt;
	    if (ns2 < 2) {
		goto L108;
	    }
	    i__3 = ns2;
	    for (k = 2; k <= i__3; ++k) {
		arg1 = (real) (k - 1) * arg;
		sum = sum + x[(k << 1) - 3] * cos(arg1) - x[(k << 1) - 2] * 
			sin(arg1);
/* L107: */
	    }
L108:
	    if (modn == 0) {
		i__3 = i__ - 1;
		sum += (real) pow_ii(&c_n1, &i__3) * 0.5 * x[n - 1];
	    }
	    y[i__ - 1] = sum + sum;
/* L109: */
	}
	rfftb(&n, x, w, ifac);
	rftb = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = rftb, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
	    rftb = max(d__2,d__3);
	    x[i__ - 1] = xh[i__ - 1];
	    y[i__ - 1] = xh[i__ - 1];
/* L110: */
	}
	rfftb(&n, y, w, ifac);
	rfftf(&n, y, w, ifac);
	cf = 1.0 / fn;
	rftfb = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = rftfb, d__3 = (d__1 = cf * y[i__ - 1] - x[i__ - 1], abs(
		    d__1));
	    rftfb = max(d__2,d__3);
/* L111: */
	}

/*     TEST SUBROUTINES SINTI AND SINT */

	dt = pi / fn;
	i__2 = nm1;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    x[i__ - 1] = xh[i__ - 1];
/* L112: */
	}
	i__2 = nm1;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    y[i__ - 1] = 0.0;
	    arg1 = (real) i__ * dt;
	    i__3 = nm1;
	    for (k = 1; k <= i__3; ++k) {
		y[i__ - 1] += x[k - 1] * sin((real) k * arg1);
/* L113: */
	    }
	    y[i__ - 1] += y[i__ - 1];
/* L114: */
	}
	sinti(&nm1, w, ifac);
	sint(&nm1, x, w, ifac);
	cf = 0.5 / fn;
	sintt = 0.0;
	i__2 = nm1;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = sintt, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
	    sintt = max(d__2,d__3);
	    x[i__ - 1] = xh[i__ - 1];
	    y[i__ - 1] = x[i__ - 1];
/* L115: */
	}
	sintt = cf * sintt;
	sint(&nm1, x, w, ifac);
	sint(&nm1, x, w, ifac);
	sintfb = 0.0;
	i__2 = nm1;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = sintfb, d__3 = (d__1 = cf * x[i__ - 1] - y[i__ - 1], abs(
		    d__1));
	    sintfb = max(d__2,d__3);
/* L116: */
	}

/*     TEST SUBROUTINES COSTI AND COST */

	i__2 = np1;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    x[i__ - 1] = xh[i__ - 1];
/* L117: */
	}
	i__2 = np1;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    i__3 = i__ + 1;
	    y[i__ - 1] = (x[0] + (real) pow_ii(&c_n1, &i__3) * x[n]) * 0.5;
	    arg = (real) (i__ - 1) * dt;
	    i__3 = n;
	    for (k = 2; k <= i__3; ++k) {
		y[i__ - 1] += x[k - 1] * cos((real) (k - 1) * arg);
/* L118: */
	    }
	    y[i__ - 1] += y[i__ - 1];
/* L119: */
	}
	costi(&np1, w, ifac);
	cost(&np1, x, w, ifac);
	costt = 0.0;
	i__2 = np1;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = costt, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
	    costt = max(d__2,d__3);
	    x[i__ - 1] = xh[i__ - 1];
	    y[i__ - 1] = xh[i__ - 1];
/* L120: */
	}
	costt = cf * costt;
	cost(&np1, x, w, ifac);
	cost(&np1, x, w, ifac);
	costfb = 0.0;
	i__2 = np1;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = costfb, d__3 = (d__1 = cf * x[i__ - 1] - y[i__ - 1], abs(
		    d__1));
	    costfb = max(d__2,d__3);
/* L121: */
	}

/*     TEST SUBROUTINES SINQI,SINQF AND SINQB */

	cf = 0.25 / fn;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    y[i__ - 1] = xh[i__ - 1];
/* L122: */
	}
	dt = pi / (fn + fn);
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    x[i__ - 1] = 0.0;
	    arg = dt * (real) i__;
	    i__3 = n;
	    for (k = 1; k <= i__3; ++k) {
		x[i__ - 1] += y[k - 1] * sin((real) (k + k - 1) * arg);
/* L123: */
	    }
	    x[i__ - 1] *= 4.0;
/* L124: */
	}
	sinqi(&n, w, ifac);
	sinqb(&n, y, w, ifac);
	sinqbt = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = sinqbt, d__3 = (d__1 = y[i__ - 1] - x[i__ - 1], abs(d__1));
	    sinqbt = max(d__2,d__3);
	    x[i__ - 1] = xh[i__ - 1];
/* L125: */
	}
	sinqbt = cf * sinqbt;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    arg = (real) (i__ + i__ - 1) * dt;
	    i__3 = i__ + 1;
	    y[i__ - 1] = (real) pow_ii(&c_n1, &i__3) * 0.5 * x[n - 1];
	    i__3 = nm1;
	    for (k = 1; k <= i__3; ++k) {
		y[i__ - 1] += x[k - 1] * sin((real) k * arg);
/* L126: */
	    }
	    y[i__ - 1] += y[i__ - 1];
/* L127: */
	}
	sinqf(&n, x, w, ifac);
	sinqft = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = sinqft, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
	    sinqft = max(d__2,d__3);
	    y[i__ - 1] = xh[i__ - 1];
	    x[i__ - 1] = xh[i__ - 1];
/* L128: */
	}
	sinqf(&n, y, w, ifac);
	sinqb(&n, y, w, ifac);
	sinqfb = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = sinqfb, d__3 = (d__1 = cf * y[i__ - 1] - x[i__ - 1], abs(
		    d__1));
	    sinqfb = max(d__2,d__3);
/* L129: */
	}

/*     TEST SUBROUTINES COSQI,COSQF AND COSQB */

	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    y[i__ - 1] = xh[i__ - 1];
/* L130: */
	}
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    x[i__ - 1] = 0.0;
	    arg = (real) (i__ - 1) * dt;
	    i__3 = n;
	    for (k = 1; k <= i__3; ++k) {
		x[i__ - 1] += y[k - 1] * cos((real) (k + k - 1) * arg);
/* L131: */
	    }
	    x[i__ - 1] *= 4.0;
/* L132: */
	}
	cosqi(&n, w, ifac);
	cosqb(&n, y, w, ifac);
	cosqbt = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = cosqbt, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
	    cosqbt = max(d__2,d__3);
	    x[i__ - 1] = xh[i__ - 1];
/* L133: */
	}
	cosqbt = cf * cosqbt;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    y[i__ - 1] = x[0] * 0.5;
	    arg = (real) (i__ + i__ - 1) * dt;
	    i__3 = n;
	    for (k = 2; k <= i__3; ++k) {
		y[i__ - 1] += x[k - 1] * cos((real) (k - 1) * arg);
/* L134: */
	    }
	    y[i__ - 1] += y[i__ - 1];
/* L135: */
	}
	cosqf(&n, x, w, ifac);
	cosqft = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = cosqft, d__3 = (d__1 = y[i__ - 1] - x[i__ - 1], abs(d__1));
	    cosqft = max(d__2,d__3);
	    x[i__ - 1] = xh[i__ - 1];
	    y[i__ - 1] = xh[i__ - 1];
/* L136: */
	}
	cosqft = cf * cosqft;
	cosqb(&n, x, w, ifac);
	cosqf(&n, x, w, ifac);
	cosqfb = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = cosqfb, d__3 = (d__1 = cf * x[i__ - 1] - y[i__ - 1], abs(
		    d__1));
	    cosqfb = max(d__2,d__3);
/* L137: */
	}

/*     TEST PROGRAMS EZFFTI,EZFFTF,EZFFTB */

	ezffti(&n, w, ifac);
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    x[i__ - 1] = xh[i__ - 1];
/* L138: */
	}
	tpi = atan(1.0) * 8.0;
	dt = tpi / (real) n;
	ns2 = (n + 1) / 2;
	cf = 2.0 / (real) n;
	ns2m = ns2 - 1;
	if (ns2m <= 0) {
	    goto L141;
	}
	i__2 = ns2m;
	for (k = 1; k <= i__2; ++k) {
	    sum1 = 0.0;
	    sum2 = 0.0;
	    arg = (real) k * dt;
	    i__3 = n;
	    for (i__ = 1; i__ <= i__3; ++i__) {
		arg1 = (real) (i__ - 1) * arg;
		sum1 += x[i__ - 1] * cos(arg1);
		sum2 += x[i__ - 1] * sin(arg1);
/* L139: */
	    }
	    a[k - 1] = cf * sum1;
	    b[k - 1] = cf * sum2;
/* L140: */
	}
L141:
	nm1 = n - 1;
	sum1 = 0.0;
	sum2 = 0.0;
	i__2 = nm1;
	for (i__ = 1; i__ <= i__2; i__ += 2) {
	    sum1 += x[i__ - 1];
	    sum2 += x[i__];
/* L142: */
	}
	if (modn == 1) {
	    sum1 += x[n - 1];
	}
	azero = cf * 0.5 * (sum1 + sum2);
	if (modn == 0) {
	    a[ns2 - 1] = cf * 0.5 * (sum1 - sum2);
	}
	ezfftf(&n, x, &azeroh, ah, bh, w, ifac);
	dezf1 = (d__1 = azeroh - azero, abs(d__1));
	if (modn == 0) {
/* Computing MAX */
	    d__2 = dezf1, d__3 = (d__1 = a[ns2 - 1] - ah[ns2 - 1], abs(d__1));
	    dezf1 = max(d__2,d__3);
	}
	if (ns2m <= 0) {
	    goto L144;
	}
	i__2 = ns2m;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__3 = dezf1, d__4 = (d__1 = ah[i__ - 1] - a[i__ - 1], abs(d__1)),
		     d__3 = max(d__3,d__4), d__4 = (d__2 = bh[i__ - 1] - b[
		    i__ - 1], abs(d__2));
	    dezf1 = max(d__3,d__4);
/* L143: */
	}
L144:
	ns2 = n / 2;
	if (modn == 0) {
	    b[ns2 - 1] = 0.0;
	}
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    sum = azero;
	    arg1 = (real) (i__ - 1) * dt;
	    i__3 = ns2;
	    for (k = 1; k <= i__3; ++k) {
		arg2 = (real) k * arg1;
		sum = sum + a[k - 1] * cos(arg2) + b[k - 1] * sin(arg2);
/* L145: */
	    }
	    x[i__ - 1] = sum;
/* L146: */
	}
	ezfftb(&n, y, &azero, a, b, w, ifac);
	dezb1 = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = dezb1, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
	    dezb1 = max(d__2,d__3);
	    x[i__ - 1] = xh[i__ - 1];
/* L147: */
	}
	ezfftf(&n, x, &azero, a, b, w, ifac);
	ezfftb(&n, y, &azero, a, b, w, ifac);
	dezfb = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = dezfb, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
	    dezfb = max(d__2,d__3);
/* L148: */
	}

/*     TEST  CFFTI,CFFTF,CFFTB */

	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    i__3 = i__ - 1;
	    d__1 = cos(sqrt2 * (real) i__);
	    d__2 = sin(sqrt2 * (real) (i__ * i__));
	    z__1.r = d__1, z__1.i = d__2;
	    cx[i__3].r = z__1.r, cx[i__3].i = z__1.i;
/* L149: */
	}
	dt = (pi + pi) / fn;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    arg1 = -((real) (i__ - 1)) * dt;
	    i__3 = i__ - 1;
	    cy[i__3].r = 0.0, cy[i__3].i = 0.0;
	    i__3 = n;
	    for (k = 1; k <= i__3; ++k) {
		arg2 = (real) (k - 1) * arg1;
		i__4 = i__ - 1;
		i__5 = i__ - 1;
		d__1 = cos(arg2);
		d__2 = sin(arg2);
		z__3.r = d__1, z__3.i = d__2;
		i__6 = k - 1;
		z__2.r = z__3.r * cx[i__6].r - z__3.i * cx[i__6].i, z__2.i = 
			z__3.r * cx[i__6].i + z__3.i * cx[i__6].r;
		z__1.r = cy[i__5].r + z__2.r, z__1.i = cy[i__5].i + z__2.i;
		cy[i__4].r = z__1.r, cy[i__4].i = z__1.i;
/* L150: */
	    }
/* L151: */
	}
	cffti(&n, w, ifac);
	cfftf(&n, cx, w, ifac);
	dcfftf = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    i__3 = i__ - 1;
	    i__4 = i__ - 1;
	    z__1.r = cx[i__3].r - cy[i__4].r, z__1.i = cx[i__3].i - cy[i__4]
		    .i;
	    d__1 = dcfftf, d__2 = z_abs(&z__1);
	    dcfftf = max(d__1,d__2);
	    i__3 = i__ - 1;
	    i__4 = i__ - 1;
	    z__1.r = cx[i__4].r / fn, z__1.i = cx[i__4].i / fn;
	    cx[i__3].r = z__1.r, cx[i__3].i = z__1.i;
/* L152: */
	}
	dcfftf /= fn;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    arg1 = (real) (i__ - 1) * dt;
	    i__3 = i__ - 1;
	    cy[i__3].r = 0.0, cy[i__3].i = 0.0;
	    i__3 = n;
	    for (k = 1; k <= i__3; ++k) {
		arg2 = (real) (k - 1) * arg1;
		i__4 = i__ - 1;
		i__5 = i__ - 1;
		d__1 = cos(arg2);
		d__2 = sin(arg2);
		z__3.r = d__1, z__3.i = d__2;
		i__6 = k - 1;
		z__2.r = z__3.r * cx[i__6].r - z__3.i * cx[i__6].i, z__2.i = 
			z__3.r * cx[i__6].i + z__3.i * cx[i__6].r;
		z__1.r = cy[i__5].r + z__2.r, z__1.i = cy[i__5].i + z__2.i;
		cy[i__4].r = z__1.r, cy[i__4].i = z__1.i;
/* L153: */
	    }
/* L154: */
	}
	cfftb(&n, cx, w, ifac);
	dcfftb = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    i__3 = i__ - 1;
	    i__4 = i__ - 1;
	    z__1.r = cx[i__3].r - cy[i__4].r, z__1.i = cx[i__3].i - cy[i__4]
		    .i;
	    d__1 = dcfftb, d__2 = z_abs(&z__1);
	    dcfftb = max(d__1,d__2);
	    i__3 = i__ - 1;
	    i__4 = i__ - 1;
	    cx[i__3].r = cy[i__4].r, cx[i__3].i = cy[i__4].i;
/* L155: */
	}
	cf = 1.0 / fn;
	cfftf(&n, cx, w, ifac);
	cfftb(&n, cx, w, ifac);
	dcfb = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    i__3 = i__ - 1;
	    z__2.r = cf * cx[i__3].r, z__2.i = cf * cx[i__3].i;
	    i__4 = i__ - 1;
	    z__1.r = z__2.r - cy[i__4].r, z__1.i = z__2.i - cy[i__4].i;
	    d__1 = dcfb, d__2 = z_abs(&z__1);
	    dcfb = max(d__1,d__2);
/* L156: */
	}
	s_wsfe(&io___58);
	do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
	do_fio(&c__1, (char *)&rftf, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&rftb, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&rftfb, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&sintt, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&sintfb, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&costt, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&costfb, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&sinqft, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&sinqbt, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&sinqfb, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&cosqft, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&cosqbt, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&cosqfb, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&dezf1, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&dezb1, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&dezfb, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&dcfftf, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&dcfftb, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&dcfb, (ftnlen)sizeof(doublereal));
	e_wsfe();
/* L157: */
    }




    return 0;
} /* MAIN__ */
Example #11
0
int main ( void )

/******************************************************************************/
/* 
  Purpose:

    MAIN is the main program for FFT_SERIAL.

  Discussion:

    The "complex" vector A is actually stored as a double vector B.

    The "complex" vector entry A[I] is stored as:

      B[I*2+0], the real part,
      B[I*2+1], the imaginary part.

  Modified:

    23 March 2009

  Author:

    Original C version by Wesley Petersen.
    This C version by John Burkardt.

  Reference:

    Wesley Petersen, Peter Arbenz, 
    Introduction to Parallel Computing - A practical guide with examples in C,
    Oxford University Press,
    ISBN: 0-19-851576-6,
    LC: QA76.58.P47.
*/
{
  double ctime;
  double ctime1;
  double ctime2;
  double error;
  int first;
  double flops;
  double fnm1;
  int i;
  int icase;
  int it;
  int ln2;
  double mflops;
  int n;
  int nits = 10000;
  static double seed;
  double sgn;
  double *w;
  double *x;
  double *y;
  double *z;
  double z0;
  double z1;

  timestamp ( );
  printf ( "\n" );
  printf ( "FFT_SERIAL\n" );
  printf ( "  C version\n" );
  printf ( "\n" );
  printf ( "  Demonstrate an implementation of the Fast Fourier Transform\n" );
  printf ( "  of a complex data vector.\n" );
/*
  Prepare for tests.
*/
  printf ( "\n" );
  printf ( "  Accuracy check:\n" );
  printf ( "\n" );
  printf ( "    FFT ( FFT ( X(1:N) ) ) == N * X(1:N)\n" );
  printf ( "\n" );
  printf ( "             N      NITS    Error         Time          Time/Call     MFLOPS\n" );
  printf ( "\n" );

  seed  = 331.0;
  n = 1;
/*
  LN2 is the log base 2 of N.  Each increase of LN2 doubles N.
*/
  for ( ln2 = 1; ln2 <= 20; ln2++ )
  {
    n = 2 * n;
/*
  Allocate storage for the complex arrays W, X, Y, Z.  

  We handle the complex arithmetic,
  and store a complex number as a pair of doubles, a complex vector as a doubly
  dimensioned array whose second dimension is 2. 
*/
    w = ( double * ) malloc (     n * sizeof ( double ) );
    x = ( double * ) malloc ( 2 * n * sizeof ( double ) );
    y = ( double * ) malloc ( 2 * n * sizeof ( double ) );
    z = ( double * ) malloc ( 2 * n * sizeof ( double ) );

    first = 1;

    for ( icase = 0; icase < 2; icase++ )
    {
      if ( first )
      {
        for ( i = 0; i < 2 * n; i = i + 2 )
        {
          z0 = ggl ( &seed );
          z1 = ggl ( &seed );
          x[i] = z0;
          z[i] = z0;
          x[i+1] = z1;
          z[i+1] = z1;
        }
      } 
      else
      {
        for ( i = 0; i < 2 * n; i = i + 2 )
        {
          z0 = 0.0;              /* real part of array */
          z1 = 0.0;              /* imaginary part of array */
          x[i] = z0;
          z[i] = z0;           /* copy of initial real data */
          x[i+1] = z1;
          z[i+1] = z1;         /* copy of initial imag. data */
        }
      }
/* 
  Initialize the sine and cosine tables.
*/
      cffti ( n, w );
/* 
  Transform forward, back 
*/
      if ( first )
      {
        sgn = + 1.0;
        cfft2 ( n, x, y, w, sgn );
        sgn = - 1.0;
        cfft2 ( n, y, x, w, sgn );
/* 
  Results should be same as the initial data multiplied by N.
*/
        fnm1 = 1.0 / ( double ) n;
        error = 0.0;
        for ( i = 0; i < 2 * n; i = i + 2 )
        {
          error = error 
          + pow ( z[i]   - fnm1 * x[i], 2 )
          + pow ( z[i+1] - fnm1 * x[i+1], 2 );
        }
        error = sqrt ( fnm1 * error );
        printf ( "  %12d  %8d  %12e", n, nits, error );
        first = 0;
      }
      else
      {
        ctime1 = cpu_time ( );
        for ( it = 0; it < nits; it++ )
        {
          sgn = + 1.0;
          cfft2 ( n, x, y, w, sgn );
          sgn = - 1.0;
          cfft2 ( n, y, x, w, sgn );
        }
        ctime2 = cpu_time ( );
        ctime = ctime2 - ctime1;

        flops = 2.0 * ( double ) nits * ( 5.0 * ( double ) n * ( double ) ln2 );

        mflops = flops / 1.0E+06 / ctime;

        printf ( "  %12e  %12e  %12f\n", ctime, ctime / ( double ) ( 2 * nits ), mflops );
      }
    }
    if ( ( ln2 % 4 ) == 0 ) 
    {
      nits = nits / 10;
    }
    if ( nits < 1 ) 
    {
      nits = 1;
    }
    free ( w );
    free ( x );
    free ( y );
    free ( z );
  }
  printf ( "\n" );
  printf ( "FFT_SERIAL:\n" );
  printf ( "  Normal end of execution.\n" );
  printf ( "\n" );
  timestamp ( );

  return 0;
}
Example #12
0
/* compare results with the regular fftpack */
void pffft_validate_N(int N, int cplx) {
  int Nfloat = N*(cplx?2:1);
  int Nbytes = Nfloat * sizeof(float);
  float *ref, *in, *out, *tmp, *tmp2;
  PFFFT_Setup *s = pffft_new_setup(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL);
  int pass;

  if (!s) { printf("Skipping N=%d, not supported\n", N); return; }
  ref = pffft_aligned_malloc(Nbytes);
  in = pffft_aligned_malloc(Nbytes);
  out = pffft_aligned_malloc(Nbytes);
  tmp = pffft_aligned_malloc(Nbytes);
  tmp2 = pffft_aligned_malloc(Nbytes);

  for (pass=0; pass < 2; ++pass) {
    float ref_max = 0;
    int k;
    //printf("N=%d pass=%d cplx=%d\n", N, pass, cplx);
    // compute reference solution with FFTPACK
    if (pass == 0) {
      float *wrk = malloc(2*Nbytes+15*sizeof(float));
      for (k=0; k < Nfloat; ++k) {
        ref[k] = in[k] = frand()*2-1; 
        out[k] = 1e30;
      }
      if (!cplx) {
        rffti(N, wrk);
        rfftf(N, ref, wrk);
        // use our ordering for real ffts instead of the one of fftpack
        {
          float refN=ref[N-1];
          for (k=N-2; k >= 1; --k) ref[k+1] = ref[k]; 
          ref[1] = refN;
        }
      } else {
        cffti(N, wrk);
        cfftf(N, ref, wrk);
      }
      free(wrk);
    }

    for (k = 0; k < Nfloat; ++k) ref_max = MAX(ref_max, fabs(ref[k]));

      
    // pass 0 : non canonical ordering of transform coefficients  
    if (pass == 0) {
      // test forward transform, with different input / output
      pffft_transform(s, in, tmp, 0, PFFFT_FORWARD);
      memcpy(tmp2, tmp, Nbytes);
      memcpy(tmp, in, Nbytes);
      pffft_transform(s, tmp, tmp, 0, PFFFT_FORWARD);
      for (k = 0; k < Nfloat; ++k) {
        assert(tmp2[k] == tmp[k]);
      }

      // test reordering
      pffft_zreorder(s, tmp, out, PFFFT_FORWARD);
      pffft_zreorder(s, out, tmp, PFFFT_BACKWARD);
      for (k = 0; k < Nfloat; ++k) {
        assert(tmp2[k] == tmp[k]);
      }
      pffft_zreorder(s, tmp, out, PFFFT_FORWARD);
    } else {
      // pass 1 : canonical ordering of transform coeffs.
      pffft_transform_ordered(s, in, tmp, 0, PFFFT_FORWARD);
      memcpy(tmp2, tmp, Nbytes);
      memcpy(tmp, in, Nbytes);
      pffft_transform_ordered(s, tmp, tmp, 0, PFFFT_FORWARD);
      for (k = 0; k < Nfloat; ++k) {
        assert(tmp2[k] == tmp[k]);
      }
      memcpy(out, tmp, Nbytes);
    }

    {
      for (k=0; k < Nfloat; ++k) {
        if (!(fabs(ref[k] - out[k]) < 1e-3*ref_max)) {
          printf("%s forward PFFFT mismatch found for N=%d\n", (cplx?"CPLX":"REAL"), N);
          exit(1);
        }
      }
        
      if (pass == 0) pffft_transform(s, tmp, out, 0, PFFFT_BACKWARD);
      else   pffft_transform_ordered(s, tmp, out, 0, PFFFT_BACKWARD);
      memcpy(tmp2, out, Nbytes);
      memcpy(out, tmp, Nbytes);
      if (pass == 0) pffft_transform(s, out, out, 0, PFFFT_BACKWARD);
      else   pffft_transform_ordered(s, out, out, 0, PFFFT_BACKWARD);
      for (k = 0; k < Nfloat; ++k) {
        assert(tmp2[k] == out[k]);
        out[k] *= 1.f/N;
      }
      for (k = 0; k < Nfloat; ++k) {
        if (fabs(in[k] - out[k]) > 1e-3 * ref_max) {
          printf("pass=%d, %s IFFFT does not match for N=%d\n", pass, (cplx?"CPLX":"REAL"), N); break;
          exit(1);
        }
      }
    }

    // quick test of the circular convolution in fft domain
    {
      float conv_err = 0, conv_max = 0;

      pffft_zreorder(s, ref, tmp, PFFFT_FORWARD);
      memset(out, 0, Nbytes);
      pffft_zconvolve_accumulate(s, ref, ref, out, 1.0);
      pffft_zreorder(s, out, tmp2, PFFFT_FORWARD);
      
      for (k=0; k < Nfloat; k += 2) {
        float ar = tmp[k], ai=tmp[k+1];
        if (cplx || k > 0) {
          tmp[k] = ar*ar - ai*ai;
          tmp[k+1] = 2*ar*ai;
        } else {
          tmp[0] = ar*ar;
          tmp[1] = ai*ai;
        }
      }
      
      for (k=0; k < Nfloat; ++k) {
        float d = fabs(tmp[k] - tmp2[k]), e = fabs(tmp[k]);
        if (d > conv_err) conv_err = d;
        if (e > conv_max) conv_max = e;
      }
      if (conv_err > 1e-5*conv_max) {
        printf("zconvolve error ? %g %g\n", conv_err, conv_max); exit(1);
      }
    }

  }

  printf("%s PFFFT is OK for N=%d\n", (cplx?"CPLX":"REAL"), N); fflush(stdout);
  
  pffft_destroy_setup(s);
  pffft_aligned_free(ref);
  pffft_aligned_free(in);
  pffft_aligned_free(out);
  pffft_aligned_free(tmp);
  pffft_aligned_free(tmp2);
}
Example #13
0
void benchmark_ffts(int N, int cplx) {
  int Nfloat = (cplx ? N*2 : N);
  int Nbytes = Nfloat * sizeof(float);
  float *X = pffft_aligned_malloc(Nbytes), *Y = pffft_aligned_malloc(Nbytes), *Z = pffft_aligned_malloc(Nbytes);

  double t0, t1, flops;

  int k;
  int max_iter = 5120000/N*4;
#ifdef __arm__
  max_iter /= 4;
#endif
  int iter;

  for (k = 0; k < Nfloat; ++k) {
    X[k] = 0; //sqrtf(k+1);
  }

  // FFTPack benchmark
  {
    float *wrk = malloc(2*Nbytes + 15*sizeof(float));
    int max_iter_ = max_iter/pffft_simd_size(); if (max_iter_ == 0) max_iter_ = 1;
    if (cplx) cffti(N, wrk);
    else      rffti(N, wrk);
    t0 = uclock_sec();  
    
    for (iter = 0; iter < max_iter_; ++iter) {
      if (cplx) {
        cfftf(N, X, wrk);
        cfftb(N, X, wrk);
      } else {
        rfftf(N, X, wrk);
        rfftb(N, X, wrk);
      }
    }
    t1 = uclock_sec();
    free(wrk);
    
    flops = (max_iter_*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html
    show_output("FFTPack", N, cplx, flops, t0, t1, max_iter_);
  }

#ifdef HAVE_VECLIB
  int log2N = (int)(log(N)/log(2) + 0.5f);
  if (N == (1<<log2N)) {
    FFTSetup setup;

    setup = vDSP_create_fftsetup(log2N, FFT_RADIX2);
    DSPSplitComplex zsamples;
    zsamples.realp = &X[0];
    zsamples.imagp = &X[Nfloat/2];
    t0 = uclock_sec();  
    for (iter = 0; iter < max_iter; ++iter) {
      if (cplx) {
        vDSP_fft_zip(setup, &zsamples, 1, log2N, kFFTDirection_Forward);
        vDSP_fft_zip(setup, &zsamples, 1, log2N, kFFTDirection_Inverse);
      } else {
        vDSP_fft_zrip(setup, &zsamples, 1, log2N, kFFTDirection_Forward); 
        vDSP_fft_zrip(setup, &zsamples, 1, log2N, kFFTDirection_Inverse);
      }
    }
    t1 = uclock_sec();
    vDSP_destroy_fftsetup(setup);

    flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html
    show_output("vDSP", N, cplx, flops, t0, t1, max_iter);
  } else {
    show_output("vDSP", N, cplx, -1, -1, -1, -1);
  }
#endif
  
#ifdef HAVE_FFTW
  {
    fftwf_plan planf, planb;
    fftw_complex *in = (fftw_complex*) fftwf_malloc(sizeof(fftw_complex) * N);
    fftw_complex *out = (fftw_complex*) fftwf_malloc(sizeof(fftw_complex) * N);
    memset(in, 0, sizeof(fftw_complex) * N);
    int flags = (N < 40000 ? FFTW_MEASURE : FFTW_ESTIMATE);  // measure takes a lot of time on largest ffts
    //int flags = FFTW_ESTIMATE;
    if (cplx) {
      planf = fftwf_plan_dft_1d(N, (fftwf_complex*)in, (fftwf_complex*)out, FFTW_FORWARD, flags);
      planb = fftwf_plan_dft_1d(N, (fftwf_complex*)in, (fftwf_complex*)out, FFTW_BACKWARD, flags);
    } else {
      planf = fftwf_plan_dft_r2c_1d(N, (float*)in, (fftwf_complex*)out, flags);
      planb = fftwf_plan_dft_c2r_1d(N, (fftwf_complex*)in, (float*)out, flags);
    }

    t0 = uclock_sec();  
    for (iter = 0; iter < max_iter; ++iter) {
      fftwf_execute(planf);
      fftwf_execute(planb);
    }
    t1 = uclock_sec();

    fftwf_destroy_plan(planf);
    fftwf_destroy_plan(planb);
    fftwf_free(in); fftwf_free(out);

    flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html
    show_output((flags == FFTW_MEASURE ? "FFTW (meas.)" : " FFTW (estim)"), N, cplx, flops, t0, t1, max_iter);
  }
#endif  

  // PFFFT benchmark
  {
    PFFFT_Setup *s = pffft_new_setup(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL);
    if (s) {
      t0 = uclock_sec();  
      for (iter = 0; iter < max_iter; ++iter) {
        pffft_transform(s, X, Z, Y, PFFFT_FORWARD);
        pffft_transform(s, X, Z, Y, PFFFT_BACKWARD);
      }
      t1 = uclock_sec();
      pffft_destroy_setup(s);
    
      flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html
      show_output("PFFFT", N, cplx, flops, t0, t1, max_iter);
    }
  }

  if (!array_output_format) {
    printf("--\n");
  }

  pffft_aligned_free(X);
  pffft_aligned_free(Y);
  pffft_aligned_free(Z);
}