Example #1
0
/* Create a plan for real-valued FFTs of the given length.

   Two cost estimates are compared: comp1 (driven by the sum of the prime
   factors of length, for the direct FFTPACK-style transform) and comp2
   (driven by length*log(3*length), for the Bluestein variant).  The cheaper
   one is selected and its work area is initialised, either by bluestein_i()
   or by rffti() on a buffer of 2*length+15 doubles.

   Returns the new plan, or NULL if one of the allocations made directly in
   this function fails (in that case nothing is leaked). */
real_plan make_real_plan (int length)
  {
  real_plan plan = (real_plan) malloc(sizeof(real_plan_i));
  int pfsum;
  double comp1, comp2;

  if (plan==NULL)
    return NULL;

  pfsum = prime_factor_sum(length);
  comp1 = .5*length*pfsum;
  comp2 = 2*3*length*log(3.*length);
  plan->length=length;
  plan->bluestein = (comp2<comp1);
  if (plan->bluestein)
    bluestein_i (length,&(plan->work));
  else
    {
    /* Widen before multiplying so that 2*length+15 cannot overflow int. */
    plan->work=(double *)malloc((2*(size_t)length+15)*sizeof(double));
    if (plan->work==NULL)
      {
      free(plan);
      return NULL;
      }
    rffti(length, plan->work);
    }
  return plan;
  }
Example #2
0
/* Build a plan for real-valued FFTs of the requested length.

   The plan records the length and whether the Bluestein variant is used.
   Bluestein is picked when its estimated cost (scaled by an empirical
   factor of 3) is below the estimate for the direct transform, which is
   derived from the sum of the prime factors of length.  The matching work
   area is set up by bluestein_i() or by rffti() respectively. */
real_plan make_real_plan (size_t length)
  {
  real_plan result = RALLOC(real_plan_i,1);
  size_t factor_sum = prime_factor_sum(length);
  double cost_direct = .5*length*factor_sum;
  double cost_bluestein = 2*3*length*log(3.*length);

  /* fudge factor that appears to give good overall performance */
  cost_bluestein*=3;

  result->length=length;
  result->bluestein = (cost_bluestein<cost_direct);

  if (!result->bluestein)
    {
    /* direct transform: rffti() fills a table of 2*length+15 doubles */
    result->work=RALLOC(double,2*length+15);
    rffti(length, result->work);
    return result;
    }

  bluestein_i (length,&(result->work));
  return result;
  }
Example #3
0
/*
 * Python entry point: rffti(n) -> 1-D double array of length 2*n + 15.
 *
 * Parses a single integer argument n, allocates a new 1-D NPY_DOUBLE array of
 * 2*n + 15 elements and lets rffti() fill it for a real transform of length n.
 *
 * Returns the new array, or NULL with a Python exception set when argument
 * parsing fails, when n is not positive, or when the array cannot be created.
 */
static PyObject *
fftpack_rffti(PyObject *NPY_UNUSED(self), PyObject *args)
{
  PyArrayObject *op;
  npy_intp dim;
  long n;

  if (!PyArg_ParseTuple(args, "l", &n)) {
      return NULL;
  }
  /*
   * A transform needs at least one data point.  Reject n < 1 here instead of
   * handing a zero or negative length to rffti() together with a buffer whose
   * size was derived from that same value.
   */
  if (n < 1) {
      PyErr_Format(PyExc_ValueError,
                   "Invalid number of FFT data points (%ld) specified.", n);
      return NULL;
  }
  /*Magic size needed by rffti*/
  dim = 2*n + 15;
  /*Create a 1 dimensional array of dimensions of type double*/
  op = (PyArrayObject *)PyArray_SimpleNew(1, &dim, NPY_DOUBLE);
  if (op == NULL) {
      return NULL;
  }
  NPY_SIGINT_ON;
  rffti(n, (double *)PyArray_DATA((PyArrayObject*)op));
  NPY_SIGINT_OFF;

  return (PyObject *)op;
}
Example #4
0
/**
 * Smooth raw_vec by convolving it with this->weights using FFTPACK real
 * transforms, and store the leading samples of the result in out_vec.
 *
 * Both the signal and the (re-centred) kernel are zero-padded to
 * N = raw_vec.size() + weights.size()/2 + 1 samples, transformed with rfftf,
 * multiplied as complex spectra, transformed back with rfftb and scaled by
 * 1/N (rfftf followed by rfftb multiplies a sequence by N).
 *
 * @param raw_vec  input samples (not modified)
 * @param out_vec  destination; it is NOT resized here. The first
 *                 min(out_vec.size(), N) elements are overwritten, any
 *                 remaining elements are left untouched.
 * @param test     unused
 *
 * Requires weights.size() to be odd (asserted). Without HAVE_FFTPACK the
 * function throws a string literal.
 *
 * Python reference the implementation was modelled on:
 *
 *     n = len(values)
 *     assert(len(weights) % 2 == 1)
 *     N = n + len(weights) / 2 + 1
 *     values_padded = numpy.zeros(N)
 *     weights_padded = numpy.zeros(N)
 *     values_padded[0:n] = values
 *     M_mid = len(weights) / 2
 *     weights_padded[0] = weights[M_mid]
 *     for i in range(1,M_mid):
 *        weights_padded[N-i] = weights[M_mid-i]
 *        weights_padded[i]   = weights[M_mid+i]
 *     convolved_fft = data_fft * weights_fft
 *     convolved_real = numpy.fft.irfft(convolved_fft)
 *     return convolved_real[:n]
 */
void ChromSmoother::smooth_vect_fft( const std::vector<float> & raw_vec, std::vector<float> & out_vec, bool test) {

    (void)test;

#ifdef HAVE_FFTPACK

    const int n = static_cast<int>(raw_vec.size());
    std::cerr << "n % 2" << n % 2 << " , n: " << n << std::endl;

    const int M = static_cast<int>(this->weights.size());
    assert( M % 2 == 1 );
    const int M_mid = M / 2;
    const int N = n + M_mid + 1;

    // Signal followed by zeros up to the transform length.
    std::vector<float> input_padded(raw_vec);
    input_padded.resize(N, 0.0f);

    // Kernel wrapped around index 0 so that its centre tap sits at position 0.
    // NOTE(review): like the Python reference, this loop stops at M_mid-1, so
    // weights[0] and weights[M-1] never enter the kernel. That looks like an
    // off-by-one, but it is kept as-is because the reference does the same.
    std::vector<float> weights_padded(N, 0.0f);
    weights_padded[0] = this->weights[M_mid];
    for ( int i = 1 ; i < M_mid ; i++ ) {
       weights_padded[N-i] = this->weights[M_mid-i];
       weights_padded[i]   = this->weights[M_mid+i];
    }

    // One work table serves every transform of length N. The size is kept at
    // the original (generous) 8*N+15 floats.
    std::vector<float> wsave(8*N+15);
    std::vector<float> fft_prod(N);

    std::cerr << "finished padding" << std::endl;

    // The table must be initialised for the same length that rfftf/rfftb are
    // later called with (the original initialised the backward table with n
    // but transformed N samples with it).
    rffti( N, &(wsave[0]) );
    rfftf( N, &(input_padded[0]), &(wsave[0]) );

    std::cerr << "weights fft 1" << std::endl;
    rfftf( N, &(weights_padded[0]), &(wsave[0]) );

    // rfftf returns a packed half-complex spectrum:
    //   [0]              DC term (real)
    //   [2k-1], [2k]     real / imaginary part of bin k, k = 1 .. (N-1)/2
    //   [N-1]            Nyquist term (real), only when N is even
    // so the spectra have to be multiplied as complex numbers, bin by bin.
    fft_prod[0] = input_padded[0] * weights_padded[0];
    for ( int im = 2 ; im < N ; im += 2 ) {
       const int re = im - 1;
       const float a_re = input_padded[re];
       const float a_im = input_padded[im];
       const float b_re = weights_padded[re];
       const float b_im = weights_padded[im];
       fft_prod[re] = a_re * b_re - a_im * b_im;
       fft_prod[im] = a_re * b_im + a_im * b_re;
    }
    if ( N % 2 == 0 ) {
       fft_prod[N-1] = input_padded[N-1] * weights_padded[N-1];
    }
    std::cerr << "product 1" << std::endl;

    rfftb( N, &(fft_prod[0]), &(wsave[0]) );
    std::cerr << "backwards fft" << std::endl;

    // rfftb is unnormalised: undo the factor N, and never read past the N
    // samples that were actually computed.
    const float scale = 1.0f / N;
    std::vector<float>::size_type limit = out_vec.size();
    if ( limit > fft_prod.size() ) {
       limit = fft_prod.size();
    }
    for ( std::vector<float>::size_type i = 0 ; i < limit ; i++ ) {
      out_vec[i] = fft_prod[i] * scale;
    }
    std::cerr << "out_vec filled" << std::endl;

#else
    throw("Forget about trying to call smooth_vect_fft if you don't have FFTPACK");
#endif

}
Example #5
0
/*
 * f2c-translated FFTPACK test driver (see the banner comment further down).
 *
 * For each of the first `nns` (7) transform lengths in `nd`, every transform
 * family is exercised in turn: RFFT, SINT, COST, SINQ, COSQ, EZFFT and CFFT.
 * Each family is checked against a directly summed trigonometric series and
 * with a forward/backward round trip.  The maximum absolute deviations are
 * collected in 19 doubles and written, together with n, through the f2c
 * formatted-I/O calls (s_wsfe / do_fio / e_wsfe) using format fmt_1001.
 *
 * Always returns 0.
 *
 * NOTE(review): c_n1 and c__1 are used but not declared in this function;
 * presumably they are file-scope f2c constants (-1 and 1) - confirm.
 * NOTE(review): abs() and max() are not declared here either; presumably the
 * f2c.h macros.  If the integer abs() from <stdlib.h> were picked up instead,
 * the error measures would be truncated - confirm.
 */
/* Main program */ int MAIN__(void)
{
    /* Initialized data */

    /* Transform lengths under test; only the first 7 entries are set and */
    /* only those are visited (nns = 7 below). */
    static integer nd[10] = { 120,54,49,32,4,3,2 };

    /* Format strings */
    static char fmt_1001[] = "(\0020N\002,i5,\002 RFFTF  \002,e10.3,\002 RFF"
	    "TB  \002,e10.3,\002 RFFTFB \002,e10.3,\002 SINT   \002,e10.3,"
	    "\002 SINTFB \002,e10.3,\002 COST   \002,e10.3/7x,\002 COSTFB "
	    "\002,e10.3,\002 SINQF  \002,e10.3,\002 SINQB  \002,e10.3,\002 SI"
	    "NQFB \002,e10.3,\002 COSQF  \002,e10.3,\002 COSQB  \002,e10.3/7x,"
	    "\002 COSQFB \002,e10.3,\002 DEZF   \002,e10.3,\002 DEZB   \002,e"
	    "10.3,\002 DEZFB  \002,e10.3,\002 CFFTF  \002,e10.3,\002 CFFTB "
	    " \002,e10.3/7x,\002 CFFTFB \002,e10.3)";

    /* System generated locals */
    integer i__1, i__2, i__3, i__4, i__5, i__6;
    doublereal d__1, d__2, d__3, d__4;
    doublecomplex z__1, z__2, z__3;

    /* Builtin functions */
    double sqrt(doublereal), sin(doublereal), cos(doublereal);
    integer pow_ii(integer *, integer *);
    double atan(doublereal), z_abs(doublecomplex *);
    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void);

    /* Local variables */
    /* x / y hold the sequence under test and its reference, xh keeps a */
    /* pristine copy of the input, w and ifac are the work areas handed to */
    /* every *i initialiser and transform. */
    doublereal a[100], b[100];
    integer i__, j, k, n;
    doublereal w[2000], x[200], y[200], ah[100], bh[100], cf, fn, dt, pi;
    doublecomplex cx[200], cy[200];
    doublereal xh[200];
    integer nz, nm1, np1, ns2;
    doublereal arg, tfn, tpi;
    integer nns;
    doublereal sum, arg1, arg2;
    integer ns2m;
    doublereal sum1, sum2, dcfb;
    integer ifac[64], modn;
    doublereal rftb, rftf;
    extern /* Subroutine */ void cost(integer *, doublereal *, doublereal *, 
	    integer *), sint(integer *, doublereal *, doublereal *, integer *
	    );
    doublereal dezb1, dezf1, sqrt2;
    extern /* Subroutine */ void cfftb(integer *, doublecomplex *, doublereal 
	    *, integer *), cfftf(integer *, doublecomplex *, doublereal *, 
	    integer *);
    doublereal dezfb;
    extern /* Subroutine */ void cffti(integer *, doublereal *, integer *), 
	    rfftb(integer *, doublereal *, doublereal *, integer *);
    doublereal rftfb;
    extern /* Subroutine */ void rfftf(integer *, doublereal *, doublereal *, 
	    integer *), cosqb(integer *, doublereal *, doublereal *, integer 
	    *), rffti(integer *, doublereal *, integer *), cosqf(integer *, 
	    doublereal *, doublereal *, integer *), sinqb(integer *, 
	    doublereal *, doublereal *, integer *), cosqi(integer *, 
	    doublereal *, integer *), sinqf(integer *, doublereal *, 
	    doublereal *, integer *), costi(integer *, doublereal *, integer 
	    *);
    doublereal azero;
    extern /* Subroutine */ void sinqi(integer *, doublereal *, integer *), 
	    sinti(integer *, doublereal *, integer *);
    doublereal costt, sintt, dcfftb, dcfftf, cosqfb, costfb;
    extern /* Subroutine */ void ezfftb(integer *, doublereal *, doublereal *,
	     doublereal *, doublereal *, doublereal *, integer *);
    doublereal sinqfb;
    extern /* Subroutine */ void ezfftf(integer *, doublereal *, doublereal *,
	     doublereal *, doublereal *, doublereal *, integer *);
    doublereal sintfb;
    extern /* Subroutine */ void ezffti(integer *, doublereal *, integer *);
    doublereal azeroh, cosqbt, cosqft, sinqbt, sinqft;

    /* Fortran I/O blocks */
    static cilist io___58 = { 0, 6, 0, fmt_1001, 0 };



/*     * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

/*                       VERSION 4  APRIL 1985 */

/*                         A TEST DRIVER FOR */
/*          A PACKAGE OF FORTRAN SUBPROGRAMS FOR THE FAST FOURIER */
/*           TRANSFORM OF PERIODIC AND OTHER SYMMETRIC SEQUENCES */

/*                              BY */

/*                       PAUL N SWARZTRAUBER */

/*       NATIONAL CENTER FOR ATMOSPHERIC RESEARCH  BOULDER,COLORADO 80307 */

/*        WHICH IS SPONSORED BY THE NATIONAL SCIENCE FOUNDATION */

/*     * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */


/*             THIS PROGRAM TESTS THE PACKAGE OF FAST FOURIER */
/*     TRANSFORMS FOR BOTH COMPLEX AND REAL PERIODIC SEQUENCES AND */
/*     CERTAIN OTHER SYMMETRIC SEQUENCES THAT ARE LISTED BELOW. */

/*     1.   RFFTI     INITIALIZE  RFFTF AND RFFTB */
/*     2.   RFFTF     FORWARD TRANSFORM OF A REAL PERIODIC SEQUENCE */
/*     3.   RFFTB     BACKWARD TRANSFORM OF A REAL COEFFICIENT ARRAY */

/*     4.   EZFFTI    INITIALIZE EZFFTF AND EZFFTB */
/*     5.   EZFFTF    A SIMPLIFIED REAL PERIODIC FORWARD TRANSFORM */
/*     6.   EZFFTB    A SIMPLIFIED REAL PERIODIC BACKWARD TRANSFORM */

/*     7.   SINTI     INITIALIZE SINT */
/*     8.   SINT      SINE TRANSFORM OF A REAL ODD SEQUENCE */

/*     9.   COSTI     INITIALIZE COST */
/*     10.  COST      COSINE TRANSFORM OF A REAL EVEN SEQUENCE */

/*     11.  SINQI     INITIALIZE SINQF AND SINQB */
/*     12.  SINQF     FORWARD SINE TRANSFORM WITH ODD WAVE NUMBERS */
/*     13.  SINQB     UNNORMALIZED INVERSE OF SINQF */

/*     14.  COSQI     INITIALIZE COSQF AND COSQB */
/*     15.  COSQF     FORWARD COSINE TRANSFORM WITH ODD WAVE NUMBERS */
/*     16.  COSQB     UNNORMALIZED INVERSE OF COSQF */

/*     17.  CFFTI     INITIALIZE CFFTF AND CFFTB */
/*     18.  CFFTF     FORWARD TRANSFORM OF A COMPLEX PERIODIC SEQUENCE */
/*     19.  CFFTB     UNNORMALIZED INVERSE OF CFFTF */


    sqrt2 = sqrt(2.0);
    nns = 7;
    i__1 = nns;
    /* One pass per transform length nd[0..nns-1]. */
    for (nz = 1; nz <= i__1; ++nz) {
	n = nd[nz - 1];
	modn = n % 2;
	/* NOTE(review): `real` is presumably f2c's single-precision typedef, */
	/* so the (real) casts below round through float - confirm. */
	fn = (real) n;
	tfn = fn + fn;
	np1 = n + 1;
	nm1 = n - 1;
	i__2 = np1;
	/* Fill x, y and the pristine copy xh with n+1 samples of sin(j*sqrt2). */
	for (j = 1; j <= i__2; ++j) {
	    x[j - 1] = sin((real) j * sqrt2);
	    y[j - 1] = x[j - 1];
	    xh[j - 1] = x[j - 1];
/* L101: */
	}

/*     TEST SUBROUTINES RFFTI,RFFTF AND RFFTB */

	rffti(&n, w, ifac);
	pi = 3.141592653589793238462643383279502884197169399375108209749445923;
	dt = (pi + pi) / fn;
	ns2 = (n + 1) / 2;
	if (ns2 < 2) {
	    goto L104;
	}
	i__2 = ns2;
	/* Reference forward transform by direct summation; the cosine sum and */
	/* the negated sine sum of harmonic k-1 go to y[2k-3] and y[2k-2]. */
	for (k = 2; k <= i__2; ++k) {
	    sum1 = 0.0;
	    sum2 = 0.0;
	    arg = (real) (k - 1) * dt;
	    i__3 = n;
	    for (i__ = 1; i__ <= i__3; ++i__) {
		arg1 = (real) (i__ - 1) * arg;
		sum1 += x[i__ - 1] * cos(arg1);
		sum2 += x[i__ - 1] * sin(arg1);
/* L102: */
	    }
	    y[(k << 1) - 3] = sum1;
	    y[(k << 1) - 2] = -sum2;
/* L103: */
	}
L104:
	/* sum1 / sum2 accumulate the odd- and even-numbered samples; their */
	/* sum goes to y[0] and, for even n, their difference to y[n-1]. */
	sum1 = 0.0;
	sum2 = 0.0;
	i__2 = nm1;
	for (i__ = 1; i__ <= i__2; i__ += 2) {
	    sum1 += x[i__ - 1];
	    sum2 += x[i__];
/* L105: */
	}
	if (modn == 1) {
	    sum1 += x[n - 1];
	}
	y[0] = sum1 + sum2;
	if (modn == 0) {
	    y[n - 1] = sum1 - sum2;
	}
	rfftf(&n, x, w, ifac);
	rftf = 0.0;
	i__2 = n;
	/* rftf = max |rfftf result - reference|; x is restored from xh. */
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = rftf, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
	    rftf = max(d__2,d__3);
	    x[i__ - 1] = xh[i__ - 1];
/* L106: */
	}
	rftf /= fn;
	i__2 = n;
	/* Reference backward transform by direct summation into y. */
	for (i__ = 1; i__ <= i__2; ++i__) {
	    sum = x[0] * 0.5;
	    arg = (real) (i__ - 1) * dt;
	    if (ns2 < 2) {
		goto L108;
	    }
	    i__3 = ns2;
	    for (k = 2; k <= i__3; ++k) {
		arg1 = (real) (k - 1) * arg;
		sum = sum + x[(k << 1) - 3] * cos(arg1) - x[(k << 1) - 2] * 
			sin(arg1);
/* L107: */
	    }
L108:
	    if (modn == 0) {
		i__3 = i__ - 1;
		sum += (real) pow_ii(&c_n1, &i__3) * 0.5 * x[n - 1];
	    }
	    y[i__ - 1] = sum + sum;
/* L109: */
	}
	rfftb(&n, x, w, ifac);
	rftb = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = rftb, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
	    rftb = max(d__2,d__3);
	    x[i__ - 1] = xh[i__ - 1];
	    y[i__ - 1] = xh[i__ - 1];
/* L110: */
	}
	/* Round trip: backward then forward on y, compared with x after */
	/* scaling by cf = 1/n. */
	rfftb(&n, y, w, ifac);
	rfftf(&n, y, w, ifac);
	cf = 1.0 / fn;
	rftfb = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = rftfb, d__3 = (d__1 = cf * y[i__ - 1] - x[i__ - 1], abs(
		    d__1));
	    rftfb = max(d__2,d__3);
/* L111: */
	}

/*     TEST SUBROUTINES SINTI AND SINT */

	/* The sine transform is tested on n-1 points. */
	dt = pi / fn;
	i__2 = nm1;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    x[i__ - 1] = xh[i__ - 1];
/* L112: */
	}
	i__2 = nm1;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    y[i__ - 1] = 0.0;
	    arg1 = (real) i__ * dt;
	    i__3 = nm1;
	    for (k = 1; k <= i__3; ++k) {
		y[i__ - 1] += x[k - 1] * sin((real) k * arg1);
/* L113: */
	    }
	    y[i__ - 1] += y[i__ - 1];
/* L114: */
	}
	sinti(&nm1, w, ifac);
	sint(&nm1, x, w, ifac);
	cf = 0.5 / fn;
	sintt = 0.0;
	i__2 = nm1;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = sintt, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
	    sintt = max(d__2,d__3);
	    x[i__ - 1] = xh[i__ - 1];
	    y[i__ - 1] = x[i__ - 1];
/* L115: */
	}
	sintt = cf * sintt;
	/* Round trip: sint applied twice, compared after scaling by cf. */
	sint(&nm1, x, w, ifac);
	sint(&nm1, x, w, ifac);
	sintfb = 0.0;
	i__2 = nm1;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = sintfb, d__3 = (d__1 = cf * x[i__ - 1] - y[i__ - 1], abs(
		    d__1));
	    sintfb = max(d__2,d__3);
/* L116: */
	}

/*     TEST SUBROUTINES COSTI AND COST */

	/* The cosine transform is tested on n+1 points. */
	i__2 = np1;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    x[i__ - 1] = xh[i__ - 1];
/* L117: */
	}
	i__2 = np1;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    i__3 = i__ + 1;
	    y[i__ - 1] = (x[0] + (real) pow_ii(&c_n1, &i__3) * x[n]) * 0.5;
	    arg = (real) (i__ - 1) * dt;
	    i__3 = n;
	    for (k = 2; k <= i__3; ++k) {
		y[i__ - 1] += x[k - 1] * cos((real) (k - 1) * arg);
/* L118: */
	    }
	    y[i__ - 1] += y[i__ - 1];
/* L119: */
	}
	costi(&np1, w, ifac);
	cost(&np1, x, w, ifac);
	costt = 0.0;
	i__2 = np1;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = costt, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
	    costt = max(d__2,d__3);
	    x[i__ - 1] = xh[i__ - 1];
	    y[i__ - 1] = xh[i__ - 1];
/* L120: */
	}
	costt = cf * costt;
	/* Round trip: cost applied twice, compared after scaling by cf. */
	cost(&np1, x, w, ifac);
	cost(&np1, x, w, ifac);
	costfb = 0.0;
	i__2 = np1;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = costfb, d__3 = (d__1 = cf * x[i__ - 1] - y[i__ - 1], abs(
		    d__1));
	    costfb = max(d__2,d__3);
/* L121: */
	}

/*     TEST SUBROUTINES SINQI,SINQF AND SINQB */

	cf = 0.25 / fn;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    y[i__ - 1] = xh[i__ - 1];
/* L122: */
	}
	dt = pi / (fn + fn);
	i__2 = n;
	/* Reference for sinqb, summed directly into x. */
	for (i__ = 1; i__ <= i__2; ++i__) {
	    x[i__ - 1] = 0.0;
	    arg = dt * (real) i__;
	    i__3 = n;
	    for (k = 1; k <= i__3; ++k) {
		x[i__ - 1] += y[k - 1] * sin((real) (k + k - 1) * arg);
/* L123: */
	    }
	    x[i__ - 1] *= 4.0;
/* L124: */
	}
	sinqi(&n, w, ifac);
	sinqb(&n, y, w, ifac);
	sinqbt = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = sinqbt, d__3 = (d__1 = y[i__ - 1] - x[i__ - 1], abs(d__1));
	    sinqbt = max(d__2,d__3);
	    x[i__ - 1] = xh[i__ - 1];
/* L125: */
	}
	sinqbt = cf * sinqbt;
	i__2 = n;
	/* Reference for sinqf, summed directly into y. */
	for (i__ = 1; i__ <= i__2; ++i__) {
	    arg = (real) (i__ + i__ - 1) * dt;
	    i__3 = i__ + 1;
	    y[i__ - 1] = (real) pow_ii(&c_n1, &i__3) * 0.5 * x[n - 1];
	    i__3 = nm1;
	    for (k = 1; k <= i__3; ++k) {
		y[i__ - 1] += x[k - 1] * sin((real) k * arg);
/* L126: */
	    }
	    y[i__ - 1] += y[i__ - 1];
/* L127: */
	}
	sinqf(&n, x, w, ifac);
	sinqft = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = sinqft, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
	    sinqft = max(d__2,d__3);
	    y[i__ - 1] = xh[i__ - 1];
	    x[i__ - 1] = xh[i__ - 1];
/* L128: */
	}
	/* Round trip: sinqf then sinqb on y, compared with x after scaling. */
	sinqf(&n, y, w, ifac);
	sinqb(&n, y, w, ifac);
	sinqfb = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = sinqfb, d__3 = (d__1 = cf * y[i__ - 1] - x[i__ - 1], abs(
		    d__1));
	    sinqfb = max(d__2,d__3);
/* L129: */
	}

/*     TEST SUBROUTINES COSQI,COSQF AND COSQB */

	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    y[i__ - 1] = xh[i__ - 1];
/* L130: */
	}
	i__2 = n;
	/* Reference for cosqb, summed directly into x. */
	for (i__ = 1; i__ <= i__2; ++i__) {
	    x[i__ - 1] = 0.0;
	    arg = (real) (i__ - 1) * dt;
	    i__3 = n;
	    for (k = 1; k <= i__3; ++k) {
		x[i__ - 1] += y[k - 1] * cos((real) (k + k - 1) * arg);
/* L131: */
	    }
	    x[i__ - 1] *= 4.0;
/* L132: */
	}
	cosqi(&n, w, ifac);
	cosqb(&n, y, w, ifac);
	cosqbt = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = cosqbt, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
	    cosqbt = max(d__2,d__3);
	    x[i__ - 1] = xh[i__ - 1];
/* L133: */
	}
	cosqbt = cf * cosqbt;
	i__2 = n;
	/* Reference for cosqf, summed directly into y. */
	for (i__ = 1; i__ <= i__2; ++i__) {
	    y[i__ - 1] = x[0] * 0.5;
	    arg = (real) (i__ + i__ - 1) * dt;
	    i__3 = n;
	    for (k = 2; k <= i__3; ++k) {
		y[i__ - 1] += x[k - 1] * cos((real) (k - 1) * arg);
/* L134: */
	    }
	    y[i__ - 1] += y[i__ - 1];
/* L135: */
	}
	cosqf(&n, x, w, ifac);
	cosqft = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = cosqft, d__3 = (d__1 = y[i__ - 1] - x[i__ - 1], abs(d__1));
	    cosqft = max(d__2,d__3);
	    x[i__ - 1] = xh[i__ - 1];
	    y[i__ - 1] = xh[i__ - 1];
/* L136: */
	}
	cosqft = cf * cosqft;
	/* Round trip: cosqb then cosqf on x, compared with y after scaling. */
	cosqb(&n, x, w, ifac);
	cosqf(&n, x, w, ifac);
	cosqfb = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = cosqfb, d__3 = (d__1 = cf * x[i__ - 1] - y[i__ - 1], abs(
		    d__1));
	    cosqfb = max(d__2,d__3);
/* L137: */
	}

/*     TEST PROGRAMS EZFFTI,EZFFTF,EZFFTB */

	ezffti(&n, w, ifac);
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    x[i__ - 1] = xh[i__ - 1];
/* L138: */
	}
	tpi = atan(1.0) * 8.0;
	dt = tpi / (real) n;
	ns2 = (n + 1) / 2;
	cf = 2.0 / (real) n;
	ns2m = ns2 - 1;
	if (ns2m <= 0) {
	    goto L141;
	}
	i__2 = ns2m;
	/* Reference coefficients a[k-1], b[k-1] by direct summation. */
	for (k = 1; k <= i__2; ++k) {
	    sum1 = 0.0;
	    sum2 = 0.0;
	    arg = (real) k * dt;
	    i__3 = n;
	    for (i__ = 1; i__ <= i__3; ++i__) {
		arg1 = (real) (i__ - 1) * arg;
		sum1 += x[i__ - 1] * cos(arg1);
		sum2 += x[i__ - 1] * sin(arg1);
/* L139: */
	    }
	    a[k - 1] = cf * sum1;
	    b[k - 1] = cf * sum2;
/* L140: */
	}
L141:
	nm1 = n - 1;
	sum1 = 0.0;
	sum2 = 0.0;
	i__2 = nm1;
	for (i__ = 1; i__ <= i__2; i__ += 2) {
	    sum1 += x[i__ - 1];
	    sum2 += x[i__];
/* L142: */
	}
	if (modn == 1) {
	    sum1 += x[n - 1];
	}
	azero = cf * 0.5 * (sum1 + sum2);
	if (modn == 0) {
	    a[ns2 - 1] = cf * 0.5 * (sum1 - sum2);
	}
	ezfftf(&n, x, &azeroh, ah, bh, w, ifac);
	/* dezf1 = largest deviation between ezfftf output and the reference. */
	dezf1 = (d__1 = azeroh - azero, abs(d__1));
	if (modn == 0) {
/* Computing MAX */
	    d__2 = dezf1, d__3 = (d__1 = a[ns2 - 1] - ah[ns2 - 1], abs(d__1));
	    dezf1 = max(d__2,d__3);
	}
	if (ns2m <= 0) {
	    goto L144;
	}
	i__2 = ns2m;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__3 = dezf1, d__4 = (d__1 = ah[i__ - 1] - a[i__ - 1], abs(d__1)),
		     d__3 = max(d__3,d__4), d__4 = (d__2 = bh[i__ - 1] - b[
		    i__ - 1], abs(d__2));
	    dezf1 = max(d__3,d__4);
/* L143: */
	}
L144:
	ns2 = n / 2;
	if (modn == 0) {
	    b[ns2 - 1] = 0.0;
	}
	i__2 = n;
	/* Reference synthesis from azero, a and b, summed directly into x. */
	for (i__ = 1; i__ <= i__2; ++i__) {
	    sum = azero;
	    arg1 = (real) (i__ - 1) * dt;
	    i__3 = ns2;
	    for (k = 1; k <= i__3; ++k) {
		arg2 = (real) k * arg1;
		sum = sum + a[k - 1] * cos(arg2) + b[k - 1] * sin(arg2);
/* L145: */
	    }
	    x[i__ - 1] = sum;
/* L146: */
	}
	ezfftb(&n, y, &azero, a, b, w, ifac);
	dezb1 = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = dezb1, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
	    dezb1 = max(d__2,d__3);
	    x[i__ - 1] = xh[i__ - 1];
/* L147: */
	}
	/* Round trip: ezfftf on x, ezfftb into y, compared element-wise. */
	ezfftf(&n, x, &azero, a, b, w, ifac);
	ezfftb(&n, y, &azero, a, b, w, ifac);
	dezfb = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = dezfb, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
	    dezfb = max(d__2,d__3);
/* L148: */
	}

/*     TEST  CFFTI,CFFTF,CFFTB */

	i__2 = n;
	/* Complex test sequence cx[i-1] = cos(sqrt2*i) + i*sin(sqrt2*i*i). */
	for (i__ = 1; i__ <= i__2; ++i__) {
	    i__3 = i__ - 1;
	    d__1 = cos(sqrt2 * (real) i__);
	    d__2 = sin(sqrt2 * (real) (i__ * i__));
	    z__1.r = d__1, z__1.i = d__2;
	    cx[i__3].r = z__1.r, cx[i__3].i = z__1.i;
/* L149: */
	}
	dt = (pi + pi) / fn;
	i__2 = n;
	/* Reference forward transform: cy = direct sum with negative angles. */
	for (i__ = 1; i__ <= i__2; ++i__) {
	    arg1 = -((real) (i__ - 1)) * dt;
	    i__3 = i__ - 1;
	    cy[i__3].r = 0.0, cy[i__3].i = 0.0;
	    i__3 = n;
	    for (k = 1; k <= i__3; ++k) {
		arg2 = (real) (k - 1) * arg1;
		i__4 = i__ - 1;
		i__5 = i__ - 1;
		d__1 = cos(arg2);
		d__2 = sin(arg2);
		z__3.r = d__1, z__3.i = d__2;
		i__6 = k - 1;
		z__2.r = z__3.r * cx[i__6].r - z__3.i * cx[i__6].i, z__2.i = 
			z__3.r * cx[i__6].i + z__3.i * cx[i__6].r;
		z__1.r = cy[i__5].r + z__2.r, z__1.i = cy[i__5].i + z__2.i;
		cy[i__4].r = z__1.r, cy[i__4].i = z__1.i;
/* L150: */
	    }
/* L151: */
	}
	cffti(&n, w, ifac);
	cfftf(&n, cx, w, ifac);
	dcfftf = 0.0;
	i__2 = n;
	/* dcfftf = max |cfftf result - reference|; cx is then divided by n. */
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    i__3 = i__ - 1;
	    i__4 = i__ - 1;
	    z__1.r = cx[i__3].r - cy[i__4].r, z__1.i = cx[i__3].i - cy[i__4]
		    .i;
	    d__1 = dcfftf, d__2 = z_abs(&z__1);
	    dcfftf = max(d__1,d__2);
	    i__3 = i__ - 1;
	    i__4 = i__ - 1;
	    z__1.r = cx[i__4].r / fn, z__1.i = cx[i__4].i / fn;
	    cx[i__3].r = z__1.r, cx[i__3].i = z__1.i;
/* L152: */
	}
	dcfftf /= fn;
	i__2 = n;
	/* Reference backward transform: cy = direct sum with positive angles. */
	for (i__ = 1; i__ <= i__2; ++i__) {
	    arg1 = (real) (i__ - 1) * dt;
	    i__3 = i__ - 1;
	    cy[i__3].r = 0.0, cy[i__3].i = 0.0;
	    i__3 = n;
	    for (k = 1; k <= i__3; ++k) {
		arg2 = (real) (k - 1) * arg1;
		i__4 = i__ - 1;
		i__5 = i__ - 1;
		d__1 = cos(arg2);
		d__2 = sin(arg2);
		z__3.r = d__1, z__3.i = d__2;
		i__6 = k - 1;
		z__2.r = z__3.r * cx[i__6].r - z__3.i * cx[i__6].i, z__2.i = 
			z__3.r * cx[i__6].i + z__3.i * cx[i__6].r;
		z__1.r = cy[i__5].r + z__2.r, z__1.i = cy[i__5].i + z__2.i;
		cy[i__4].r = z__1.r, cy[i__4].i = z__1.i;
/* L153: */
	    }
/* L154: */
	}
	cfftb(&n, cx, w, ifac);
	dcfftb = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    i__3 = i__ - 1;
	    i__4 = i__ - 1;
	    z__1.r = cx[i__3].r - cy[i__4].r, z__1.i = cx[i__3].i - cy[i__4]
		    .i;
	    d__1 = dcfftb, d__2 = z_abs(&z__1);
	    dcfftb = max(d__1,d__2);
	    i__3 = i__ - 1;
	    i__4 = i__ - 1;
	    cx[i__3].r = cy[i__4].r, cx[i__3].i = cy[i__4].i;
/* L155: */
	}
	/* Round trip: cfftf then cfftb on cx, compared with cy after 1/n. */
	cf = 1.0 / fn;
	cfftf(&n, cx, w, ifac);
	cfftb(&n, cx, w, ifac);
	dcfb = 0.0;
	i__2 = n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    i__3 = i__ - 1;
	    z__2.r = cf * cx[i__3].r, z__2.i = cf * cx[i__3].i;
	    i__4 = i__ - 1;
	    z__1.r = z__2.r - cy[i__4].r, z__1.i = z__2.i - cy[i__4].i;
	    d__1 = dcfb, d__2 = z_abs(&z__1);
	    dcfb = max(d__1,d__2);
/* L156: */
	}
	/* Emit n followed by the 19 error measures using format fmt_1001. */
	s_wsfe(&io___58);
	do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
	do_fio(&c__1, (char *)&rftf, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&rftb, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&rftfb, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&sintt, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&sintfb, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&costt, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&costfb, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&sinqft, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&sinqbt, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&sinqfb, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&cosqft, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&cosqbt, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&cosqfb, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&dezf1, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&dezb1, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&dezfb, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&dcfftf, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&dcfftb, (ftnlen)sizeof(doublereal));
	do_fio(&c__1, (char *)&dcfb, (ftnlen)sizeof(doublereal));
	e_wsfe();
/* L157: */
    }




    return 0;
} /* MAIN__ */
Example #6
0
/*
  Validate PFFFT against the regular FFTPACK for one transform size.

  N    : transform length
  cplx : non-zero for a complex transform (2*N floats), zero for a real one

  Two passes are run: pass 0 uses pffft_transform (internal ordering, then
  pffft_zreorder), pass 1 uses pffft_transform_ordered.  Each pass checks
    - out-of-place and in-place forward transforms give identical results,
    - the forward result matches the FFTPACK reference computed in pass 0,
    - the backward transform, scaled by 1/N, reproduces the input,
    - pffft_zconvolve_accumulate agrees with a direct spectral product.
  Any mismatch prints a message and terminates the process with exit(1).
  If pffft_new_setup() rejects N the size is skipped with a message.
*/
void pffft_validate_N(int N, int cplx) {
  int Nfloat = N*(cplx?2:1);
  int Nbytes = Nfloat * sizeof(float);
  float *ref, *in, *out, *tmp, *tmp2;
  PFFFT_Setup *s = pffft_new_setup(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL);
  int pass;

  if (!s) { printf("Skipping N=%d, not supported\n", N); return; }
  ref = pffft_aligned_malloc(Nbytes);
  in = pffft_aligned_malloc(Nbytes);
  out = pffft_aligned_malloc(Nbytes);
  tmp = pffft_aligned_malloc(Nbytes);
  tmp2 = pffft_aligned_malloc(Nbytes);

  for (pass=0; pass < 2; ++pass) {
    float ref_max = 0;
    int k;
    //printf("N=%d pass=%d cplx=%d\n", N, pass, cplx);
    // compute reference solution with FFTPACK
    if (pass == 0) {
      float *wrk = malloc(2*Nbytes+15*sizeof(float));
      for (k=0; k < Nfloat; ++k) {
        ref[k] = in[k] = frand()*2-1; 
        out[k] = 1e30;
      }
      if (!cplx) {
        rffti(N, wrk);
        rfftf(N, ref, wrk);
        // use our ordering for real ffts instead of the one of fftpack
        {
          float refN=ref[N-1];
          for (k=N-2; k >= 1; --k) ref[k+1] = ref[k]; 
          ref[1] = refN;
        }
      } else {
        cffti(N, wrk);
        cfftf(N, ref, wrk);
      }
      free(wrk);
    }

    for (k = 0; k < Nfloat; ++k) ref_max = MAX(ref_max, fabs(ref[k]));

      
    // pass 0 : non canonical ordering of transform coefficients  
    if (pass == 0) {
      // test forward transform, with different input / output
      pffft_transform(s, in, tmp, 0, PFFFT_FORWARD);
      memcpy(tmp2, tmp, Nbytes);
      memcpy(tmp, in, Nbytes);
      pffft_transform(s, tmp, tmp, 0, PFFFT_FORWARD);
      for (k = 0; k < Nfloat; ++k) {
        assert(tmp2[k] == tmp[k]);
      }

      // test reordering
      pffft_zreorder(s, tmp, out, PFFFT_FORWARD);
      pffft_zreorder(s, out, tmp, PFFFT_BACKWARD);
      for (k = 0; k < Nfloat; ++k) {
        assert(tmp2[k] == tmp[k]);
      }
      pffft_zreorder(s, tmp, out, PFFFT_FORWARD);
    } else {
      // pass 1 : canonical ordering of transform coeffs.
      pffft_transform_ordered(s, in, tmp, 0, PFFFT_FORWARD);
      memcpy(tmp2, tmp, Nbytes);
      memcpy(tmp, in, Nbytes);
      pffft_transform_ordered(s, tmp, tmp, 0, PFFFT_FORWARD);
      for (k = 0; k < Nfloat; ++k) {
        assert(tmp2[k] == tmp[k]);
      }
      memcpy(out, tmp, Nbytes);
    }

    {
      for (k=0; k < Nfloat; ++k) {
        if (!(fabs(ref[k] - out[k]) < 1e-3*ref_max)) {
          printf("%s forward PFFFT mismatch found for N=%d\n", (cplx?"CPLX":"REAL"), N);
          exit(1);
        }
      }
        
      if (pass == 0) pffft_transform(s, tmp, out, 0, PFFFT_BACKWARD);
      else   pffft_transform_ordered(s, tmp, out, 0, PFFFT_BACKWARD);
      memcpy(tmp2, out, Nbytes);
      memcpy(out, tmp, Nbytes);
      if (pass == 0) pffft_transform(s, out, out, 0, PFFFT_BACKWARD);
      else   pffft_transform_ordered(s, out, out, 0, PFFFT_BACKWARD);
      for (k = 0; k < Nfloat; ++k) {
        assert(tmp2[k] == out[k]);
        out[k] *= 1.f/N;
      }
      for (k = 0; k < Nfloat; ++k) {
        if (fabs(in[k] - out[k]) > 1e-3 * ref_max) {
          // Fail hard, like the forward check above. A stray `break` used to
          // sit in front of exit(1), so an inverse-transform mismatch was
          // only printed and the size was still reported as OK.
          printf("pass=%d, %s IFFFT does not match for N=%d\n", pass, (cplx?"CPLX":"REAL"), N);
          exit(1);
        }
      }
    }

    // quick test of the circular convolution in fft domain
    {
      float conv_err = 0, conv_max = 0;

      pffft_zreorder(s, ref, tmp, PFFFT_FORWARD);
      memset(out, 0, Nbytes);
      pffft_zconvolve_accumulate(s, ref, ref, out, 1.0);
      pffft_zreorder(s, out, tmp2, PFFFT_FORWARD);
      
      // square each complex bin of tmp; for real transforms the first pair
      // holds two independent real values and is squared component-wise
      for (k=0; k < Nfloat; k += 2) {
        float ar = tmp[k], ai=tmp[k+1];
        if (cplx || k > 0) {
          tmp[k] = ar*ar - ai*ai;
          tmp[k+1] = 2*ar*ai;
        } else {
          tmp[0] = ar*ar;
          tmp[1] = ai*ai;
        }
      }
      
      for (k=0; k < Nfloat; ++k) {
        float d = fabs(tmp[k] - tmp2[k]), e = fabs(tmp[k]);
        if (d > conv_err) conv_err = d;
        if (e > conv_max) conv_max = e;
      }
      if (conv_err > 1e-5*conv_max) {
        printf("zconvolve error ? %g %g\n", conv_err, conv_max); exit(1);
      }
    }

  }

  printf("%s PFFFT is OK for N=%d\n", (cplx?"CPLX":"REAL"), N); fflush(stdout);
  
  pffft_destroy_setup(s);
  pffft_aligned_free(ref);
  pffft_aligned_free(in);
  pffft_aligned_free(out);
  pffft_aligned_free(tmp);
  pffft_aligned_free(tmp2);
}
Example #7
0
/*
  Time forward+backward transform pairs of length N for every FFT backend
  that is compiled in (FFTPACK always, vDSP with HAVE_VECLIB, FFTW with
  HAVE_FFTW, and PFFFT) and hand the timings to show_output().

  N    : transform length
  cplx : non-zero for complex transforms (2*N floats), zero for real ones
*/
void benchmark_ffts(int N, int cplx) {
  int n_floats = (cplx ? N*2 : N);
  int n_bytes = n_floats * sizeof(float);
  float *X = pffft_aligned_malloc(n_bytes);
  float *Y = pffft_aligned_malloc(n_bytes);
  float *Z = pffft_aligned_malloc(n_bytes);

  double t_start, t_stop, flops;
  /* estimated flops of a single transform, see http://www.fftw.org/speed/method.html */
  const double flops_per_fft = (cplx ? 5 : 2.5)*N*log((double)N)/M_LN2;

  int idx;
  int n_runs = 5120000/N*4;
#ifdef __arm__
  n_runs /= 4;
#endif
  int run;

  for (idx = 0; idx < n_floats; ++idx) {
    X[idx] = 0; //sqrtf(k+1);
  }

  // FFTPack benchmark
  {
    float *work = malloc(2*n_bytes + 15*sizeof(float));
    int n_runs_fftpack = n_runs/pffft_simd_size();
    if (n_runs_fftpack == 0) n_runs_fftpack = 1;

    if (cplx) {
      cffti(N, work);
    } else {
      rffti(N, work);
    }

    t_start = uclock_sec();
    if (cplx) {
      for (run = 0; run < n_runs_fftpack; ++run) {
        cfftf(N, X, work);
        cfftb(N, X, work);
      }
    } else {
      for (run = 0; run < n_runs_fftpack; ++run) {
        rfftf(N, X, work);
        rfftb(N, X, work);
      }
    }
    t_stop = uclock_sec();
    free(work);

    flops = (n_runs_fftpack*2) * flops_per_fft;
    show_output("FFTPack", N, cplx, flops, t_start, t_stop, n_runs_fftpack);
  }

#ifdef HAVE_VECLIB
  int lg2 = (int)(log(N)/log(2) + 0.5f);
  if (N != (1<<lg2)) {
    // vDSP is only benchmarked for power-of-two sizes
    show_output("vDSP", N, cplx, -1, -1, -1, -1);
  } else {
    FFTSetup vdsp_setup;

    vdsp_setup = vDSP_create_fftsetup(lg2, FFT_RADIX2);
    DSPSplitComplex split;
    split.realp = &X[0];
    split.imagp = &X[n_floats/2];
    t_start = uclock_sec();
    if (cplx) {
      for (run = 0; run < n_runs; ++run) {
        vDSP_fft_zip(vdsp_setup, &split, 1, lg2, kFFTDirection_Forward);
        vDSP_fft_zip(vdsp_setup, &split, 1, lg2, kFFTDirection_Inverse);
      }
    } else {
      for (run = 0; run < n_runs; ++run) {
        vDSP_fft_zrip(vdsp_setup, &split, 1, lg2, kFFTDirection_Forward);
        vDSP_fft_zrip(vdsp_setup, &split, 1, lg2, kFFTDirection_Inverse);
      }
    }
    t_stop = uclock_sec();
    vDSP_destroy_fftsetup(vdsp_setup);

    flops = (n_runs*2) * flops_per_fft;
    show_output("vDSP", N, cplx, flops, t_start, t_stop, n_runs);
  }
#endif

#ifdef HAVE_FFTW
  {
    fftwf_plan plan_fwd, plan_bwd;
    fftw_complex *src = (fftw_complex*) fftwf_malloc(sizeof(fftw_complex) * N);
    fftw_complex *dst = (fftw_complex*) fftwf_malloc(sizeof(fftw_complex) * N);
    memset(src, 0, sizeof(fftw_complex) * N);
    // measure takes a lot of time on largest ffts
    int plan_flags = (N < 40000 ? FFTW_MEASURE : FFTW_ESTIMATE);
    //int flags = FFTW_ESTIMATE;
    if (!cplx) {
      plan_fwd = fftwf_plan_dft_r2c_1d(N, (float*)src, (fftwf_complex*)dst, plan_flags);
      plan_bwd = fftwf_plan_dft_c2r_1d(N, (fftwf_complex*)src, (float*)dst, plan_flags);
    } else {
      plan_fwd = fftwf_plan_dft_1d(N, (fftwf_complex*)src, (fftwf_complex*)dst, FFTW_FORWARD, plan_flags);
      plan_bwd = fftwf_plan_dft_1d(N, (fftwf_complex*)src, (fftwf_complex*)dst, FFTW_BACKWARD, plan_flags);
    }

    t_start = uclock_sec();
    for (run = 0; run < n_runs; ++run) {
      fftwf_execute(plan_fwd);
      fftwf_execute(plan_bwd);
    }
    t_stop = uclock_sec();

    fftwf_destroy_plan(plan_fwd);
    fftwf_destroy_plan(plan_bwd);
    fftwf_free(src);
    fftwf_free(dst);

    flops = (n_runs*2) * flops_per_fft;
    show_output((plan_flags == FFTW_MEASURE ? "FFTW (meas.)" : " FFTW (estim)"), N, cplx, flops, t_start, t_stop, n_runs);
  }
#endif

  // PFFFT benchmark
  {
    PFFFT_Setup *setup = pffft_new_setup(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL);
    if (setup) {
      t_start = uclock_sec();
      for (run = 0; run < n_runs; ++run) {
        pffft_transform(setup, X, Z, Y, PFFFT_FORWARD);
        pffft_transform(setup, X, Z, Y, PFFFT_BACKWARD);
      }
      t_stop = uclock_sec();
      pffft_destroy_setup(setup);

      flops = (n_runs*2) * flops_per_fft;
      show_output("PFFFT", N, cplx, flops, t_start, t_stop, n_runs);
    }
  }

  if (!array_output_format) {
    printf("--\n");
  }

  pffft_aligned_free(X);
  pffft_aligned_free(Y);
  pffft_aligned_free(Z);
}