/*
 * Forward transform of a real sequence, in place, with the result left in
 * packed (FFTPACK-style) order.
 *
 *   plan  - transform plan; plan->length is the number of real samples,
 *           plan->work the precomputed work area, and plan->bluestein
 *           selects which algorithm is used.
 *   data  - plan->length doubles; overwritten with the transform.
 *
 * When plan->bluestein is set, the input is widened into a temporary complex
 * array (imaginary parts zero), transformed with bluestein(), and the first
 * n meaningful values are copied back: data[0] receives tmp[0] and
 * data[1..n-1] receive tmp[2..n], i.e. the imaginary part of the first bin
 * is skipped. Otherwise rfftf() works directly on data.
 *
 * The process is aborted if the temporary buffer cannot be allocated; the
 * function has no way to report an error to its caller.
 */
void real_plan_forward_fftpack (real_plan plan, double *data)
  {
  if (plan->bluestein)
    {
    const int n = plan->length;
    /* Do the size arithmetic in size_t: 2*n as an int overflows for large n. */
    const size_t tmp_len = 2*(size_t)n;
    double *tmp = (double *)malloc(tmp_len*sizeof(double));
    int m;

    /* Fail loudly instead of writing through a null pointer below. */
    if (tmp == NULL)
      abort();

    for (m=0; m<n; ++m)
      {
      tmp[2*m] = data[m];
      tmp[2*m+1] = 0.;
      }
    /* The last argument (-1) presumably selects the forward direction -
       confirm against bluestein()'s definition. */
    bluestein(n,tmp,plan->work,-1);
    data[0] = tmp[0];
    memcpy (data+1, tmp+2, (n-1)*sizeof(double));
    free (tmp);
    }
  else
    rfftf (plan->length, data, plan->work);
  }
/*
 * Forward transform of real input stored in complex (interleaved) form.
 *
 *   plan  - transform plan (length, work area, bluestein flag).
 *   data  - 2*plan->length doubles. On entry the even slots hold the real
 *           samples. On return every slot has been rewritten: slots
 *           0..len+1 come from the transform and slots 2*len-k, 2*len-k+1
 *           (k even, 2 <= k < len) are filled by mirroring slot k with the
 *           sign of the odd slot flipped.
 *
 * Both code paths finish by clearing data[1] and, for even lengths,
 * data[len+1].
 */
void real_plan_forward_c (real_plan plan, double *data)
  {
  const int len = plan->length;
  int k;

  if (!plan->bluestein)
    {
    /* Pack the even slots into data[1..len]; ascending order keeps every
       read ahead of the write that would overwrite it. */
    for (k=0; k<len; ++k)
      data[k+1] = data[2*k];
    rfftf (len, data+1, plan->work);
    data[0] = data[1];
    data[1] = 0;
    /* Fill the upper half by mirroring the lower half. */
    for (k=2; k<len; k+=2)
      {
      double *upper = data + (2*len-k);
      upper[0] = data[k];
      upper[1] = -data[k+1];
      }
    }
  else
    {
    /* Clear every odd slot before the complex transform. */
    for (k=0; k<len; ++k)
      data[2*k+1] = 0;
    bluestein (len, data, plan->work, -1);
    data[1] = 0;
    /* Replace each mirrored pair by its symmetrised average. */
    for (k=2; k<len; k+=2)
      {
      double *lower = data + k;
      double *upper = data + (2*len-k);
      const double even_avg = 0.5*(upper[0]+lower[0]);
      const double odd_avg = 0.5*(upper[1]-lower[1]);
      upper[0] = even_avg;
      lower[0] = even_avg;
      upper[1] = odd_avg;
      lower[1] = -odd_avg;
      }
    }

  if ((len&1)==0)
    data[len+1] = 0.;
  }
/**
 * Smooths raw_vec by convolving it with this->weights through real FFTs
 * (FFTPACK rffti / rfftf / rfftb).
 *
 * Method:
 *   - n = raw_vec.size(), M = weights.size() (must be odd), M_mid = M / 2.
 *   - Both sequences are zero-padded to N = n + M_mid + 1 samples. The kernel
 *     is stored wrapped around index 0 (centre tap at 0, later taps at
 *     1..M_mid, earlier taps at N-1..N-M_mid) so that the circular
 *     convolution equals the centred linear convolution on samples 0..n-1.
 *   - Both padded sequences are transformed, the spectra are multiplied,
 *     and the product is transformed back.
 *
 * Corrections relative to the previous implementation:
 *   - The backward transform reuses a workspace initialised for length N;
 *     it used to be initialised for n while rfftb ran with N.
 *   - The spectra are multiplied as complex numbers in FFTPACK's packed
 *     layout [re0, re1, im1, re2, im2, ..., (re_{N/2} if N is even)];
 *     a plain element-wise product is not a convolution.
 *   - The result is scaled by 1/N exactly once, because rfftf followed by
 *     rfftb multiplies a sequence by N.
 *   - The kernel wrap loop includes i == M_mid, so the two outermost
 *     weights are no longer dropped.
 *   - The copy into out_vec is bounded by N.
 *
 * @param raw_vec  samples to smooth.
 * @param out_vec  destination; NOT resized. The first
 *                 min(out_vec.size(), N) entries are overwritten, of which
 *                 entries 0..n-1 are the smoothed samples.
 * @param test     unused.
 *
 * Progress messages are written to std::cerr. When the file is built
 * without HAVE_FFTPACK the function throws a string literal.
 */
void ChromSmoother::smooth_vect_fft( const std::vector<float> & raw_vec,
                                     std::vector<float> & out_vec,
                                     bool test)
{
#ifdef HAVE_FFTPACK
    typedef std::vector<float>::size_type size_type;

    const int n = raw_vec.size();
    std::cerr << "n % 2" << n % 2 << " , n: " << n << std::endl;

    const int M = this->weights.size();
    assert( M % 2 == 1 );
    const int M_mid = M / 2;
    const int N = n + M_mid + 1;

    // Zero-padded copy of the input.
    std::vector<float> input_padded(raw_vec);
    input_padded.resize(N, 0.0f);

    // Kernel wrapped around index 0; i runs up to and including M_mid so
    // that weights[0] and weights[M-1] take part in the convolution.
    std::vector<float> weights_padded(N, 0.0f);
    weights_padded[0] = this->weights[M_mid];
    for ( int i = 1 ; i <= M_mid ; i++ ) {
        weights_padded[N-i] = this->weights[M_mid-i];
        weights_padded[i]   = this->weights[M_mid+i];
    }

    // One workspace serves every transform of length N. The size keeps the
    // generous 8*N+15 of the previous code.
    std::vector<float> wsave(8 * static_cast<size_type>(N) + 15);
    std::vector<float> fft_prod(N);
    std::cerr << "finished padding" << std::endl;

    rffti( N, &(wsave[0]) );
    rfftf( N, &(input_padded[0]), &(wsave[0]) );

    std::cerr << "weights fft 1" << std::endl;
    rfftf( N, &(weights_padded[0]), &(wsave[0]) );

    // Complex product of the two packed spectra, with the single 1/N
    // normalisation folded in.
    const float inv_N = 1.0f / N;
    fft_prod[0] = input_padded[0] * weights_padded[0] * inv_N;
    for ( int k = 1 ; 2*k < N ; k++ ) {
        const float xr = input_padded[2*k-1];
        const float xi = input_padded[2*k];
        const float wr = weights_padded[2*k-1];
        const float wi = weights_padded[2*k];
        fft_prod[2*k-1] = (xr*wr - xi*wi) * inv_N;
        fft_prod[2*k]   = (xr*wi + xi*wr) * inv_N;
    }
    if ( N % 2 == 0 ) {
        // For even N the last slot is the purely real bin N/2.
        fft_prod[N-1] = input_padded[N-1] * weights_padded[N-1] * inv_N;
    }
    std::cerr << "product 1" << std::endl;

    rfftb( N, &(fft_prod[0]), &(wsave[0]) );
    std::cerr << "backwards fft" << std::endl;

    // out_vec keeps its size; never read past the N computed samples.
    const size_type n_copy =
        out_vec.size() < fft_prod.size() ? out_vec.size() : fft_prod.size();
    for ( size_type i = 0 ; i < n_copy ; i++ ) {
        out_vec[i] = fft_prod[i];
    }
    std::cerr << "out_vec filled" << std::endl;
#else
    throw("Forget about trying to call smooth_vect_fft if you don't have FFTPACK");
#endif
}
/* Main program */
/*
 * Test driver for the FFTPACK transform routines (machine-translated from
 * Fortran; the "Lnnn:" comments are the original statement labels).
 *
 * For each of the first nns = 7 lengths in nd[], the driver fills x/y/xh
 * with sin(j*sqrt(2)), and for every transform family
 *   - builds a reference result in y (or x) by direct summation,
 *   - runs the library routine and records the largest absolute difference,
 *   - runs a forward/backward round trip and records the largest deviation
 *     from the rescaled original,
 * then prints all error figures for that length through the Fortran
 * formatted-I/O runtime (s_wsfe / do_fio / e_wsfe).
 *
 * NOTE(review): integer, doublereal, doublecomplex, real, cilist, ftnlen,
 * abs, max, c_n1 and c__1 are not defined in this function; presumably they
 * come from f2c.h and file-level statics - confirm.
 *
 * Always returns 0.
 */
int MAIN__(void) {
    /* Initialized data */
    /* Transform lengths to test; only the first nns (7) entries are used. */
    static integer nd[10] = { 120,54,49,32,4,3,2 };
    /* Format strings */
    static char fmt_1001[] = "(\0020N\002,i5,\002 RFFTF \002,e10.3,\002 RFF"
        "TB \002,e10.3,\002 RFFTFB \002,e10.3,\002 SINT \002,e10.3,"
        "\002 SINTFB \002,e10.3,\002 COST \002,e10.3/7x,\002 COSTFB "
        "\002,e10.3,\002 SINQF \002,e10.3,\002 SINQB \002,e10.3,\002 SI"
        "NQFB \002,e10.3,\002 COSQF \002,e10.3,\002 COSQB \002,e10.3/7x,"
        "\002 COSQFB \002,e10.3,\002 DEZF \002,e10.3,\002 DEZB \002,e"
        "10.3,\002 DEZFB \002,e10.3,\002 CFFTF \002,e10.3,\002 CFFTB "
        " \002,e10.3/7x,\002 CFFTFB \002,e10.3)";
    /* System generated locals */
    integer i__1, i__2, i__3, i__4, i__5, i__6;
    doublereal d__1, d__2, d__3, d__4;
    doublecomplex z__1, z__2, z__3;
    /* Builtin functions */
    double sqrt(doublereal), sin(doublereal), cos(doublereal);
    integer pow_ii(integer *, integer *);
    double atan(doublereal), z_abs(doublecomplex *);
    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void);
    /* Local variables */
    /* x, y: working sequences; xh: pristine copy of the input;
       w, ifac: work area and factor table passed to every routine;
       a, b, ah, bh: Fourier coefficients for the EZFFT tests;
       cx, cy: complex working sequences for the CFFT tests. */
    doublereal a[100], b[100];
    integer i__, j, k, n;
    doublereal w[2000], x[200], y[200], ah[100], bh[100], cf, fn, dt, pi;
    doublecomplex cx[200], cy[200];
    doublereal xh[200];
    integer nz, nm1, np1, ns2;
    doublereal arg, tfn, tpi;
    integer nns;
    doublereal sum, arg1, arg2;
    integer ns2m;
    doublereal sum1, sum2, dcfb;
    integer ifac[64], modn;
    doublereal rftb, rftf;
    extern /* Subroutine */ void cost(integer *, doublereal *, doublereal *, integer *), sint(integer *, doublereal *, doublereal *, integer * );
    doublereal dezb1, dezf1, sqrt2;
    extern /* Subroutine */ void cfftb(integer *, doublecomplex *, doublereal *, integer *), cfftf(integer *, doublecomplex *, doublereal *, integer *);
    doublereal dezfb;
    extern /* Subroutine */ void cffti(integer *, doublereal *, integer *), rfftb(integer *, doublereal *, doublereal *, integer *);
    doublereal rftfb;
    extern /* Subroutine */ void rfftf(integer *, doublereal *, doublereal *, integer *), cosqb(integer *, doublereal *, doublereal *, integer *), rffti(integer *, doublereal *, integer *), cosqf(integer *, doublereal *, doublereal *, integer *), sinqb(integer *, doublereal *, doublereal *, integer *), cosqi(integer *, doublereal *, integer *), sinqf(integer *, doublereal *, doublereal *, integer *), costi(integer *, doublereal *, integer *);
    doublereal azero;
    extern /* Subroutine */ void sinqi(integer *, doublereal *, integer *), sinti(integer *, doublereal *, integer *);
    doublereal costt, sintt, dcfftb, dcfftf, cosqfb, costfb;
    extern /* Subroutine */ void ezfftb(integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *);
    doublereal sinqfb;
    extern /* Subroutine */ void ezfftf(integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *);
    doublereal sintfb;
    extern /* Subroutine */ void ezffti(integer *, doublereal *, integer *);
    doublereal azeroh, cosqbt, cosqft, sinqbt, sinqft;
    /* Fortran I/O blocks */
    static cilist io___58 = { 0, 6, 0, fmt_1001, 0 };

    /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
    /* VERSION 4 APRIL 1985 */
    /* A TEST DRIVER FOR */
    /* A PACKAGE OF FORTRAN SUBPROGRAMS FOR THE FAST FOURIER */
    /* TRANSFORM OF PERIODIC AND OTHER SYMMETRIC SEQUENCES */
    /* BY */
    /* PAUL N SWARZTRAUBER */
    /* NATIONAL CENTER FOR ATMOSPHERIC RESEARCH BOULDER,COLORADO 80307 */
    /* WHICH IS SPONSORED BY THE NATIONAL SCIENCE FOUNDATION */
    /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
    /* THIS PROGRAM TESTS THE PACKAGE OF FAST FOURIER */
    /* TRANSFORMS FOR BOTH COMPLEX AND REAL PERIODIC SEQUENCES AND */
    /* CERTAIN OTHER SYMMETRIC SEQUENCES THAT ARE LISTED BELOW. */
    /* 1. RFFTI INITIALIZE RFFTF AND RFFTB */
    /* 2. RFFTF FORWARD TRANSFORM OF A REAL PERIODIC SEQUENCE */
    /* 3. RFFTB BACKWARD TRANSFORM OF A REAL COEFFICIENT ARRAY */
    /* 4. EZFFTI INITIALIZE EZFFTF AND EZFFTB */
    /* 5. EZFFTF A SIMPLIFIED REAL PERIODIC FORWARD TRANSFORM */
    /* 6. EZFFTB A SIMPLIFIED REAL PERIODIC BACKWARD TRANSFORM */
    /* 7. SINTI INITIALIZE SINT */
    /* 8. SINT SINE TRANSFORM OF A REAL ODD SEQUENCE */
    /* 9. COSTI INITIALIZE COST */
    /* 10. COST COSINE TRANSFORM OF A REAL EVEN SEQUENCE */
    /* 11. SINQI INITIALIZE SINQF AND SINQB */
    /* 12. SINQF FORWARD SINE TRANSFORM WITH ODD WAVE NUMBERS */
    /* 13. SINQB UNNORMALIZED INVERSE OF SINQF */
    /* 14. COSQI INITIALIZE COSQF AND COSQB */
    /* 15. COSQF FORWARD COSINE TRANSFORM WITH ODD WAVE NUMBERS */
    /* 16. COSQB UNNORMALIZED INVERSE OF COSQF */
    /* 17. CFFTI INITIALIZE CFFTF AND CFFTB */
    /* 18. CFFTF FORWARD TRANSFORM OF A COMPLEX PERIODIC SEQUENCE */
    /* 19. CFFTB UNNORMALIZED INVERSE OF CFFTF */

    sqrt2 = sqrt(2.0);
    nns = 7;
    i__1 = nns;
    /* One full pass over every transform family per test length. */
    for (nz = 1; nz <= i__1; ++nz) {
        n = nd[nz - 1];
        modn = n % 2;
        fn = (real) n;
        tfn = fn + fn;
        np1 = n + 1;
        nm1 = n - 1;
        i__2 = np1;
        /* Test signal: n+1 samples of sin(j*sqrt(2)), kept three times. */
        for (j = 1; j <= i__2; ++j) {
            x[j - 1] = sin((real) j * sqrt2);
            y[j - 1] = x[j - 1];
            xh[j - 1] = x[j - 1];
            /* L101: */
        }

        /* TEST SUBROUTINES RFFTI,RFFTF AND RFFTB */
        rffti(&n, w, ifac);
        pi = 3.141592653589793238462643383279502884197169399375108209749445923;
        dt = (pi + pi) / fn;
        ns2 = (n + 1) / 2;
        if (ns2 < 2) { goto L104; }
        i__2 = ns2;
        /* Reference forward transform by direct summation, stored in packed
           order: y[2k-3] = sum x*cos, y[2k-2] = -sum x*sin for k = 2..ns2. */
        for (k = 2; k <= i__2; ++k) {
            sum1 = 0.0;
            sum2 = 0.0;
            arg = (real) (k - 1) * dt;
            i__3 = n;
            for (i__ = 1; i__ <= i__3; ++i__) {
                arg1 = (real) (i__ - 1) * arg;
                sum1 += x[i__ - 1] * cos(arg1);
                sum2 += x[i__ - 1] * sin(arg1);
                /* L102: */
            }
            y[(k << 1) - 3] = sum1;
            y[(k << 1) - 2] = -sum2;
            /* L103: */
        }
L104:
        /* y[0] is the plain sum; for even n, y[n-1] is the alternating sum. */
        sum1 = 0.0;
        sum2 = 0.0;
        i__2 = nm1;
        for (i__ = 1; i__ <= i__2; i__ += 2) {
            sum1 += x[i__ - 1];
            sum2 += x[i__];
            /* L105: */
        }
        if (modn == 1) { sum1 += x[n - 1]; }
        y[0] = sum1 + sum2;
        if (modn == 0) { y[n - 1] = sum1 - sum2; }
        rfftf(&n, x, w, ifac);
        /* rftf = max |rfftf result - reference| / n; x is restored from xh. */
        rftf = 0.0;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
            /* Computing MAX */
            d__2 = rftf, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
            rftf = max(d__2,d__3);
            x[i__ - 1] = xh[i__ - 1];
            /* L106: */
        }
        rftf /= fn;
        i__2 = n;
        /* Reference backward transform of x, evaluated term by term into y. */
        for (i__ = 1; i__ <= i__2; ++i__) {
            sum = x[0] * 0.5;
            arg = (real) (i__ - 1) * dt;
            if (ns2 < 2) { goto L108; }
            i__3 = ns2;
            for (k = 2; k <= i__3; ++k) {
                arg1 = (real) (k - 1) * arg;
                sum = sum + x[(k << 1) - 3] * cos(arg1) - x[(k << 1) - 2] * sin(arg1);
                /* L107: */
            }
L108:
            if (modn == 0) {
                i__3 = i__ - 1;
                sum += (real) pow_ii(&c_n1, &i__3) * 0.5 * x[n - 1];
            }
            y[i__ - 1] = sum + sum;
            /* L109: */
        }
        rfftb(&n, x, w, ifac);
        /* rftb = max |rfftb result - reference|; x and y are reset to xh. */
        rftb = 0.0;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
            /* Computing MAX */
            d__2 = rftb, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
            rftb = max(d__2,d__3);
            x[i__ - 1] = xh[i__ - 1];
            y[i__ - 1] = xh[i__ - 1];
            /* L110: */
        }
        /* Round trip: backward then forward, compared after scaling by 1/n. */
        rfftb(&n, y, w, ifac);
        rfftf(&n, y, w, ifac);
        cf = 1.0 / fn;
        rftfb = 0.0;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
            /* Computing MAX */
            d__2 = rftfb, d__3 = (d__1 = cf * y[i__ - 1] - x[i__ - 1], abs( d__1));
            rftfb = max(d__2,d__3);
            /* L111: */
        }

        /* TEST SUBROUTINES SINTI AND SINT */
        dt = pi / fn;
        i__2 = nm1;
        for (i__ = 1; i__ <= i__2; ++i__) {
            x[i__ - 1] = xh[i__ - 1];
            /* L112: */
        }
        i__2 = nm1;
        /* Reference sine transform of the first n-1 samples (twice the sum). */
        for (i__ = 1; i__ <= i__2; ++i__) {
            y[i__ - 1] = 0.0;
            arg1 = (real) i__ * dt;
            i__3 = nm1;
            for (k = 1; k <= i__3; ++k) {
                y[i__ - 1] += x[k - 1] * sin((real) k * arg1);
                /* L113: */
            }
            y[i__ - 1] += y[i__ - 1];
            /* L114: */
        }
        sinti(&nm1, w, ifac);
        sint(&nm1, x, w, ifac);
        cf = 0.5 / fn;
        sintt = 0.0;
        i__2 = nm1;
        for (i__ = 1; i__ <= i__2; ++i__) {
            /* Computing MAX */
            d__2 = sintt, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
            sintt = max(d__2,d__3);
            x[i__ - 1] = xh[i__ - 1];
            y[i__ - 1] = x[i__ - 1];
            /* L115: */
        }
        sintt = cf * sintt;
        /* Round trip: sint applied twice, compared after scaling by cf. */
        sint(&nm1, x, w, ifac);
        sint(&nm1, x, w, ifac);
        sintfb = 0.0;
        i__2 = nm1;
        for (i__ = 1; i__ <= i__2; ++i__) {
            /* Computing MAX */
            d__2 = sintfb, d__3 = (d__1 = cf * x[i__ - 1] - y[i__ - 1], abs( d__1));
            sintfb = max(d__2,d__3);
            /* L116: */
        }

        /* TEST SUBROUTINES COSTI AND COST */
        i__2 = np1;
        for (i__ = 1; i__ <= i__2; ++i__) {
            x[i__ - 1] = xh[i__ - 1];
            /* L117: */
        }
        i__2 = np1;
        /* Reference cosine transform of the n+1 samples. */
        for (i__ = 1; i__ <= i__2; ++i__) {
            i__3 = i__ + 1;
            y[i__ - 1] = (x[0] + (real) pow_ii(&c_n1, &i__3) * x[n]) * 0.5;
            arg = (real) (i__ - 1) * dt;
            i__3 = n;
            for (k = 2; k <= i__3; ++k) {
                y[i__ - 1] += x[k - 1] * cos((real) (k - 1) * arg);
                /* L118: */
            }
            y[i__ - 1] += y[i__ - 1];
            /* L119: */
        }
        costi(&np1, w, ifac);
        cost(&np1, x, w, ifac);
        costt = 0.0;
        i__2 = np1;
        for (i__ = 1; i__ <= i__2; ++i__) {
            /* Computing MAX */
            d__2 = costt, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
            costt = max(d__2,d__3);
            x[i__ - 1] = xh[i__ - 1];
            y[i__ - 1] = xh[i__ - 1];
            /* L120: */
        }
        costt = cf * costt;
        /* Round trip: cost applied twice, compared after scaling by cf. */
        cost(&np1, x, w, ifac);
        cost(&np1, x, w, ifac);
        costfb = 0.0;
        i__2 = np1;
        for (i__ = 1; i__ <= i__2; ++i__) {
            /* Computing MAX */
            d__2 = costfb, d__3 = (d__1 = cf * x[i__ - 1] - y[i__ - 1], abs( d__1));
            costfb = max(d__2,d__3);
            /* L121: */
        }

        /* TEST SUBROUTINES SINQI,SINQF AND SINQB */
        cf = 0.25 / fn;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
            y[i__ - 1] = xh[i__ - 1];
            /* L122: */
        }
        dt = pi / (fn + fn);
        i__2 = n;
        /* Reference for sinqb, accumulated into x from the copy in y. */
        for (i__ = 1; i__ <= i__2; ++i__) {
            x[i__ - 1] = 0.0;
            arg = dt * (real) i__;
            i__3 = n;
            for (k = 1; k <= i__3; ++k) {
                x[i__ - 1] += y[k - 1] * sin((real) (k + k - 1) * arg);
                /* L123: */
            }
            x[i__ - 1] *= 4.0;
            /* L124: */
        }
        sinqi(&n, w, ifac);
        sinqb(&n, y, w, ifac);
        sinqbt = 0.0;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
            /* Computing MAX */
            d__2 = sinqbt, d__3 = (d__1 = y[i__ - 1] - x[i__ - 1], abs(d__1));
            sinqbt = max(d__2,d__3);
            x[i__ - 1] = xh[i__ - 1];
            /* L125: */
        }
        sinqbt = cf * sinqbt;
        i__2 = n;
        /* Reference for sinqf, accumulated into y. */
        for (i__ = 1; i__ <= i__2; ++i__) {
            arg = (real) (i__ + i__ - 1) * dt;
            i__3 = i__ + 1;
            y[i__ - 1] = (real) pow_ii(&c_n1, &i__3) * 0.5 * x[n - 1];
            i__3 = nm1;
            for (k = 1; k <= i__3; ++k) {
                y[i__ - 1] += x[k - 1] * sin((real) k * arg);
                /* L126: */
            }
            y[i__ - 1] += y[i__ - 1];
            /* L127: */
        }
        sinqf(&n, x, w, ifac);
        sinqft = 0.0;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
            /* Computing MAX */
            d__2 = sinqft, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
            sinqft = max(d__2,d__3);
            y[i__ - 1] = xh[i__ - 1];
            x[i__ - 1] = xh[i__ - 1];
            /* L128: */
        }
        /* Round trip: sinqf then sinqb, compared after scaling by cf. */
        sinqf(&n, y, w, ifac);
        sinqb(&n, y, w, ifac);
        sinqfb = 0.0;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
            /* Computing MAX */
            d__2 = sinqfb, d__3 = (d__1 = cf * y[i__ - 1] - x[i__ - 1], abs( d__1));
            sinqfb = max(d__2,d__3);
            /* L129: */
        }

        /* TEST SUBROUTINES COSQI,COSQF AND COSQB */
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
            y[i__ - 1] = xh[i__ - 1];
            /* L130: */
        }
        i__2 = n;
        /* Reference for cosqb, accumulated into x from the copy in y. */
        for (i__ = 1; i__ <= i__2; ++i__) {
            x[i__ - 1] = 0.0;
            arg = (real) (i__ - 1) * dt;
            i__3 = n;
            for (k = 1; k <= i__3; ++k) {
                x[i__ - 1] += y[k - 1] * cos((real) (k + k - 1) * arg);
                /* L131: */
            }
            x[i__ - 1] *= 4.0;
            /* L132: */
        }
        cosqi(&n, w, ifac);
        cosqb(&n, y, w, ifac);
        cosqbt = 0.0;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
            /* Computing MAX */
            d__2 = cosqbt, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
            cosqbt = max(d__2,d__3);
            x[i__ - 1] = xh[i__ - 1];
            /* L133: */
        }
        cosqbt = cf * cosqbt;
        i__2 = n;
        /* Reference for cosqf, accumulated into y. */
        for (i__ = 1; i__ <= i__2; ++i__) {
            y[i__ - 1] = x[0] * 0.5;
            arg = (real) (i__ + i__ - 1) * dt;
            i__3 = n;
            for (k = 2; k <= i__3; ++k) {
                y[i__ - 1] += x[k - 1] * cos((real) (k - 1) * arg);
                /* L134: */
            }
            y[i__ - 1] += y[i__ - 1];
            /* L135: */
        }
        cosqf(&n, x, w, ifac);
        cosqft = 0.0;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
            /* Computing MAX */
            d__2 = cosqft, d__3 = (d__1 = y[i__ - 1] - x[i__ - 1], abs(d__1));
            cosqft = max(d__2,d__3);
            x[i__ - 1] = xh[i__ - 1];
            y[i__ - 1] = xh[i__ - 1];
            /* L136: */
        }
        cosqft = cf * cosqft;
        /* Round trip: cosqb then cosqf, compared after scaling by cf. */
        cosqb(&n, x, w, ifac);
        cosqf(&n, x, w, ifac);
        cosqfb = 0.0;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
            /* Computing MAX */
            d__2 = cosqfb, d__3 = (d__1 = cf * x[i__ - 1] - y[i__ - 1], abs( d__1));
            cosqfb = max(d__2,d__3);
            /* L137: */
        }

        /* TEST PROGRAMS EZFFTI,EZFFTF,EZFFTB */
        ezffti(&n, w, ifac);
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
            x[i__ - 1] = xh[i__ - 1];
            /* L138: */
        }
        tpi = atan(1.0) * 8.0;
        dt = tpi / (real) n;
        ns2 = (n + 1) / 2;
        cf = 2.0 / (real) n;
        ns2m = ns2 - 1;
        if (ns2m <= 0) { goto L141; }
        i__2 = ns2m;
        /* Reference cosine (a) and sine (b) coefficients by direct summation. */
        for (k = 1; k <= i__2; ++k) {
            sum1 = 0.0;
            sum2 = 0.0;
            arg = (real) k * dt;
            i__3 = n;
            for (i__ = 1; i__ <= i__3; ++i__) {
                arg1 = (real) (i__ - 1) * arg;
                sum1 += x[i__ - 1] * cos(arg1);
                sum2 += x[i__ - 1] * sin(arg1);
                /* L139: */
            }
            a[k - 1] = cf * sum1;
            b[k - 1] = cf * sum2;
            /* L140: */
        }
L141:
        /* Reference constant term azero and, for even n, the last cosine
           coefficient from the alternating sum. */
        nm1 = n - 1;
        sum1 = 0.0;
        sum2 = 0.0;
        i__2 = nm1;
        for (i__ = 1; i__ <= i__2; i__ += 2) {
            sum1 += x[i__ - 1];
            sum2 += x[i__];
            /* L142: */
        }
        if (modn == 1) { sum1 += x[n - 1]; }
        azero = cf * 0.5 * (sum1 + sum2);
        if (modn == 0) { a[ns2 - 1] = cf * 0.5 * (sum1 - sum2); }
        ezfftf(&n, x, &azeroh, ah, bh, w, ifac);
        /* dezf1 = largest difference between ezfftf output and reference. */
        dezf1 = (d__1 = azeroh - azero, abs(d__1));
        if (modn == 0) {
            /* Computing MAX */
            d__2 = dezf1, d__3 = (d__1 = a[ns2 - 1] - ah[ns2 - 1], abs(d__1));
            dezf1 = max(d__2,d__3);
        }
        if (ns2m <= 0) { goto L144; }
        i__2 = ns2m;
        for (i__ = 1; i__ <= i__2; ++i__) {
            /* Computing MAX */
            d__3 = dezf1, d__4 = (d__1 = ah[i__ - 1] - a[i__ - 1], abs(d__1)), d__3 = max(d__3,d__4), d__4 = (d__2 = bh[i__ - 1] - b[ i__ - 1], abs(d__2));
            dezf1 = max(d__3,d__4);
            /* L143: */
        }
L144:
        ns2 = n / 2;
        if (modn == 0) { b[ns2 - 1] = 0.0; }
        i__2 = n;
        /* Reference synthesis of the sequence from azero, a and b into x. */
        for (i__ = 1; i__ <= i__2; ++i__) {
            sum = azero;
            arg1 = (real) (i__ - 1) * dt;
            i__3 = ns2;
            for (k = 1; k <= i__3; ++k) {
                arg2 = (real) k * arg1;
                sum = sum + a[k - 1] * cos(arg2) + b[k - 1] * sin(arg2);
                /* L145: */
            }
            x[i__ - 1] = sum;
            /* L146: */
        }
        ezfftb(&n, y, &azero, a, b, w, ifac);
        dezb1 = 0.0;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
            /* Computing MAX */
            d__2 = dezb1, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
            dezb1 = max(d__2,d__3);
            x[i__ - 1] = xh[i__ - 1];
            /* L147: */
        }
        /* Round trip: ezfftf then ezfftb, compared without rescaling. */
        ezfftf(&n, x, &azero, a, b, w, ifac);
        ezfftb(&n, y, &azero, a, b, w, ifac);
        dezfb = 0.0;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
            /* Computing MAX */
            d__2 = dezfb, d__3 = (d__1 = x[i__ - 1] - y[i__ - 1], abs(d__1));
            dezfb = max(d__2,d__3);
            /* L148: */
        }

        /* TEST CFFTI,CFFTF,CFFTB */
        i__2 = n;
        /* Complex test signal: cx[i-1] = cos(sqrt2*i) + i*sin(sqrt2*i*i). */
        for (i__ = 1; i__ <= i__2; ++i__) {
            i__3 = i__ - 1;
            d__1 = cos(sqrt2 * (real) i__);
            d__2 = sin(sqrt2 * (real) (i__ * i__));
            z__1.r = d__1, z__1.i = d__2;
            cx[i__3].r = z__1.r, cx[i__3].i = z__1.i;
            /* L149: */
        }
        dt = (pi + pi) / fn;
        i__2 = n;
        /* Reference forward transform (negative exponent) summed into cy. */
        for (i__ = 1; i__ <= i__2; ++i__) {
            arg1 = -((real) (i__ - 1)) * dt;
            i__3 = i__ - 1;
            cy[i__3].r = 0.0, cy[i__3].i = 0.0;
            i__3 = n;
            for (k = 1; k <= i__3; ++k) {
                arg2 = (real) (k - 1) * arg1;
                i__4 = i__ - 1;
                i__5 = i__ - 1;
                d__1 = cos(arg2);
                d__2 = sin(arg2);
                z__3.r = d__1, z__3.i = d__2;
                i__6 = k - 1;
                z__2.r = z__3.r * cx[i__6].r - z__3.i * cx[i__6].i, z__2.i = z__3.r * cx[i__6].i + z__3.i * cx[i__6].r;
                z__1.r = cy[i__5].r + z__2.r, z__1.i = cy[i__5].i + z__2.i;
                cy[i__4].r = z__1.r, cy[i__4].i = z__1.i;
                /* L150: */
            }
            /* L151: */
        }
        cffti(&n, w, ifac);
        cfftf(&n, cx, w, ifac);
        /* dcfftf = max |cfftf result - reference| / n; cx is then divided
           by n so it can serve as input to the backward test. */
        dcfftf = 0.0;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
            /* Computing MAX */
            i__3 = i__ - 1;
            i__4 = i__ - 1;
            z__1.r = cx[i__3].r - cy[i__4].r, z__1.i = cx[i__3].i - cy[i__4] .i;
            d__1 = dcfftf, d__2 = z_abs(&z__1);
            dcfftf = max(d__1,d__2);
            i__3 = i__ - 1;
            i__4 = i__ - 1;
            z__1.r = cx[i__4].r / fn, z__1.i = cx[i__4].i / fn;
            cx[i__3].r = z__1.r, cx[i__3].i = z__1.i;
            /* L152: */
        }
        dcfftf /= fn;
        i__2 = n;
        /* Reference backward transform (positive exponent) summed into cy. */
        for (i__ = 1; i__ <= i__2; ++i__) {
            arg1 = (real) (i__ - 1) * dt;
            i__3 = i__ - 1;
            cy[i__3].r = 0.0, cy[i__3].i = 0.0;
            i__3 = n;
            for (k = 1; k <= i__3; ++k) {
                arg2 = (real) (k - 1) * arg1;
                i__4 = i__ - 1;
                i__5 = i__ - 1;
                d__1 = cos(arg2);
                d__2 = sin(arg2);
                z__3.r = d__1, z__3.i = d__2;
                i__6 = k - 1;
                z__2.r = z__3.r * cx[i__6].r - z__3.i * cx[i__6].i, z__2.i = z__3.r * cx[i__6].i + z__3.i * cx[i__6].r;
                z__1.r = cy[i__5].r + z__2.r, z__1.i = cy[i__5].i + z__2.i;
                cy[i__4].r = z__1.r, cy[i__4].i = z__1.i;
                /* L153: */
            }
            /* L154: */
        }
        cfftb(&n, cx, w, ifac);
        dcfftb = 0.0;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
            /* Computing MAX */
            i__3 = i__ - 1;
            i__4 = i__ - 1;
            z__1.r = cx[i__3].r - cy[i__4].r, z__1.i = cx[i__3].i - cy[i__4] .i;
            d__1 = dcfftb, d__2 = z_abs(&z__1);
            dcfftb = max(d__1,d__2);
            i__3 = i__ - 1;
            i__4 = i__ - 1;
            cx[i__3].r = cy[i__4].r, cx[i__3].i = cy[i__4].i;
            /* L155: */
        }
        /* Round trip: cfftf then cfftb, compared after scaling by 1/n. */
        cf = 1.0 / fn;
        cfftf(&n, cx, w, ifac);
        cfftb(&n, cx, w, ifac);
        dcfb = 0.0;
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
            /* Computing MAX */
            i__3 = i__ - 1;
            z__2.r = cf * cx[i__3].r, z__2.i = cf * cx[i__3].i;
            i__4 = i__ - 1;
            z__1.r = z__2.r - cy[i__4].r, z__1.i = z__2.i - cy[i__4].i;
            d__1 = dcfb, d__2 = z_abs(&z__1);
            dcfb = max(d__1,d__2);
            /* L156: */
        }

        /* Emit one formatted record: n followed by the 19 error figures,
           in the order expected by fmt_1001. */
        s_wsfe(&io___58);
        do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&rftf, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&rftb, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&rftfb, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&sintt, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&sintfb, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&costt, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&costfb, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&sinqft, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&sinqbt, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&sinqfb, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&cosqft, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&cosqbt, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&cosqfb, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&dezf1, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&dezb1, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&dezfb, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&dcfftf, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&dcfftb, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&dcfb, (ftnlen)sizeof(doublereal));
        e_wsfe();
        /* L157: */
    }
    return 0;
} /* MAIN__ */
/*
 * Python entry point: forward real FFT along the last axis.
 *
 * args is a 2-tuple (data, wsave):
 *   data   - anything convertible to a contiguous double array with at
 *            least one dimension; npts is the length of its last axis.
 *   wsave  - 1-D double work array of exactly npts*2+15 elements.
 *
 * Returns a new zero-initialised complex-double array whose last axis has
 * npts/2 + 1 elements. For every row the input is copied one double past
 * the start of the output row, transformed in place by rfftf(), and the
 * first value is then moved into slot 0 with slot 1 cleared.
 *
 * Returns NULL with a Python exception set on failure.
 *
 * Error-handling fixes relative to the previous version:
 *   - ret is checked for NULL before it is dereferenced;
 *   - the work array is only released if PyArray_AsCArray succeeded
 *     (previously the failure path passed an uninitialised pointer and a
 *     borrowed reference to PyArray_Free);
 *   - a zero-length last axis no longer divides by zero.
 */
PyObject *
fftpack_rfftf(PyObject *NPY_UNUSED(self), PyObject *args)
{
    PyObject *op1, *op2;
    PyArrayObject *data, *ret;
    PyArray_Descr *descr;
    double *wsave, *dptr, *rptr;
    npy_intp nsave;
    int npts, nrepeats, i, rstep;
    int have_wsave = 0;   /* set once op2/wsave are owned by this function */

    if(!PyArg_ParseTuple(args, "OO", &op1, &op2)) {
        return NULL;
    }
    data = (PyArrayObject *)PyArray_ContiguousFromObject(op1,
            NPY_DOUBLE, 1, 0);
    if (data == NULL) {
        return NULL;
    }
    /* FIXME, direct access changing contents of data->dimensions */
    npts = PyArray_DIM(data, PyArray_NDIM(data) - 1);
    PyArray_DIMS(data)[PyArray_NDIM(data) - 1] = npts/2 + 1;
    ret = (PyArrayObject *)PyArray_Zeros(PyArray_NDIM(data),
            PyArray_DIMS(data), PyArray_DescrFromType(NPY_CDOUBLE), 0);
    /* Restore the borrowed shape before anything can bail out. */
    PyArray_DIMS(data)[PyArray_NDIM(data) - 1] = npts;
    if (ret == NULL) {
        goto fail;
    }
    /* Doubles per output row: two per complex element. */
    rstep = PyArray_DIM(ret, PyArray_NDIM(ret) - 1)*2;

    descr = PyArray_DescrFromType(NPY_DOUBLE);
    if (PyArray_AsCArray(&op2, (void *)&wsave, &nsave, 1, descr) == -1) {
        goto fail;
    }
    have_wsave = 1;
    if (nsave != npts*2+15) {
        PyErr_SetString(ErrorObject, "invalid work array for fft size");
        goto fail;
    }

    /* An empty last axis means there is nothing to transform. */
    nrepeats = (npts > 0) ? PyArray_SIZE(data)/npts : 0;
    rptr = (double *)PyArray_DATA(ret);
    dptr = (double *)PyArray_DATA(data);

    NPY_SIGINT_ON;
    for (i = 0; i < nrepeats; i++) {
        memcpy((char *)(rptr+1), dptr, npts*sizeof(double));
        rfftf(npts, rptr+1, wsave);
        rptr[0] = rptr[1];
        rptr[1] = 0.0;
        rptr += rstep;
        dptr += npts;
    }
    NPY_SIGINT_OFF;
    PyArray_Free(op2, (char *)wsave);
    Py_DECREF(data);
    return (PyObject *)ret;

fail:
    if (have_wsave) {
        PyArray_Free(op2, (char *)wsave);
    }
    Py_XDECREF(data);
    Py_XDECREF(ret);
    return NULL;
}
/* compare results with the regular fftpack */ void pffft_validate_N(int N, int cplx) { int Nfloat = N*(cplx?2:1); int Nbytes = Nfloat * sizeof(float); float *ref, *in, *out, *tmp, *tmp2; PFFFT_Setup *s = pffft_new_setup(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL); int pass; if (!s) { printf("Skipping N=%d, not supported\n", N); return; } ref = pffft_aligned_malloc(Nbytes); in = pffft_aligned_malloc(Nbytes); out = pffft_aligned_malloc(Nbytes); tmp = pffft_aligned_malloc(Nbytes); tmp2 = pffft_aligned_malloc(Nbytes); for (pass=0; pass < 2; ++pass) { float ref_max = 0; int k; //printf("N=%d pass=%d cplx=%d\n", N, pass, cplx); // compute reference solution with FFTPACK if (pass == 0) { float *wrk = malloc(2*Nbytes+15*sizeof(float)); for (k=0; k < Nfloat; ++k) { ref[k] = in[k] = frand()*2-1; out[k] = 1e30; } if (!cplx) { rffti(N, wrk); rfftf(N, ref, wrk); // use our ordering for real ffts instead of the one of fftpack { float refN=ref[N-1]; for (k=N-2; k >= 1; --k) ref[k+1] = ref[k]; ref[1] = refN; } } else { cffti(N, wrk); cfftf(N, ref, wrk); } free(wrk); } for (k = 0; k < Nfloat; ++k) ref_max = MAX(ref_max, fabs(ref[k])); // pass 0 : non canonical ordering of transform coefficients if (pass == 0) { // test forward transform, with different input / output pffft_transform(s, in, tmp, 0, PFFFT_FORWARD); memcpy(tmp2, tmp, Nbytes); memcpy(tmp, in, Nbytes); pffft_transform(s, tmp, tmp, 0, PFFFT_FORWARD); for (k = 0; k < Nfloat; ++k) { assert(tmp2[k] == tmp[k]); } // test reordering pffft_zreorder(s, tmp, out, PFFFT_FORWARD); pffft_zreorder(s, out, tmp, PFFFT_BACKWARD); for (k = 0; k < Nfloat; ++k) { assert(tmp2[k] == tmp[k]); } pffft_zreorder(s, tmp, out, PFFFT_FORWARD); } else { // pass 1 : canonical ordering of transform coeffs. 
pffft_transform_ordered(s, in, tmp, 0, PFFFT_FORWARD); memcpy(tmp2, tmp, Nbytes); memcpy(tmp, in, Nbytes); pffft_transform_ordered(s, tmp, tmp, 0, PFFFT_FORWARD); for (k = 0; k < Nfloat; ++k) { assert(tmp2[k] == tmp[k]); } memcpy(out, tmp, Nbytes); } { for (k=0; k < Nfloat; ++k) { if (!(fabs(ref[k] - out[k]) < 1e-3*ref_max)) { printf("%s forward PFFFT mismatch found for N=%d\n", (cplx?"CPLX":"REAL"), N); exit(1); } } if (pass == 0) pffft_transform(s, tmp, out, 0, PFFFT_BACKWARD); else pffft_transform_ordered(s, tmp, out, 0, PFFFT_BACKWARD); memcpy(tmp2, out, Nbytes); memcpy(out, tmp, Nbytes); if (pass == 0) pffft_transform(s, out, out, 0, PFFFT_BACKWARD); else pffft_transform_ordered(s, out, out, 0, PFFFT_BACKWARD); for (k = 0; k < Nfloat; ++k) { assert(tmp2[k] == out[k]); out[k] *= 1.f/N; } for (k = 0; k < Nfloat; ++k) { if (fabs(in[k] - out[k]) > 1e-3 * ref_max) { printf("pass=%d, %s IFFFT does not match for N=%d\n", pass, (cplx?"CPLX":"REAL"), N); break; exit(1); } } } // quick test of the circular convolution in fft domain { float conv_err = 0, conv_max = 0; pffft_zreorder(s, ref, tmp, PFFFT_FORWARD); memset(out, 0, Nbytes); pffft_zconvolve_accumulate(s, ref, ref, out, 1.0); pffft_zreorder(s, out, tmp2, PFFFT_FORWARD); for (k=0; k < Nfloat; k += 2) { float ar = tmp[k], ai=tmp[k+1]; if (cplx || k > 0) { tmp[k] = ar*ar - ai*ai; tmp[k+1] = 2*ar*ai; } else { tmp[0] = ar*ar; tmp[1] = ai*ai; } } for (k=0; k < Nfloat; ++k) { float d = fabs(tmp[k] - tmp2[k]), e = fabs(tmp[k]); if (d > conv_err) conv_err = d; if (e > conv_max) conv_max = e; } if (conv_err > 1e-5*conv_max) { printf("zconvolve error ? %g %g\n", conv_err, conv_max); exit(1); } } } printf("%s PFFFT is OK for N=%d\n", (cplx?"CPLX":"REAL"), N); fflush(stdout); pffft_destroy_setup(s); pffft_aligned_free(ref); pffft_aligned_free(in); pffft_aligned_free(out); pffft_aligned_free(tmp); pffft_aligned_free(tmp2); }
void benchmark_ffts(int N, int cplx) { int Nfloat = (cplx ? N*2 : N); int Nbytes = Nfloat * sizeof(float); float *X = pffft_aligned_malloc(Nbytes), *Y = pffft_aligned_malloc(Nbytes), *Z = pffft_aligned_malloc(Nbytes); double t0, t1, flops; int k; int max_iter = 5120000/N*4; #ifdef __arm__ max_iter /= 4; #endif int iter; for (k = 0; k < Nfloat; ++k) { X[k] = 0; //sqrtf(k+1); } // FFTPack benchmark { float *wrk = malloc(2*Nbytes + 15*sizeof(float)); int max_iter_ = max_iter/pffft_simd_size(); if (max_iter_ == 0) max_iter_ = 1; if (cplx) cffti(N, wrk); else rffti(N, wrk); t0 = uclock_sec(); for (iter = 0; iter < max_iter_; ++iter) { if (cplx) { cfftf(N, X, wrk); cfftb(N, X, wrk); } else { rfftf(N, X, wrk); rfftb(N, X, wrk); } } t1 = uclock_sec(); free(wrk); flops = (max_iter_*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html show_output("FFTPack", N, cplx, flops, t0, t1, max_iter_); } #ifdef HAVE_VECLIB int log2N = (int)(log(N)/log(2) + 0.5f); if (N == (1<<log2N)) { FFTSetup setup; setup = vDSP_create_fftsetup(log2N, FFT_RADIX2); DSPSplitComplex zsamples; zsamples.realp = &X[0]; zsamples.imagp = &X[Nfloat/2]; t0 = uclock_sec(); for (iter = 0; iter < max_iter; ++iter) { if (cplx) { vDSP_fft_zip(setup, &zsamples, 1, log2N, kFFTDirection_Forward); vDSP_fft_zip(setup, &zsamples, 1, log2N, kFFTDirection_Inverse); } else { vDSP_fft_zrip(setup, &zsamples, 1, log2N, kFFTDirection_Forward); vDSP_fft_zrip(setup, &zsamples, 1, log2N, kFFTDirection_Inverse); } } t1 = uclock_sec(); vDSP_destroy_fftsetup(setup); flops = (max_iter*2) * ((cplx ? 
5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html show_output("vDSP", N, cplx, flops, t0, t1, max_iter); } else { show_output("vDSP", N, cplx, -1, -1, -1, -1); } #endif #ifdef HAVE_FFTW { fftwf_plan planf, planb; fftw_complex *in = (fftw_complex*) fftwf_malloc(sizeof(fftw_complex) * N); fftw_complex *out = (fftw_complex*) fftwf_malloc(sizeof(fftw_complex) * N); memset(in, 0, sizeof(fftw_complex) * N); int flags = (N < 40000 ? FFTW_MEASURE : FFTW_ESTIMATE); // measure takes a lot of time on largest ffts //int flags = FFTW_ESTIMATE; if (cplx) { planf = fftwf_plan_dft_1d(N, (fftwf_complex*)in, (fftwf_complex*)out, FFTW_FORWARD, flags); planb = fftwf_plan_dft_1d(N, (fftwf_complex*)in, (fftwf_complex*)out, FFTW_BACKWARD, flags); } else { planf = fftwf_plan_dft_r2c_1d(N, (float*)in, (fftwf_complex*)out, flags); planb = fftwf_plan_dft_c2r_1d(N, (fftwf_complex*)in, (float*)out, flags); } t0 = uclock_sec(); for (iter = 0; iter < max_iter; ++iter) { fftwf_execute(planf); fftwf_execute(planb); } t1 = uclock_sec(); fftwf_destroy_plan(planf); fftwf_destroy_plan(planb); fftwf_free(in); fftwf_free(out); flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html show_output((flags == FFTW_MEASURE ? "FFTW (meas.)" : " FFTW (estim)"), N, cplx, flops, t0, t1, max_iter); } #endif // PFFFT benchmark { PFFFT_Setup *s = pffft_new_setup(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL); if (s) { t0 = uclock_sec(); for (iter = 0; iter < max_iter; ++iter) { pffft_transform(s, X, Z, Y, PFFFT_FORWARD); pffft_transform(s, X, Z, Y, PFFFT_BACKWARD); } t1 = uclock_sec(); pffft_destroy_setup(s); flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html show_output("PFFFT", N, cplx, flops, t0, t1, max_iter); } } if (!array_output_format) { printf("--\n"); } pffft_aligned_free(X); pffft_aligned_free(Y); pffft_aligned_free(Z); }