/*
 * Initialise a DGR2HCConfig: record the transform geometry, allocate the
 * real and half-complex work buffers, and create the forward (FFTW_R2HC)
 * and backward (FFTW_HC2R) r2r plans that operate on those buffers.
 *
 * config  - structure to fill in; its rank, dims, howmany, sz, rdata,
 *           hcdata, r2hc_plan and hc2r_plan members are written.
 * rank    - number of transform dimensions (must be >= 1);
 *           dims[0..rank-1] are read and copied into config->dims.
 *           NOTE(review): the dims[3] prototype suggests rank <= 3 and
 *           config->dims must hold at least `rank` entries - confirm
 *           against the DGR2HCConfig declaration.
 * dims    - extent of each dimension (each must be >= 1).
 * howmany - number of transforms (must be >= 1); consecutive transforms
 *           are `sz` elements apart, where sz is the product of dims.
 *
 * Returns 0 on success and -1 on invalid arguments, allocation failure or
 * planner failure.  On failure everything acquired here is released and
 * the buffer/plan members of config are left NULL.
 */
int dg_DGR2HCConfig_init(DGR2HCConfig *config, int rank, int dims[3], int howmany)
{
  int i, sz;
  size_t nelems;
  fftw_r2r_kind *r2hckinds = NULL;
  fftw_r2r_kind *hc2rkinds = NULL;

  if (config == NULL)
    return -1;

  /* Start from a known state so the failure path can clean up safely. */
  config->rdata = NULL;
  config->hcdata = NULL;
  config->r2hc_plan = NULL;
  config->hc2r_plan = NULL;

  if (dims == NULL || rank < 1 || howmany < 1)
    return -1;

  config->rank = rank;

  r2hckinds = (fftw_r2r_kind*)malloc((size_t)rank * sizeof(fftw_r2r_kind));
  hc2rkinds = (fftw_r2r_kind*)malloc((size_t)rank * sizeof(fftw_r2r_kind));
  if (r2hckinds == NULL || hc2rkinds == NULL)
    goto fail;

  sz = 1;
  for (i = 0; i < rank; ++i) {
    if (dims[i] < 1)
      goto fail;
    sz *= dims[i];
    config->dims[i] = dims[i];
    r2hckinds[i] = FFTW_R2HC;
    hc2rkinds[i] = FFTW_HC2R;
  }
  config->howmany = howmany;
  config->sz = sz;

  /* Widen before multiplying so the byte count is not computed in int. */
  nelems = (size_t)sz * (size_t)howmany;
  config->rdata = (double*)fftw_malloc(nelems * sizeof(double));
  config->hcdata = (double*)fftw_malloc(nelems * sizeof(double));
  if (config->rdata == NULL || config->hcdata == NULL)
    goto fail;

  /* Forward plan: rdata -> hcdata, contiguous data, transforms sz apart. */
  config->r2hc_plan = fftw_plan_many_r2r(rank,           /* rank */
                                         config->dims,   /* n */
                                         howmany,        /* howmany */
                                         config->rdata,  /* in */
                                         NULL,           /* inembed */
                                         1,              /* istride */
                                         sz,             /* idist */
                                         config->hcdata, /* out */
                                         NULL,           /* onembed */
                                         1,              /* ostride */
                                         sz,             /* odist */
                                         r2hckinds,
                                         FFTW_ESTIMATE);
  if (config->r2hc_plan == NULL)
    goto fail;

  /* Backward plan: hcdata -> rdata, same layout. */
  config->hc2r_plan = fftw_plan_many_r2r(rank,           /* rank */
                                         config->dims,   /* n */
                                         howmany,        /* howmany */
                                         config->hcdata, /* in */
                                         NULL,           /* inembed */
                                         1,              /* istride */
                                         sz,             /* idist */
                                         config->rdata,  /* out */
                                         NULL,           /* onembed */
                                         1,              /* ostride */
                                         sz,             /* odist */
                                         hc2rkinds,
                                         FFTW_ESTIMATE);
  if (config->hc2r_plan == NULL)
    goto fail;

  free(r2hckinds);
  free(hc2rkinds);
  return 0;

fail:
  if (config->hc2r_plan != NULL) {
    fftw_destroy_plan(config->hc2r_plan);
    config->hc2r_plan = NULL;
  }
  if (config->r2hc_plan != NULL) {
    fftw_destroy_plan(config->r2hc_plan);
    config->r2hc_plan = NULL;
  }
  if (config->hcdata != NULL) {
    fftw_free(config->hcdata);
    config->hcdata = NULL;
  }
  if (config->rdata != NULL) {
    fftw_free(config->rdata);
    config->rdata = NULL;
  }
  free(r2hckinds);
  free(hc2rkinds);
  return -1;
}
void sinefft(struct poisson* thePoisson){ int i,j,k; double *in=thePoisson->density; const int N_element=thePoisson->N_z_glob; int N_k=thePoisson->N_k; int N_r_glob=thePoisson->N_r_glob; int N_z_glob=thePoisson->N_z_glob; const double uni=sqrt(0.5*(N_element+1)); /* double * sine_buf=thePoisson->shortbuffer; fftw_plan p; for(j=0;j<N_k;j++) for(i=0;i<N_r_glob;i++){ for(k=0;k<N_z_glob;k++) sine_buf[k]=in[in_lookup(thePoisson,k,i,j)]; p=fftw_plan_r2r_1d(N_element, sine_buf,sine_buf,FFTW_RODFT00,FFTW_ESTIMATE); fftw_execute(p); for(k=0;k<N_z_glob;k++) in[in_lookup(thePoisson,k,i,j)]=sine_buf[k]/2.0/uni; fftw_destroy_plan(p); } */ double * sine_buf=thePoisson->buffer; int idx=0; for(j=0;j<N_k;j++) for(i=0;i<N_r_glob;i++) for(k=0;k<N_z_glob;k++) sine_buf[idx++]=in[in_lookup(thePoisson,k,i,j)]; const int n[1]={N_element}; const fftw_r2r_kind kind[1]={FFTW_RODFT00}; fftw_plan p=fftw_plan_many_r2r(1,n,N_k*N_r_glob,sine_buf,n,1,N_element,sine_buf,n,1,N_element,kind,FFTW_ESTIMATE); fftw_execute(p); fftw_destroy_plan(p); idx=0; i=0;j=0;k=0; for(j=0;j<N_k;j++) for(i=0;i<N_z_glob;i++) for(k=0;k<N_z_glob;k++) in[in_lookup(thePoisson,k,i,j)]=sine_buf[idx++]/2.0/uni; fftw_cleanup(); //nnote: in and out should have the same size;same size after sine transform }
/*
 * Create an in-place FFTW plan covering the whole of matrix `m`.
 *
 *  - complex matrix: a DFT plan, FFTW_BACKWARD when `inverse` is non-zero,
 *    FFTW_FORWARD otherwise;
 *  - real matrix: an r2r plan using FFTW_REDFT01 in every dimension when
 *    `inverse` is non-zero, FFTW_REDFT10 otherwise.
 *
 * `*scale` receives 1.0 / m->size; for an inverse real plan it is halved
 * once more per dimension.  `flags` is passed unchanged to the planner.
 * Scratch buffers come from nl_getbuffer and are released before returning.
 * The plan is returned exactly as FFTW produced it.
 */
fftw_plan nl_createplan (lua_State *L, nl_Matrix *m, int inverse,
    unsigned flags, lua_Number *scale) {
  fftw_plan result;
  nl_Buffer *kinds;
  nl_Buffer *shape = nl_getbuffer(L, m->ndims);
  int d;

  /* FFTW receives the dimensions in the reverse of m->dim's order */
  for (d = m->ndims - 1; d >= 0; d--)
    shape->data.bint[m->ndims - 1 - d] = m->dim[d];

  *scale = 1.0 / m->size;

  if (m->iscomplex) {
    /* DFT: in-place, howmany == 1, dist ignored, nembed == n */
    const int direction = inverse ? FFTW_BACKWARD : FFTW_FORWARD;
    result = fftw_plan_many_dft(m->ndims, (const int *) shape->data.bint, 1,
        (fftw_complex *) m->data, NULL, m->stride, 0,
        (fftw_complex *) m->data, NULL, m->stride, 0,
        direction, flags);
    nl_freebuffer(shape);
    return result;
  }

  /* real data: cosine transform, same kind in every dimension */
  kinds = nl_getbuffer(L, m->ndims);
  d = 0;
  while (d < m->ndims) {
    if (inverse) {
      kinds->data.bint[d] = FFTW_REDFT01;
      *scale *= 0.5;
    }
    else {
      kinds->data.bint[d] = FFTW_REDFT10;
    }
    d++;
  }

  /* r2r: in-place, howmany == 1, dist ignored, nembed == n */
  result = fftw_plan_many_r2r(m->ndims, (const int *) shape->data.bint, 1,
      m->data, NULL, m->stride, 0,
      m->data, NULL, m->stride, 0,
      (const fftw_r2r_kind *) kinds->data.bint, flags);
  nl_freebuffer(kinds);
  nl_freebuffer(shape);
  return result;
}
int main(void) { /* This example shows the use of the fast polynomial transform to evaluate a * finite expansion in Legendre polynomials, * * f(x) = a_0 P_0(x) + a_1 P_1(x) + ... + a_N P_N(x) (1) * * at the Chebyshev nodes x_j = cos(j*pi/N), j=0,1,...,N. */ const int N = 8; /* An fpt_set is a data structure that contains precomputed data for a number * of different polynomial transforms. Here, we need only one transform. the * second parameter (t) is the exponent of the maximum transform size desired * (2^t), i.e., t = 3 means that N in (1) can be at most N = 8. */ fpt_set set = fpt_init(1,lrint(ceil(log2((double)N))),0U); /* Three-term recurrence coefficients for Legendre polynomials */ double *alpha = malloc((N+2)*sizeof(double)), *beta = malloc((N+2)*sizeof(double)), *gamma = malloc((N+2)*sizeof(double)); /* alpha[0] and beta[0] are not referenced. */ alpha[0] = beta[0] = 0.0; /* gamma[0] contains the value of P_0(x) (which is a constant). */ gamma[0] = 1.0; /* Actual three-term recurrence coefficients for Legendre polynomials */ { int k; for (k = 0; k <= N; k++) { alpha[k+1] = ((double)(2*k+1))/((double)(k+1)); beta[k+1] = 0.0; gamma[k+1] = -((double)(k))/((double)(k+1)); } } printf( "Computing a fast polynomial transform (FPT) and a fast discrete cosine \n" "transform (DCT) to evaluate\n\n" " f_j = a_0 P_0(x_j) + a_1 P_1(x_j) + ... + a_N P_N(x_j), j=0,1,...,N,\n\n" "with N=%d, x_j = cos(j*pi/N), j=0,1,...N, the Chebyshev nodes, a_k,\n" "k=0,1,...,N, random Fourier coefficients in [-1,1]x[-1,1]*I, and P_k,\n" "k=0,1,...,N, the Legendre polynomials.",N ); /* Random seed, makes things reproducible. */ nfft_srand48(314); /* The function fpt_repcompute actually does the precomputation for a single * transform. It needs arrays alpha, beta, and gamma, containing the three- * term recurrence coefficients, here of the Legendre polynomials. The format * is explained above. 
The sixth parameter (k_start) is where the index in the * linear combination (1) starts, here k_start=0. The seventh parameter * (kappa) is the threshold which has an influence on the accuracy of the fast * polynomial transform. Usually, kappa = 1000 is a good choice. */ fpt_precompute(set,0,alpha,beta,gamma,0,1000.0); { /* Arrays for Fourier coefficients and function values. */ double _Complex *a = malloc((N+1)*sizeof(double _Complex)); double _Complex *b = malloc((N+1)*sizeof(double _Complex)); double *f = malloc((N+1)*sizeof(double _Complex)); /* Plan for discrete cosine transform */ const int NP1 = N + 1; fftw_r2r_kind kind = FFTW_REDFT00; fftw_plan p = fftw_plan_many_r2r(1, &NP1, 1, (double*)b, NULL, 2, 1, (double*)f, NULL, 1, 1, &kind, 0U); /* random Fourier coefficients */ { int k; printf("\n2) Random Fourier coefficients a_k, k=0,1,...,N:\n"); for (k = 0; k <= N; k++) { a[k] = 2.0*X(drand48)() - 1.0; /* for debugging: use k+1 */ printf(" a_%-2d = %+5.3lE\n",k,creal(a[k])); } } /* fast polynomial transform */ fpt_trafo(set,0,a,b,N,0U); /* Renormalize coefficients b_j, j=1,2,...,N-1 owing to how FFTW defines a * DCT-I; see * http://www.fftw.org/fftw3_doc/1d-Real_002deven-DFTs-_0028DCTs_0029.html * for details */ { int j; for (j = 1; j < N; j++) b[j] *= 0.5; } /* discrete cosine transform */ fftw_execute(p); { int j; printf("\n3) Function values f_j, j=1,1,...,M:\n"); for (j = 0; j <= N; j++) printf(" f_%-2d = %+5.3lE\n",j,f[j]); } /* cleanup */ free(a); free(b); free(f); /* cleanup */ fftw_destroy_plan(p); } /* cleanup */ fpt_finalize(set); free(alpha); free(beta); free(gamma); return EXIT_SUCCESS; }
// Construct a Transformer for the given data layout.
//
// Stores the layout, derives the normalisation factors from sizex/sizey,
// fills the wavenumber tables for the Fourier and the sine/cosine
// transforms, and creates one FFTW plan per transform type.
//
//   lay         layout of the data (sizex, sizey, lenx, leny are read here)
//   fftw_flags  planner flags, handed unchanged to every FFTW planner call
Transformer::Transformer(DataLayout const& lay, unsigned int fftw_flags) :
    datalayout(lay),
    FFT_norm_factor(1.0/static_cast<double>(datalayout.sizex*datalayout.sizey)),
    FFTx_norm_factor(1.0/static_cast<double>(datalayout.sizex)),
    FFTy_norm_factor(1.0/static_cast<double>(datalayout.sizey)),
    DSCT_norm_factor(1.0/static_cast<double>(4*datalayout.sizex*datalayout.sizey)),
    DSCTx_norm_factor(1.0/static_cast<double>(2*datalayout.sizex)),
    DSCTy_norm_factor(1.0/static_cast<double>(2*datalayout.sizey))
{
    const int nx = static_cast<int>(datalayout.sizex);
    const int ny = static_cast<int>(datalayout.sizey);
    const double kx_unit = M_PI/datalayout.lenx;
    const double ky_unit = M_PI/datalayout.leny;

    // --- wavenumber tables -------------------------------------------------
    d_fft_kx = new double[datalayout.sizex];
    d_fft_ky = new double[datalayout.sizey];
    d_dsct_kx = new double[datalayout.sizex];
    d_dsct_ky = new double[datalayout.sizey];

    for (int ix = 0; ix < nx; ix++)
    {
        // Indices below nx/2 keep their value, the rest are shifted by -nx.
        const int wave_x = (ix < nx/2) ? ix : ix - nx;
        d_fft_kx[ix] = static_cast<double>(wave_x)*2*kx_unit;
        d_dsct_kx[ix] = static_cast<double>(ix + 1)*kx_unit;
    }
    for (int iy = 0; iy < ny; iy++)
    {
        const int wave_y = (iy < ny/2) ? iy : iy - ny;
        d_fft_ky[iy] = static_cast<double>(wave_y)*2*ky_unit;
        d_dsct_ky[iy] = static_cast<double>(iy + 1)*ky_unit;
    }

    // --- FFTW plans ----------------------------------------------------------
    plans = new fftw_plan[num_transform_types];

    // Scratch array handed to the planner; released again at the end of the
    // constructor. The same memory is also viewed as plain doubles for the
    // real-to-real plans.
    fftw_complex* const scratch =
        reinterpret_cast<fftw_complex*>(fftw_malloc(nx*ny*sizeof(comp)));
    double* const scratch_re = reinterpret_cast<double*>(scratch);

    // Complex FFTs: full 2D, then 1D along x (contiguous rows of length nx,
    // ny of them) and 1D along y (stride nx, nx of them).
    plans[FFT] = fftw_plan_dft_2d(ny, nx, scratch, scratch,
                                  FFTW_FORWARD, fftw_flags);
    plans[iFFT] = fftw_plan_dft_2d(ny, nx, scratch, scratch,
                                   FFTW_BACKWARD, fftw_flags);
    plans[FFTx] = fftw_plan_many_dft(1, &nx, ny,
                                     scratch, NULL, 1, nx,
                                     scratch, NULL, 1, nx,
                                     FFTW_FORWARD, fftw_flags);
    plans[iFFTx] = fftw_plan_many_dft(1, &nx, ny,
                                      scratch, NULL, 1, nx,
                                      scratch, NULL, 1, nx,
                                      FFTW_BACKWARD, fftw_flags);
    plans[FFTy] = fftw_plan_many_dft(1, &ny, nx,
                                     scratch, NULL, nx, 1,
                                     scratch, NULL, nx, 1,
                                     FFTW_FORWARD, fftw_flags);
    plans[iFFTy] = fftw_plan_many_dft(1, &ny, nx,
                                      scratch, NULL, nx, 1,
                                      scratch, NULL, nx, 1,
                                      FFTW_BACKWARD, fftw_flags);

    // Sine and cosine transforms act on the real and the imaginary part
    // separately. Viewed as doubles the array is interleaved, so a stride of
    // 2 walks one part and an extra loop of length 2 with stride 1 covers
    // both. The 1D variants need FFTW's guru interface to express these
    // nested loops; see the FFTW documentation of fftw_plan_guru_r2r before
    // changing any of the strides below.
    const fftw_r2r_kind DST_kind[] = {FFTW_RODFT10, FFTW_RODFT10};
    const fftw_r2r_kind IDST_kind[] = {FFTW_RODFT01, FFTW_RODFT01};
    const fftw_r2r_kind DCT_kind[] = {FFTW_REDFT10, FFTW_REDFT10};
    const fftw_r2r_kind IDCT_kind[] = {FFTW_REDFT01, FFTW_REDFT01};
    const int n[] = {ny, nx};

    fftw_iodim dimsx[1], dimsy[1], loopsx[2], loopsy[2];

    // Transform along x: nx points, 2 doubles apart ...
    dimsx[0].n = nx;
    dimsx[0].is = 2;
    dimsx[0].os = 2;
    // ... repeated for each of the ny rows (2*nx doubles apart) ...
    loopsx[0].n = ny;
    loopsx[0].is = 2*nx;
    loopsx[0].os = 2*nx;
    // ... and for both parts of the complex value.
    loopsx[1].n = 2;
    loopsx[1].is = 1;
    loopsx[1].os = 1;

    // Transform along y: ny points, one row (2*nx doubles) apart ...
    dimsy[0].n = ny;
    dimsy[0].is = 2*nx;
    dimsy[0].os = 2*nx;
    // ... repeated for each of the nx columns (2 doubles apart) ...
    loopsy[0].n = nx;
    loopsy[0].is = 2;
    loopsy[0].os = 2;
    // ... and for both parts of the complex value.
    loopsy[1].n = 2;
    loopsy[1].is = 1;
    loopsy[1].os = 1;

    // Sine transforms: 2D (howmany = 2, stride 2, dist 1), then x-only and
    // y-only through the guru interface.
    plans[DST] = fftw_plan_many_r2r(2, n, 2,
                                    scratch_re, NULL, 2, 1,
                                    scratch_re, NULL, 2, 1,
                                    DST_kind, fftw_flags);
    plans[iDST] = fftw_plan_many_r2r(2, n, 2,
                                     scratch_re, NULL, 2, 1,
                                     scratch_re, NULL, 2, 1,
                                     IDST_kind, fftw_flags);
    plans[DSTx] = fftw_plan_guru_r2r(1, dimsx, 2, loopsx,
                                     scratch_re, scratch_re,
                                     DST_kind, fftw_flags);
    plans[iDSTx] = fftw_plan_guru_r2r(1, dimsx, 2, loopsx,
                                      scratch_re, scratch_re,
                                      IDST_kind, fftw_flags);
    plans[DSTy] = fftw_plan_guru_r2r(1, dimsy, 2, loopsy,
                                     scratch_re, scratch_re,
                                     DST_kind, fftw_flags);
    plans[iDSTy] = fftw_plan_guru_r2r(1, dimsy, 2, loopsy,
                                      scratch_re, scratch_re,
                                      IDST_kind, fftw_flags);

    // Cosine transforms: same layout as the sine transforms above.
    plans[DCT] = fftw_plan_many_r2r(2, n, 2,
                                    scratch_re, NULL, 2, 1,
                                    scratch_re, NULL, 2, 1,
                                    DCT_kind, fftw_flags);
    plans[iDCT] = fftw_plan_many_r2r(2, n, 2,
                                     scratch_re, NULL, 2, 1,
                                     scratch_re, NULL, 2, 1,
                                     IDCT_kind, fftw_flags);
    plans[DCTx] = fftw_plan_guru_r2r(1, dimsx, 2, loopsx,
                                     scratch_re, scratch_re,
                                     DCT_kind, fftw_flags);
    plans[iDCTx] = fftw_plan_guru_r2r(1, dimsx, 2, loopsx,
                                      scratch_re, scratch_re,
                                      IDCT_kind, fftw_flags);
    plans[DCTy] = fftw_plan_guru_r2r(1, dimsy, 2, loopsy,
                                     scratch_re, scratch_re,
                                     DCT_kind, fftw_flags);
    plans[iDCTy] = fftw_plan_guru_r2r(1, dimsy, 2, loopsy,
                                      scratch_re, scratch_re,
                                      IDCT_kind, fftw_flags);

    // The scratch array was only needed for planning.
    fftw_free(scratch);
}