/*
 * Create the 1D real<->complex FFT plans along y (fft_1d_forward and
 * fft_1d_backward).
 *
 * The whole body is compiled only when WITH_SHEAR is defined; otherwise this
 * function does nothing. A temporary complex buffer (w2d) is allocated purely
 * for planning and is released before returning.
 */
void init_output_vtk() {
#ifdef WITH_SHEAR
	double complex *w2d;
	const int n_size1D[1] = {NY};

	w2d = (double complex *) fftw_malloc( sizeof(double complex) * (NY/2+1) * NZ );
	if (w2d == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w2d allocation");

	// FFT plans (we use dummy arrays since we use the "guru" interface of fft3 in the code)
	// The in place / out of place will be set automatically at this stage.
	// The following Fourier transforms take an array of size (NX+1, NY+1, NZ+1) but consider only the
	// "included" array of size (NX+1, NY, NZ+1) and transform it in y, in an array of size
	// (NX+1, NY/2+1, NZ+1). The j=NY plane is therefore not modified by these calls.

#ifdef _OPENMP
	fftw_plan_with_nthreads( 1 );
#endif

	// Planner flags are a bit mask and must be combined with bitwise OR. A logical OR
	// would collapse to the value 1 and silently drop FFTW_UNALIGNED and FFT_PLANNING.
#ifdef WITH_2D
	fft_1d_forward = fftw_plan_many_dft_r2c(1, n_size1D, 1,
	                                        wr1, NULL, 1, 1,
	                                        w2d, NULL, 1, 1,
	                                        FFT_PLANNING | FFTW_UNALIGNED);
#else
	fft_1d_forward = fftw_plan_many_dft_r2c(1, n_size1D, NZ,
	                                        wr1, NULL, NZ+2, 1,
	                                        w2d, NULL, NZ, 1,
	                                        FFT_PLANNING | FFTW_UNALIGNED);
#endif
	if (fft_1d_forward == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW 1D forward plan creation failed");

#ifdef WITH_2D
	fft_1d_backward = fftw_plan_many_dft_c2r(1, n_size1D, 1,
	                                         w2d, NULL, 1, 1,
	                                         wr1, NULL, 1, 1,
	                                         FFT_PLANNING | FFTW_UNALIGNED);
#else
	fft_1d_backward = fftw_plan_many_dft_c2r(1, n_size1D, NZ,
	                                         w2d, NULL, NZ, 1,
	                                         wr1, NULL, NZ+2, 1,
	                                         FFT_PLANNING | FFTW_UNALIGNED);
#endif
	if (fft_1d_backward == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW 1D backward plan creation failed");

	// The planning buffer is no longer needed once both plans exist.
	fftw_free(w2d);
#endif
}
void init_fft(void) { int i,j; int n[] = {Ny}; int howmany = Nx; int idist=NC, odist=NR; int istride=1, ostride=1; int *inembed=NULL, *onembed=NULL; Nmax = floor(2./3*NC); wc1 = (double complex *)malloc(sizeof(double complex)*Nx*NC); wr1 = (double *)wc1; wc2 = (double complex *)malloc(sizeof(double complex)*Nx*NC); wr2 = (double *)wc2; wc3 = (double complex *)malloc(sizeof(double complex)*Nx*NC); wr3 = (double *)wc3; mask = (double *)malloc(sizeof(double)*Nx*NC); for(i=0;i<Nx;i++) { for(j=0;j<NC;j++) { if (j < Nmax) mask[CINDX] = 1; else mask[CINDX] = 0; } } // // c2r1=fftw_plan_many_dft_c2r(1,&Nx,Ny,wc1,&Nx,1,NC,wr1,&Nx,1,NR,FFTW_ESTIMATE); // if (c2r1 == NULL) printf("Problem with c2r1\n"); // // r2c1=fftw_plan_many_dft_r2c(1,&Nx,Ny,wr1,&Nx,1,NR,wc1,&Nx,1,NC,FFTW_ESTIMATE); // if (r2c1 == NULL) printf("Problem with r2c1\n"); // // // c2r2=fftw_plan_many_dft_c2r(1,&Nx,Ny,wc2,&Nx,1,NC,wr2,&Nx,1,NR,FFTW_ESTIMATE); // if (c2r2 == NULL) printf("Problem with c2r2\n"); // // r2c2=fftw_plan_many_dft_r2c(1,&Nx,Ny,wr2,&Nx,1,NR,wc2,&Nx,1,NC,FFTW_ESTIMATE); // if (r2c2 == NULL) printf("Problem with r2c2\n"); // // // c2r3=fftw_plan_many_dft_c2r(1,&Nx,Ny,wc3,&Nx,1,NC,wr3,&Nx,1,NR,FFTW_ESTIMATE); // if (c2r3 == NULL) printf("Problem with c2r3\n"); // // r2c3=fftw_plan_many_dft_r2c(1,&Nx,Ny,wr3,&Nx,1,NR,wc3,&Nx,1,NC,FFTW_ESTIMATE); // if (r2c3 == NULL) printf("Problem with r2c3\n"); // c2r2=fftw_plan_many_dft_c2r(1,n,howmany,wc2, inembed, istride, idist, wr2, onembed, ostride, odist,FFTW_ESTIMATE); r2c2=fftw_plan_many_dft_r2c(1,n,howmany,wr2, onembed, ostride, odist, wc2, inembed, istride, idist,FFTW_ESTIMATE); c2r1=fftw_plan_many_dft_c2r(1,n,howmany,wc1, inembed, istride, idist, wr1, onembed, ostride, odist,FFTW_ESTIMATE); r2c1=fftw_plan_many_dft_r2c(1,n,howmany,wr1, onembed, ostride, odist, wc1, inembed, istride, idist,FFTW_ESTIMATE); c2r3=fftw_plan_many_dft_c2r(1,n,howmany,wc3, inembed, istride, idist, wr3, onembed, ostride, odist,FFTW_ESTIMATE); 
r2c3=fftw_plan_many_dft_r2c(1,n,howmany,wr3, onembed, ostride, odist, wc3, inembed, istride, idist,FFTW_ESTIMATE); return; }
double *fftr(double *Y, const double *X, const unsigned long nvar, const unsigned long nobs) { const unsigned long nvarout = (nvar>>1)+1; // Since FFT of real data are symmetric, we only store nvar/2+1 fftw_complex elements as output (symmetric part is not duplicated) const int n = nvar; // Store nvar within an int since we will pass it as a pointer // Create the FFTW plan fftw_plan plan = fftw_plan_many_dft_r2c(1, // [int rank] Rank 1 DFT &n, // [const int *n] Number of variables within input array nobs, // [int howmany] Number of observations (number of DFT to perform) (double *) X, // [double *in] Input array is X (it is cast to non-const but will not be modified since FFTW_DESTROY_INPUT is not set) NULL, // [const int *inembed] Distance between each rank in input array (Not used since rank=1) 1, // [int istride] Distance between successive variables in input array (in unit of double) n, // [int idist] Distance between 2 observations in input array (in unit of double) (fftw_complex *) Y, // [fftw_complex *out] Output array is Y NULL, // [const int *onembed] Distance between each rank in output array (Not used since rank=1) 1, // [int ostride] Distance between successive variables in output array (in unit of fftw_complex) nvarout, // [int odist] Distance between 2 observations in output array (in unit of fftw_complex) FFTW_ESTIMATE // [unsigned flags] Quickly choose a plan without performing full benchmarks (maybe sub-optimal but take less time) ); // If plan building fails, quit if(!plan) return NULL; // Execute FFTW plan fftw_execute(plan); // Destroy FFTW plan fftw_destroy_plan(plan); return Y; }
/// Transform every volume of `volumes` to the frequency domain, in place.
///
/// Nothing happens when the list is empty or its first volume is already in
/// the frequency domain. The FFTW plan is cached in `forward_plans`, keyed by
/// the list's data pointer, and is created on first use.
///
/// If FFTW cannot create a plan, nothing is cached or executed and the volumes
/// keep their current domain tag, so callers can detect the failure instead of
/// crashing inside fftw_execute.
void FFT::forward(VolumeList &volumes) {
    if (volumes.size() == 0 || volumes[0].domain == Domain::Frequency) {
        return;
    }

    fftw_plan plan = nullptr;

    // Plain lookup instead of at() + catch(...): a cache miss is the expected
    // path on first use, not an exceptional one.
    const auto cached = forward_plans.find(volumes.data);
    if (cached != forward_plans.end()) {
        plan = cached->second;
    } else {
        const int rank = 3;
        int n[] = {
            static_cast<int>(volumes[0].width),
            static_cast<int>(volumes[0].height),
            static_cast<int>(volumes[0].depth)
        };
        const int howmany = static_cast<int>(volumes.size());
        const int idist = volumes.volume_size_real;
        const int odist = volumes.volume_size_complex;
        const int istride = 1, ostride = 1;
        int *inembed = nullptr, *onembed = nullptr;

        // Input and output share the same buffer: this is an in-place transform.
        plan = fftw_plan_many_dft_r2c(rank, n, howmany,
                                      volumes.data, inembed, istride, idist,
                                      reinterpret_cast<fftw_complex *>(volumes.data), onembed, ostride, odist,
                                      FFTW_ESTIMATE);
        if (plan != nullptr) {
            forward_plans[volumes.data] = plan;
        }
    }

    if (plan == nullptr) {
        return;  // planning failed: leave the data and domain tags untouched
    }

    fftw_execute(plan);

    for (size_t i = 0; i < volumes.size(); ++i) {
        volumes[i].domain = Domain::Frequency;
    }
}
/*
 * Pointer-based wrapper around fftw_plan_many_dft_r2c().
 *
 * The planner flags are read through `flags` and the resulting plan handle is
 * written to `*plan`; every other argument is forwarded unchanged. `*plan` is
 * whatever FFTW returned, so it is NULL when planning failed.
 */
void kemo_fftw_plan_many_dft_r2c(fftw_plan *plan, int rank, int *n_size, int howmany, double *dble_in, const int *inembed, int istride, int idist, fftw_complex *cplx_out, int *onembed, int ostride, int odist, unsigned *flags){
	const unsigned planner_flags = *flags;
	fftw_plan created = fftw_plan_many_dft_r2c(rank, n_size, howmany,
	                                           dble_in, inembed, istride, idist,
	                                           cplx_out, onembed, ostride, odist,
	                                           planner_flags);
	*plan = created;
}
int ambit_dft_r2c_inplace(struct ambit_dense_array *a, int sign) { int err = 0; fftw_plan plan = NULL; if (!ambit_dense_array_fftw_r2c_embedded(a)) { fprintf(stderr, "%s: Invalid array or inappropriate embedding.\n", __func__); err = -1; goto cleanup; } switch (sign) { case FFTW_FORWARD: plan = fftw_plan_many_dft_r2c(a->rank, (const int *)a->dim, 1, a->data, (const int *)a->dimembed, 1, 0, (C *)a->data, (const int *)a->dimembed, 1, 0, FFTW_ESTIMATE); break; case FFTW_BACKWARD: plan = fftw_plan_many_dft_c2r(a->rank, (const int *)a->dim, 1, (C *)a->data, (const int *)a->dimembed, 1, 0, a->data, (const int *)a->dimembed, 1, 0, FFTW_ESTIMATE); break; default: fprintf(stderr, "%s: Sign must be FFTW_FORWARD or FFTW_BACKWARD.\n", __func__); err = -1; goto cleanup; } if (!plan) { fprintf(stderr, "%s: DFT planning failed.\n", __func__); err = -1; goto cleanup; } fftw_execute(plan); cleanup: fftw_destroy_plan(plan); return err; }
/// Set up a batched real-to-complex FFT of `count` interleaved signals of
/// `size` samples each, together with a window table of `size` coefficients.
///
/// @param size    transform length (samples per signal)
/// @param count   number of signals transformed by one execution of the plan
/// @param window  0 = Hamming, 1 = Hann, 2 = rectangular. Any other value now
///                also gives a rectangular window; previously the table was
///                left uninitialised in that case.
///
/// Data layout (from the plan arguments): stride = count and dist = 1, i.e.
/// sample k of signal j lives at index k * count + j in both buffers.
FFT::FFT(int size, int count, int window) : m_size(size), m_count(count) {
    m_indata = static_cast<double *>(malloc(sizeof(double) * size * count));
    m_outdata = static_cast<complex *>(malloc(sizeof(complex) * size * count));

    // Wisdom import/export is intentionally not done here; the plan is searched
    // from scratch with FFTW_PATIENT on every construction.
    int n = size;
    m_plan = fftw_plan_many_dft_r2c(1, &n, count,
                                    m_indata, nullptr, count, 1,
                                    m_outdata, nullptr, count, 1,
                                    FFTW_PATIENT);

    // Build the window table.
    m_window = static_cast<double *>(malloc(sizeof(double) * size));
    for (int i = 0; i < size; i++) {
        // Rectangular by default. A one-sample window is also 1: the cosine
        // formulas below would otherwise divide 0 by 0 and produce NaN.
        double coeff = 1.0;
        if (size > 1) {
            const double c = cos((2.0 * M_PI * i) / ((double) size - 1.0));
            if (window == 0) {
                coeff = 0.54 - 0.46 * c;  // Hamming
            } else if (window == 1) {
                coeff = 0.5 - 0.5 * c;    // Hann
            }
        }
        m_window[i] = coeff;
    }
}
/// Batched 1D real-to-complex FFT (double precision) computed with FFTW.
///
/// Transforms `howmany` contiguous real signals of length `n` read from `x`
/// and writes n/2+1 complex values per signal to `y`. If FFTW cannot create a
/// plan the function returns without writing anything to `y`.
void caffe_cpu_fft<double>(const int howmany, const int n, const double* x, complex<double>* y) {
  int real_shape[] = {n};
  int cplx_shape[] = {n / 2 + 1};

  fftw_complex* spectrum = reinterpret_cast<fftw_complex*>(y);

  // FFTW's API takes a non-const input pointer even for this out-of-place transform.
  fftw_plan plan = fftw_plan_many_dft_r2c(1, real_shape, howmany,
                                          const_cast<double*>(x), real_shape, 1, n,
                                          spectrum, cplx_shape, 1, n / 2 + 1,
                                          FFTW_ESTIMATE);
  if (plan != 0) {
    fftw_execute(plan);
    fftw_destroy_plan(plan);
  }
}
int main(int argc, char *argv[]) { int ret = EXIT_FAILURE; // Set up the PRNG dsfmt_t *dsfmt = malloc(sizeof(dsfmt_t)); if(dsfmt == NULL) { fprintf(stdout, "unable to allocate PRNG\n"); goto skip_deallocate_prng; } dsfmt_init_gen_rand(dsfmt, SEED); // Set up the source values double *src = fftw_malloc(N*VL*sizeof(double)); if(src == NULL) { fprintf(stdout, "unable to allocate source vector\n"); goto skip_deallocate_src; } for(unsigned int i = 0; i < N*VL; ++i) { src[i] = dsfmt_genrand_open_close(dsfmt); } // Allocate the FFT destination array double complex *fft = fftw_malloc(N*VL*sizeof(double complex)); if(fft == NULL) { fprintf(stdout, "unable to allocate fft vector\n"); goto skip_deallocate_fft; } // Execute the forward FFT fftw_plan fwd_plan = fftw_plan_many_dft_r2c(1, &N, VL, src, NULL, VL, 1, fft, NULL, VL, 1, FFTW_ESTIMATE); if(fwd_plan == NULL) { fprintf(stdout, "unable to allocate fft forward plan\n"); goto skip_deallocate_fwd_plan; } fftw_execute(fwd_plan); // Fill in the rest of the destination values using the Hermitian property. fft_r2c_1d_vec_finish(fft, N, VL); // Allocate the reverse FFT destination array double complex *dst = fftw_malloc(N*VL*sizeof(double complex)); if(dst == NULL) { fprintf(stdout, "unable to allocate dst vector\n"); goto skip_deallocate_dst; } // Perform the reverse FFT fftw_plan rev_plan = fftw_plan_many_dft(1, &N, VL, fft, NULL, VL, 1, dst, NULL, VL, 1, FFTW_BACKWARD, FFTW_ESTIMATE); if(rev_plan == NULL) { fprintf(stdout, "unable to allocate fft reverse plan\n"); goto skip_deallocate_rev_plan; } fftw_execute(rev_plan); // Compare the two vectors by sup norm double norm = 0.0; for(unsigned int i = 0; i < N*VL; ++i) { // Divide the resulting by N, because FFTW computes the un-normalized DFT: // the forward followed by reverse transform scales the data by N. 
norm = fmax(norm, cabs(dst[i]/N - src[i])); } if(norm <= 1e-6) { ret = EXIT_SUCCESS; } fftw_destroy_plan(rev_plan); skip_deallocate_rev_plan: fftw_free(dst); skip_deallocate_dst: fftw_destroy_plan(fwd_plan); skip_deallocate_fwd_plan: fftw_free(fft); skip_deallocate_fft: fftw_free(src); skip_deallocate_src: free(dsfmt); skip_deallocate_prng: // Keep valgrind happy by having fftw clean up its internal structures. This // helps ensure we aren't leaking memory. fftw_cleanup(); return ret; }
void CCorrelationFilters::fft_data(struct CDataStruct *img) { // Need to make this work with fftw threads, there seem to be some compiling and linking errors. int num_dim = img->size_data.size(); int rank = num_dim; int *n = new int(num_dim); int *m = new int(num_dim); for (int i=0; i<num_dim; i++) { n[i] = img->size_data[i]; m[i] = img->size_data_freq[i]; } //int numCPU = 2; Is there is a way to automatically figure out the number of CPUs? //fftw_init_threads(); //fftw_plan_with_nthreads(numCPU); fftw_plan plan; fftw_complex *out; int val = memcmp(n, m, num_dim*sizeof(int)); img->num_elements_freq = (img->size_data_freq.prod()/img->size_data_freq(num_dim-1))*(img->size_data_freq(num_dim-1)/2+1); double scale_factor = sqrt(img->size_data_freq.prod()); int istride = 1; int ostride = 1; int idist = img->size_data_freq.prod(); int odist = (img->size_data_freq.prod()/img->size_data_freq(num_dim-1))*(img->size_data_freq(num_dim-1)/2+1); if (val != 0){ // If the FFT size is NOT the same as the size of the data, zero pad the data. 
int num_channels = img->num_channels; double *data; data = new double[num_channels*img->size_data_freq.prod()]; int howmany = num_channels; delete[] img->data_freq; img->data_freq = new complex<double>[odist*num_channels*img->num_data]; out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*odist*num_channels); plan = fftw_plan_many_dft_r2c(rank, m, howmany, data, NULL, istride, idist, out, NULL, ostride, odist, FFTW_ESTIMATE); CDataStruct img1; img1.size_data = img->size_data; img1.size_data_freq = img->size_data_freq; img1.num_data = 1; img1.num_channels = img->num_channels; img1.data = new double[num_channels*img->size_data.prod()]; for (int i=0; i<img->num_data; i++) { memcpy(img1.data,img->data+i*num_channels*img->size_data.prod(),sizeof(double)*num_channels*img->size_data.prod()); zero_pad_data(data, &img1); fftw_execute(plan); memcpy(img->data_freq+i*odist*num_channels,reinterpret_cast<complex <double>*>(out),sizeof(complex<double>)*odist*num_channels); } for (int i=0; i<img->num_data; i++) { img->ptr_data_freq.push_back((img->data_freq+i*odist*num_channels)); } fftw_destroy_plan(plan); fftw_free(out); delete[] data; } else{ int howmany = img->num_data*img->num_channels; double *in = img->data; out = (fftw_complex*) fftwf_malloc(sizeof(fftw_complex)*odist*img->num_channels*img->num_data); plan = fftw_plan_many_dft_r2c(rank, n, howmany, in, NULL, istride, idist, out, NULL, ostride, odist, FFTW_ESTIMATE); fftw_execute(plan); img->data_freq = reinterpret_cast<complex <double>*>(out); for (int i=0; i<img->num_data; i++) { img->ptr_data_freq.push_back((img->data_freq+i*odist*img->num_channels)); } fftw_destroy_plan(plan); } for (int i=0; i<img->num_data*img->num_channels*odist; i++){ img->data_freq[i] = img->data_freq[i]/scale_factor; } //fftw_cleanup_threads(); delete[] n; delete[] m; }
int main(int argc, char *argv[]) { char const *inname = NULL; char const *outname = NULL; char const *initstring = NULL; char const *preamble = NULL; char const *postamble = NULL; uint32_t length = 16384; sf_count_t seek = 0; double seek_sec = 0; sf_count_t step = 0; double step_sec = 0; SNDFILE *infile = NULL; FILE *outfile = NULL; int decimate = 1; SF_INFO sfinfo; double *in = NULL; fftw_complex *out = NULL; fftw_plan plan = NULL; int opt; while ((opt = getopt(argc, argv, "i:o:l:s:S:t:T:w:d:I:p:P:")) != -1) switch (opt) { case 'i': inname = optarg; break; case 'o': outname = optarg; break; case 'l': length = atoi(optarg); break; case 's': seek_sec = atof(optarg); break; case 'S': seek = atoll(optarg); break; case 't': step_sec = atof(optarg); break; case 'T': step = atoll(optarg); break; case 'w': if (strcmp(optarg, "rectangular") && strcmp(optarg, "boxcar")) fprintf(stderr, "only rectangular and boxcar window functions supported.\n"); break; case 'd': decimate = atoi(optarg); break; case 'I': initstring = optarg; break; case 'p': preamble = optarg; break; case 'P': postamble = optarg; break; default: fprintf(stderr, "unknown option '%c'\n", opt); exit(EXIT_FAILURE); } if ((infile = sf_open(inname, SFM_READ, &sfinfo)) == NULL) { fprintf(stderr, "couldn't open input outfile '%s'\n", inname); exit(EXIT_FAILURE); } if (outname == NULL) outfile = stdout; else if ((outfile = fopen(outname, "wt")) == NULL) { fprintf(stderr, "couldn't open output outfile '%s'\n", outname); exit(EXIT_FAILURE); } if (initstring) fprintf(outfile, "%s\n", initstring); in = fftw_malloc(sizeof(*in) * sfinfo.channels * length); out = fftw_malloc(sizeof(*out) * sfinfo.channels * length); if (in && out) { int n[1] = { length }; plan = fftw_plan_many_dft_r2c(1, n, sfinfo.channels, in, NULL, sfinfo.channels, 1, out, NULL, sfinfo.channels, 1, FFTW_ESTIMATE | FFTW_DESTROY_INPUT); } if (plan == NULL) { fprintf(stderr, "couldn't initialise fftw.\n"); exit(EXIT_FAILURE); } seek += rint(seek_sec * 
sfinfo.samplerate); step += rint(step_sec * sfinfo.samplerate); do { int r, c; sf_seek(infile, (sf_count_t)rint(seek), SEEK_SET); r = sf_readf_double(infile, in, length) * sfinfo.channels; if (r <= 0) break; if (r < length * sfinfo.channels) step = 0; while (r < length * sfinfo.channels) in[r++] = 0.0; fftw_execute(plan); if (preamble) fprintf(outfile, "%s\n", preamble); for (r = 0; r * 2 < length; r += decimate) { double f = (double)r * sfinfo.samplerate / length; fprintf(outfile, "%lf", f); for (c = 0; c < sfinfo.channels; c++) { double x = 0.0; int i; for (i = 0; i < decimate; i++) { fftw_complex *p = &out[(r + i) * sfinfo.channels + c]; double y = p[0][0] * p[0][0] + p[0][1] * p[0][1]; if (x < y) x = y; } x = log10(x) / 2.0 * 20.0 - log10(length * 0.5) * 20.0; fprintf(outfile, " %lf", x); } fprintf(outfile, "\n"); } if (postamble) fprintf(outfile, "%s\n", postamble); seek += step; } while (step > 0); fftw_destroy_plan(plan); fftw_free(in); fftw_free(out); sf_close(infile); if (outname != NULL) fclose(outfile); return EXIT_SUCCESS; }
void init_gfft() { // This will init the plans needed by gfft // Transform of NY/NPROC arrays of (logical) size [NX, NZ] // The physical size is [NX, NZ+2] // We use in-place transforms int i; double complex *wi1, *whi1; double *wir1, *whir1; const int n_size2D[2] = {NX, NZ}; const int n_size1D[1] = {NY_COMPLEX}; wi1 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (wi1 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for wi1 allocation"); whi1 = (double complex *) fftw_malloc( sizeof(double complex) * NX*(NY/2+1)); if (whi1 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for wi1 allocation"); wir1 = (double *) wi1; whir1= (double *) whi1; for(i = 0 ; i < NTOTAL_COMPLEX; i++) { wi1[i]=1.0; } #ifdef _OPENMP fftw_plan_with_nthreads( nthreads ); #endif r2c_2d = fftw_plan_many_dft_r2c(2, n_size2D, NY / NPROC, wir1, NULL, 1, (NZ+2)*NX, wi1, NULL, 1, (NZ+2)*NX/2, FFT_PLANNING); if (r2c_2d == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW R2C_2D plan creation failed"); c2r_2d = fftw_plan_many_dft_c2r(2, n_size2D, NY / NPROC, wi1, NULL, 1, (NZ+2)*NX/2, wir1, NULL, 1, (NZ+2)*NX , FFT_PLANNING); if (c2r_2d == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW C2R_2D plan creation failed"); r2cfft_2Dslice = fftw_plan_dft_r2c_2d(NX,NY,wrh3,wh3,FFT_PLANNING); //,whir1,whi1 if (r2cfft_2Dslice == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW r2c slice plan creation failed"); // 1D transforms: This are actually c2c transforms, but are used for global 3D transforms. // We will transform forward and backward an array of logical size [NX/NPROC, NY, (NZ+2)/2] along the 2nd dimension // We will do NZ_COMPLEX transforms along Y. 
Will need a loop on NX/NPROC // We use &w1[NZ_COMPLEX] so that alignement check is done properly (see SIMD in fftw Documentation) #ifdef _OPENMP fftw_plan_with_nthreads( 1 ); #endif r2c_1d = fftw_plan_many_dft(1, n_size1D, NZ_COMPLEX, &wi1[NZ_COMPLEX], NULL, NZ_COMPLEX, 1, &wi1[NZ_COMPLEX], NULL, NZ_COMPLEX, 1, FFTW_FORWARD, FFT_PLANNING); if (r2c_1d == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW R2C_1D plan creation failed"); c2r_1d = fftw_plan_many_dft(1, n_size1D, NZ_COMPLEX, &wi1[NZ_COMPLEX], NULL, NZ_COMPLEX, 1, &wi1[NZ_COMPLEX], NULL, NZ_COMPLEX, 1, FFTW_BACKWARD, FFT_PLANNING); if (c2r_1d == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW C2R_1D plan creation failed"); // init transpose routines init_transpose(); // Let's see which method is faster (with our without threads) fftw_free(wi1); fftw_free(whi1); fft_timer=0.0; return; }
int main(int argc, char **argv) { // Error handling scheme: this function has failed until proven otherwise. int ret = EXIT_FAILURE; if(MPI_Init(&argc, &argv) != MPI_SUCCESS) { // Theoretically, an error at this point will abort the program, and this // code path is never followed. This is here for completeness. fprintf(stderr, "unable to initialize MPI\n"); goto die_immed; } // Install the MPI error handler that returns error codes, so we can perform // the usual process suicide ritual. if(MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN) != MPI_SUCCESS) { // Again, theoretically, the previous error handler (MPI_Abort) gets called // instead of reaching this fail point. fprintf(stderr, "unable to reset MPI error handler\n"); goto die_finalize_mpi; } int size, rank; if(MPI_Comm_size(MPI_COMM_WORLD, &size) != MPI_SUCCESS || MPI_Comm_rank(MPI_COMM_WORLD, &rank) != MPI_SUCCESS) { fprintf(stderr, "unable to determine rank and size\n"); goto die_finalize_mpi; } dsfmt_t *prng = malloc(sizeof(dsfmt_t)); if(prng == NULL) { fprintf(stderr, "unable to allocate PRNG\n"); goto die_finalize_mpi; } dsfmt_init_gen_rand(prng, SEED); const int master_elems = proc_elems * size; double *const master = fftw_malloc(VL*master_elems*sizeof(double)); if(master == NULL) { fprintf(stderr, "unable to allocate master array\n"); goto die_free_prng; } for(int i = 0; i < master_elems*VL; ++i) { master[i] = 2*dsfmt_genrand_open_close(prng) - 1; } /* Allocate the array holding the serial result */ double complex *const serial = fftw_malloc(VL*master_elems*sizeof(*serial)); if(serial == NULL) { fprintf(stderr, "unable to allocate serial array\n"); goto die_free_master; } /* Perform serial transform */ fftw_plan serial_plan = fftw_plan_many_dft_r2c(1, &master_elems, VL, master, NULL, VL, 1, serial, NULL, VL, 1, FFTW_ESTIMATE); if(serial_plan == NULL) { fprintf(stderr, "unable to construct forward transform plan\n"); goto die_free_serial; } /* Perform the serial transform, and complete it */ 
fftw_execute(serial_plan); fft_r2c_1d_vec_finish(serial, master_elems, VL); /* Allocate space to hold the parallel transform result */ double complex *const parallel = fftw_malloc( proc_elems*VL*sizeof(double complex)); if(parallel == NULL) { fprintf(stderr, "unable to allocate space for parallel array\n"); goto die_destroy_serial_plan; } /* Create the parallel plan */ fft_par_plan par_plan = fft_par_plan_r2c_1d(MPI_COMM_WORLD, proc_elems, VL, master + rank*proc_elems*VL, parallel, NULL); if(par_plan == NULL) { fprintf(stderr, "unable to create parallel transform plan\n"); goto die_free_parallel; } /* Execute the parallel transform */ if(fft_par_execute_fwd(par_plan) != MPI_SUCCESS) { fprintf(stderr, "unable to execute parallel transform\n"); goto die_destroy_par_plan; } /* Compare values */ int sup = 0.0; for(int i = 0; i < proc_elems*VL; ++i) { sup = fmax(sup, cabs(serial[rank*proc_elems*VL + i] - parallel[i])); } if(sup < 1.0e-6) { ret = EXIT_SUCCESS; } die_destroy_par_plan: fft_par_plan_destroy(par_plan); die_free_parallel: fftw_free(parallel); die_destroy_serial_plan: fftw_destroy_plan(serial_plan); die_free_serial: fftw_free(serial); die_free_master: fftw_free(master); die_free_prng: free(prng); die_finalize_mpi: if(MPI_Finalize() != MPI_SUCCESS) { fprintf(stderr, "unable to finalize MPI\n"); ret = EXIT_FAILURE; } die_immed: fftw_cleanup(); return ret; }