示例#1
0
/*
 * Create the 1D real<->complex FFTW plans fft_1d_forward and fft_1d_backward.
 *
 * The whole body is compiled only when WITH_SHEAR is defined; without it the
 * function does nothing.
 */
void init_output_vtk() {

#ifdef WITH_SHEAR

	double complex *w2d;

	const int n_size1D[1] = {NY};

	// Scratch complex array, used only as the planning target and freed below.
	w2d = (double complex *) fftw_malloc( sizeof(double complex) * (NY/2+1) * NZ );
	if (w2d == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w2d allocation");

// FFT plans (we use dummy arrays since we use the "guru" interface of fft3 in the code)
// The in place / out of place choice will be set automatically at this stage

// The following Fourier transforms take an array of size (NX+1, NY+1, NZ+1) but consider only the "included" array
// of size (NX+1, NY, NZ+1) and transform it in y, into an array of size (NX+1, NY/2+1, NZ+1). The j=NY plane is therefore
// not modified by these calls

#ifdef _OPENMP
	fftw_plan_with_nthreads( 1 );
#endif

// Planner flags are bit masks and must be combined with bitwise OR.
// A logical OR would collapse them to the integer 1 and lose FFTW_UNALIGNED.

#ifdef WITH_2D
	fft_1d_forward = fftw_plan_many_dft_r2c(1, n_size1D, 1,
											wr1, NULL, 1, 1,
											w2d, NULL, 1, 1,
											FFT_PLANNING | FFTW_UNALIGNED);
#else
	fft_1d_forward = fftw_plan_many_dft_r2c(1, n_size1D, NZ,
											wr1, NULL, NZ+2, 1,
											w2d, NULL, NZ, 1,
											FFT_PLANNING | FFTW_UNALIGNED);
#endif

	if (fft_1d_forward == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW 1D forward plan creation failed");

#ifdef WITH_2D
	fft_1d_backward = fftw_plan_many_dft_c2r(1, n_size1D, 1,
											w2d, NULL, 1, 1,
											wr1, NULL, 1, 1,
											FFT_PLANNING | FFTW_UNALIGNED);
#else
	fft_1d_backward = fftw_plan_many_dft_c2r(1, n_size1D, NZ,
											w2d, NULL, NZ, 1,
											wr1, NULL, NZ+2, 1,
											FFT_PLANNING | FFTW_UNALIGNED);
#endif

	if (fft_1d_backward == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW 1D backward plan creation failed");

	fftw_free(w2d);

#endif


}
示例#2
0
/*
 * Allocate the three complex work buffers (each also viewed as a real array),
 * build the 0/1 mask that keeps only the first Nmax = floor(2/3 * NC) entries
 * of every row, and create one c2r and one r2c FFTW plan per buffer.
 *
 * Each plan is a batch of Nx one-dimensional transforms of length Ny with
 * unit stride; consecutive transforms are NC elements apart on the complex
 * side and NR elements apart on the real side.
 */
void init_fft(void) {
	/* Keep the names i and j: the CINDX macro presumably expands in terms of them. */
	int i, j;

	int length[1] = {Ny};
	const int batch = Nx;
	const int cplx_dist = NC;
	const int real_dist = NR;

	Nmax = floor(2./3*NC);

	/* The real views alias the complex buffers (in-place transforms). */
	wc1 = (double complex *)malloc(sizeof(double complex)*Nx*NC);
	wc2 = (double complex *)malloc(sizeof(double complex)*Nx*NC);
	wc3 = (double complex *)malloc(sizeof(double complex)*Nx*NC);
	wr1 = (double *)wc1;
	wr2 = (double *)wc2;
	wr3 = (double *)wc3;

	mask = (double *)malloc(sizeof(double)*Nx*NC);

	for (i = 0; i < Nx; i++) {
		for (j = 0; j < NC; j++) {
			mask[CINDX] = (j < Nmax) ? 1 : 0;
		}
	}

	c2r2 = fftw_plan_many_dft_c2r(1, length, batch,
	                              wc2, NULL, 1, cplx_dist,
	                              wr2, NULL, 1, real_dist, FFTW_ESTIMATE);
	r2c2 = fftw_plan_many_dft_r2c(1, length, batch,
	                              wr2, NULL, 1, real_dist,
	                              wc2, NULL, 1, cplx_dist, FFTW_ESTIMATE);

	c2r1 = fftw_plan_many_dft_c2r(1, length, batch,
	                              wc1, NULL, 1, cplx_dist,
	                              wr1, NULL, 1, real_dist, FFTW_ESTIMATE);
	r2c1 = fftw_plan_many_dft_r2c(1, length, batch,
	                              wr1, NULL, 1, real_dist,
	                              wc1, NULL, 1, cplx_dist, FFTW_ESTIMATE);

	c2r3 = fftw_plan_many_dft_c2r(1, length, batch,
	                              wc3, NULL, 1, cplx_dist,
	                              wr3, NULL, 1, real_dist, FFTW_ESTIMATE);
	r2c3 = fftw_plan_many_dft_r2c(1, length, batch,
	                              wr3, NULL, 1, real_dist,
	                              wc3, NULL, 1, cplx_dist, FFTW_ESTIMATE);
}
示例#3
0
/*
 * Compute the real-to-complex DFT of nobs observations of nvar real values.
 *
 * X    : input, nobs consecutive rows of nvar doubles (not modified).
 * Y    : output, reinterpreted as fftw_complex; each observation produces
 *        nvar/2+1 complex values (the redundant symmetric half is not stored).
 * nvar : length of each transform.
 * nobs : number of transforms.
 *
 * Returns Y on success, or NULL if the sizes do not fit the int-based FFTW
 * interface or if FFTW cannot build a plan.
 */
double *fftr(double *Y, const double *X, const unsigned long nvar, const unsigned long nobs) {
  // FFTW's plan interface takes int sizes. Reject anything that would be
  // truncated (the bound below is INT_MAX on the usual two's-complement targets).
  const unsigned long int_limit = (unsigned long) (~0u >> 1);
  if (nvar > int_limit || nobs > int_limit)
    return NULL;

  // Since the FFT of real data is symmetric, only nvar/2+1 complex elements are stored per observation
  const int nvarout = (int) ((nvar >> 1) + 1);
  const int n = (int) nvar; // Stored in an int because it is passed by pointer
  const int howmany = (int) nobs;

  // Create the FFTW plan
  fftw_plan plan = fftw_plan_many_dft_r2c(1, // [int rank]     Rank 1 DFT
                                          &n, // [const int *n] Logical length of each transform
                                          howmany, // [int howmany] Number of observations (number of DFTs to perform)
                                          (double *) X, // [double *in] Input array (const is cast away only to match the prototype)
                                          NULL, // [const int *inembed] NULL: the input rows are exactly n long
                                          1, // [int istride] Distance between successive variables in input array (in units of double)
                                          n, // [int idist]   Distance between 2 observations in input array (in units of double)
                                          (fftw_complex *) Y, // [fftw_complex *out] Output array is Y
                                          NULL, // [const int *onembed] NULL: the output rows are exactly n/2+1 long
                                          1, // [int ostride] Distance between successive variables in output array (in units of fftw_complex)
                                          nvarout, // [int odist] Distance between 2 observations in output array (in units of fftw_complex)
                                          FFTW_ESTIMATE // [unsigned flags] Pick a plan quickly without benchmarking
                                          );

  // If plan building fails, quit
  if(!plan)
    return NULL;

  // Execute FFTW plan
  fftw_execute(plan);

  // Destroy FFTW plan
  fftw_destroy_plan(plan);

  return Y;
}
示例#4
0
  // Transform every volume of the list to the frequency domain, in place.
  //
  // Nothing happens when the list is empty or its first volume is already
  // marked Domain::Frequency. Plans are cached in forward_plans keyed by the
  // data pointer; a failed lookup builds a batched 3D r2c plan and stores it.
  void FFT::forward(VolumeList &volumes) {
    const bool needs_transform =
        volumes.size() > 0 && volumes[0].domain != Domain::Frequency;
    if (!needs_transform) {
      return;
    }

    fftw_plan plan;
    try {
      plan = forward_plans.at(volumes.data);
    } catch (...) {
      // No cached plan for this buffer yet: create one and remember it.
      int dims[] = { static_cast<int>(volumes[0].width),
                     static_cast<int>(volumes[0].height),
                     static_cast<int>(volumes[0].depth) };
      int batch = volumes.size();
      int real_dist = volumes.volume_size_real;
      int cplx_dist = volumes.volume_size_complex;
      // NOTE(review): the plan is cached and executed without a NULL check.
      plan = fftw_plan_many_dft_r2c(3, dims, batch,
                                    volumes.data,
                                    0, 1, real_dist,
                                    reinterpret_cast<fftw_complex *>(volumes.data),
                                    0, 1, cplx_dist,
                                    FFTW_ESTIMATE);
      forward_plans[volumes.data] = plan;
    }

    fftw_execute(plan);

    for (size_t idx = 0; idx < volumes.size(); ++idx) {
      volumes[idx].domain = Domain::Frequency;
    }
  }
/*
 * Pointer-based wrapper around fftw_plan_many_dft_r2c().
 *
 * The planner flags are read through `flags` and the new plan is written to
 * `*plan`; every other argument is forwarded unchanged.
 * (Presumably shaped this way for callers that pass by reference - confirm.)
 */
void kemo_fftw_plan_many_dft_r2c(fftw_plan *plan, int rank,
                                 int *n_size, int howmany,
                                 double *dble_in, const int *inembed,
                                 int istride, int idist,
                                 fftw_complex *cplx_out, int *onembed,
                                 int ostride, int odist,
                                 unsigned *flags){
	const unsigned planner_flags = *flags;
	fftw_plan created = fftw_plan_many_dft_r2c(rank, n_size, howmany,
	                                           dble_in, inembed, istride, idist,
	                                           cplx_out, onembed, ostride, odist,
	                                           planner_flags);

	*plan = created;
}
/*
 * In-place real<->complex DFT of a dense array.
 *
 * sign selects the direction: FFTW_FORWARD runs r2c, FFTW_BACKWARD runs c2r.
 * The array must first pass ambit_dense_array_fftw_r2c_embedded().
 *
 * Returns 0 on success and -1 (with a message on stderr) if the array is
 * rejected, sign is neither direction, or planning fails.
 */
int
ambit_dft_r2c_inplace(struct ambit_dense_array *a, int sign) {
    fftw_plan plan = NULL;
    int       err  = -1;    /* assume failure until the transform has run */

    if (!ambit_dense_array_fftw_r2c_embedded(a)) {
        fprintf(stderr, "%s: Invalid array or inappropriate embedding.\n", __func__);
        goto cleanup;
    }

    if (sign == FFTW_FORWARD) {
        plan = fftw_plan_many_dft_r2c(a->rank, (const int *)a->dim, 1,
                                      a->data, (const int *)a->dimembed, 1, 0,
                                      (C *)a->data, (const int *)a->dimembed, 1, 0,
                                      FFTW_ESTIMATE);
    } else if (sign == FFTW_BACKWARD) {
        plan = fftw_plan_many_dft_c2r(a->rank, (const int *)a->dim, 1,
                                      (C *)a->data, (const int *)a->dimembed, 1, 0,
                                      a->data, (const int *)a->dimembed, 1, 0,
                                      FFTW_ESTIMATE);
    } else {
        fprintf(stderr, "%s: Sign must be FFTW_FORWARD or FFTW_BACKWARD.\n", __func__);
        goto cleanup;
    }

    if (plan == NULL) {
        fprintf(stderr, "%s: DFT planning failed.\n", __func__);
        goto cleanup;
    }

    fftw_execute(plan);
    err = 0;

  cleanup:
    /* Reached on every path, including those where plan is still NULL. */
    fftw_destroy_plan(plan);
    return err;
}
示例#7
0
文件: FFT.cpp 项目: soundlocate/fft
// Set up a batch of `count` real-to-complex FFTs of length `size` and
// precompute the window table.
//
// Data layout: stride `count`, distance 1 - sample k of transform j lives at
// index k * count + j in both the input and the output buffer.
//
// window selects the table stored in m_window:
//   0         -> Hamming (0.54 - 0.46 cos)
//   1         -> Hann    (0.5  - 0.5  cos)
//   otherwise -> rectangular (all ones)
// Values other than 0, 1 and 2 previously left the table uninitialised.
FFT::FFT(int size, int count, int window) : m_size(size), m_count(count) {
	m_indata = (double *) malloc(sizeof(double) * size * count);
	m_outdata = (complex * ) malloc(sizeof(complex) * size * count);

	// (FFTW wisdom import/export around planning was disabled in the original.)

	int n = size;

//	fftw_plan_many_dft_r2c(int rank, const int *n, int howmany, double *in, const int *inembed, int istride, int idist, fftw_complex *out, const int *onembed, int ostride, int odist, unsigned int flags)
	m_plan = fftw_plan_many_dft_r2c(1, &n, count, m_indata, nullptr, count, 1, m_outdata, nullptr, count, 1, FFTW_PATIENT);

	// build window table
	m_window = (double *) malloc(sizeof(double) * size);

	if(window == 0) {
		for(int i = 0; i < size; i++) {
			m_window[i] = 0.54 - 0.46 * cos((2.0 * M_PI * i) / ((double) size - 1.0));
		}
	} else if(window == 1) {
		for(int i = 0; i < size; i++) {
			m_window[i] = 0.5 - 0.5 * cos((2.0 * M_PI * i) / ((double) size - 1.0));
		}
	} else {
		// window == 2, and the fallback for any unrecognised selector
		for(int i = 0; i < size; i++) {
			m_window[i] = 1;
		}
	}
}
示例#8
0
// Batched 1D real-to-complex FFT (double precision).
// Runs `howmany` transforms of length n: consecutive inputs are n doubles
// apart in x, consecutive outputs are n/2+1 complex values apart in y.
// Returns silently without touching y if FFTW cannot create a plan.
void caffe_cpu_fft<double>(const int howmany, const int n, const double* x, complex<double>* y) {
  const int half_spectrum = n / 2 + 1;
  int real_dims[] = {n};
  int cplx_dims[] = {half_spectrum};

  fftw_plan plan = fftw_plan_many_dft_r2c(1, real_dims, howmany,
                                          const_cast<double *>(x), real_dims, 1, n,
                                          reinterpret_cast<fftw_complex *>(y), cplx_dims, 1, half_spectrum,
                                          FFTW_ESTIMATE);
  if (plan == 0) {
    return;
  }

  fftw_execute(plan);
  fftw_destroy_plan(plan);
}
示例#9
0
int main(int argc, char *argv[])
{
  int ret = EXIT_FAILURE;

  // Set up the PRNG
  dsfmt_t *dsfmt = malloc(sizeof(dsfmt_t));
  if(dsfmt == NULL) {
    fprintf(stdout, "unable to allocate PRNG\n");
    goto skip_deallocate_prng;
  }
  dsfmt_init_gen_rand(dsfmt, SEED);

  // Set up the source values
  double *src = fftw_malloc(N*VL*sizeof(double));
  if(src == NULL) {
    fprintf(stdout, "unable to allocate source vector\n");
    goto skip_deallocate_src;
  }
  for(unsigned int i = 0; i < N*VL; ++i) {
    src[i] = dsfmt_genrand_open_close(dsfmt);
  }

  // Allocate the FFT destination array
  double complex *fft = fftw_malloc(N*VL*sizeof(double complex));
  if(fft == NULL) {
    fprintf(stdout, "unable to allocate fft vector\n");
    goto skip_deallocate_fft;
  }

  // Execute the forward FFT
  fftw_plan fwd_plan = fftw_plan_many_dft_r2c(1, &N, VL,
      src, NULL, VL, 1, fft, NULL, VL, 1, FFTW_ESTIMATE);
  if(fwd_plan == NULL) {
    fprintf(stdout, "unable to allocate fft forward plan\n");
    goto skip_deallocate_fwd_plan;
  }
  fftw_execute(fwd_plan);

  // Fill in the rest of the destination values using the Hermitian property.
  fft_r2c_1d_vec_finish(fft, N, VL);

  // Allocate the reverse FFT destination array
  double complex *dst = fftw_malloc(N*VL*sizeof(double complex));
  if(dst == NULL) {
    fprintf(stdout, "unable to allocate dst vector\n");
    goto skip_deallocate_dst;
  }

  // Perform the reverse FFT
  fftw_plan rev_plan = fftw_plan_many_dft(1, &N, VL, fft, NULL, VL, 1,
      dst, NULL, VL, 1, FFTW_BACKWARD, FFTW_ESTIMATE);
  if(rev_plan == NULL) {
    fprintf(stdout, "unable to allocate fft reverse plan\n");
    goto skip_deallocate_rev_plan;
  }
  fftw_execute(rev_plan);

  // Compare the two vectors by sup norm
  double norm = 0.0;
  for(unsigned int i = 0; i < N*VL; ++i) {
    // Divide the resulting by N, because FFTW computes the un-normalized DFT:
    // the forward followed by reverse transform scales the data by N.
    norm = fmax(norm, cabs(dst[i]/N - src[i]));
  }
  if(norm <= 1e-6) {
    ret = EXIT_SUCCESS;
  }

  fftw_destroy_plan(rev_plan);
skip_deallocate_rev_plan:
  fftw_free(dst);
skip_deallocate_dst:
  fftw_destroy_plan(fwd_plan);
skip_deallocate_fwd_plan:
  fftw_free(fft);
skip_deallocate_fft:
  fftw_free(src);
skip_deallocate_src:
  free(dsfmt);
skip_deallocate_prng:
  // Keep valgrind happy by having fftw clean up its internal structures. This
  // helps ensure we aren't leaking memory.
  fftw_cleanup();
  return ret;
}
void CCorrelationFilters::fft_data(struct CDataStruct *img)
{	
	// Need to make this work with fftw threads, there seem to be some compiling and linking errors.
	
	int num_dim = img->size_data.size();
	int rank = num_dim;
	int *n = new int(num_dim);
	int *m = new int(num_dim);
	
	for (int i=0; i<num_dim; i++) {
		n[i] = img->size_data[i];
		m[i] = img->size_data_freq[i];
	}
	
	//int numCPU = 2; Is there is a way to automatically figure out the number of CPUs?
	//fftw_init_threads();
	//fftw_plan_with_nthreads(numCPU);
	
	fftw_plan plan;
	fftw_complex *out;
	
	int val = memcmp(n, m, num_dim*sizeof(int));
	img->num_elements_freq = (img->size_data_freq.prod()/img->size_data_freq(num_dim-1))*(img->size_data_freq(num_dim-1)/2+1);
    double scale_factor = sqrt(img->size_data_freq.prod());
    
    int istride = 1;
    int ostride = 1;
    int idist = img->size_data_freq.prod();
    int odist = (img->size_data_freq.prod()/img->size_data_freq(num_dim-1))*(img->size_data_freq(num_dim-1)/2+1);
	
	if (val != 0){
		// If the FFT size is NOT the same as the size of the data, zero pad the data.
		
		int num_channels = img->num_channels;
		double *data;
		data = new double[num_channels*img->size_data_freq.prod()];
		int howmany = num_channels;
		
		delete[] img->data_freq;
		img->data_freq = new complex<double>[odist*num_channels*img->num_data];
		
		out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex)*odist*num_channels);
		plan = fftw_plan_many_dft_r2c(rank, m, howmany, data, NULL, istride, idist, out, NULL, ostride, odist, FFTW_ESTIMATE);
				
		CDataStruct img1;
		img1.size_data = img->size_data;
		img1.size_data_freq = img->size_data_freq;
		img1.num_data = 1;
		img1.num_channels = img->num_channels;
		img1.data = new double[num_channels*img->size_data.prod()];
		
		for (int i=0; i<img->num_data; i++) {
			memcpy(img1.data,img->data+i*num_channels*img->size_data.prod(),sizeof(double)*num_channels*img->size_data.prod());
			zero_pad_data(data, &img1);
			fftw_execute(plan);
			memcpy(img->data_freq+i*odist*num_channels,reinterpret_cast<complex <double>*>(out),sizeof(complex<double>)*odist*num_channels);
		}
		
		for (int i=0; i<img->num_data; i++) {
			img->ptr_data_freq.push_back((img->data_freq+i*odist*num_channels));
		}
		
		fftw_destroy_plan(plan);
		fftw_free(out);
		delete[] data;
	}
	else{
		int howmany = img->num_data*img->num_channels;
		double *in = img->data;
		
		out = (fftw_complex*) fftwf_malloc(sizeof(fftw_complex)*odist*img->num_channels*img->num_data);
		plan = fftw_plan_many_dft_r2c(rank, n, howmany, in, NULL, istride, idist, out, NULL, ostride, odist, FFTW_ESTIMATE);
		fftw_execute(plan);
		img->data_freq = reinterpret_cast<complex <double>*>(out);
		
		for (int i=0; i<img->num_data; i++) {
			img->ptr_data_freq.push_back((img->data_freq+i*odist*img->num_channels));
		}
		fftw_destroy_plan(plan);
	}
    
    for (int i=0; i<img->num_data*img->num_channels*odist; i++){
        img->data_freq[i] = img->data_freq[i]/scale_factor;
    }
	
	//fftw_cleanup_threads();
	delete[] n;
	delete[] m;
}
示例#11
0
/*
 * Print the magnitude spectrum (in dB) of windows of a sound file as text.
 *
 * Options:
 *   -i FILE   input sound file (required)
 *   -o FILE   output text file (default: stdout)
 *   -l N      FFT length in frames (default 16384, must be positive)
 *   -s SEC / -S FRAMES   start offset (the two are added together)
 *   -t SEC / -T FRAMES   step between successive windows (added together);
 *                        with a step of 0 only one window is processed
 *   -w NAME   window function; only "rectangular"/"boxcar" are accepted
 *   -d N      decimation: emit one line per N bins, keeping the peak of each
 *             group (default 1, must be positive)
 *   -I / -p / -P STR   text printed once at start / before / after each window
 *
 * Each output line holds the bin frequency followed by one level per channel.
 */
int main(int argc, char *argv[])
{
    char const *inname = NULL;
    char const *outname = NULL;
    char const *initstring = NULL;
    char const *preamble = NULL;
    char const *postamble = NULL;
    uint32_t length = 16384;
    sf_count_t seek = 0;
    double seek_sec = 0;
    sf_count_t step = 0;
    double step_sec = 0;
    SNDFILE *infile = NULL;
    FILE *outfile = NULL;
    int decimate = 1;
    SF_INFO sfinfo;
    double *in = NULL;
    fftw_complex *out = NULL;
    fftw_plan plan = NULL;
    int opt;

    while ((opt = getopt(argc, argv, "i:o:l:s:S:t:T:w:d:I:p:P:")) != -1)
        switch (opt)
        {
        case 'i': inname = optarg;                          break;
        case 'o': outname = optarg;                         break;
        case 'l':
        {
            /* Map non-positive values to 0 so the check below rejects them
             * instead of letting a negative number wrap to a huge length. */
            int requested = atoi(optarg);
            length = (requested > 0) ? (uint32_t)requested : 0;
            break;
        }
        case 's': seek_sec = atof(optarg);                  break;
        case 'S': seek = atoll(optarg);                     break;
        case 't': step_sec = atof(optarg);                  break;
        case 'T': step = atoll(optarg);                     break;
        case 'w':
            if (strcmp(optarg, "rectangular") && strcmp(optarg, "boxcar"))
                fprintf(stderr, "only rectangular and boxcar window functions supported.\n");
            break;
        case 'd': decimate = atoi(optarg);                  break;
        case 'I': initstring = optarg;                      break;
        case 'p': preamble = optarg;                        break;
        case 'P': postamble = optarg;                       break;
        default:
            fprintf(stderr, "unknown option '%c'\n", opt);
            exit(EXIT_FAILURE);
        }

    if (inname == NULL)
    {
        fprintf(stderr, "no input file given (use -i).\n");
        exit(EXIT_FAILURE);
    }
    if (length == 0)
    {
        fprintf(stderr, "length must be a positive integer.\n");
        exit(EXIT_FAILURE);
    }
    if (decimate < 1)
    {
        /* A step of zero or less would never advance the bin loop. */
        fprintf(stderr, "decimate must be a positive integer.\n");
        exit(EXIT_FAILURE);
    }

    /* libsndfile requires the format field to be zero when opening for read. */
    memset(&sfinfo, 0, sizeof sfinfo);

    if ((infile = sf_open(inname, SFM_READ, &sfinfo)) == NULL)
    {
        fprintf(stderr, "couldn't open input outfile '%s'\n", inname);
        exit(EXIT_FAILURE);
    }

    if (outname == NULL)
        outfile = stdout;
    else if ((outfile = fopen(outname, "wt")) == NULL)
    {
        fprintf(stderr, "couldn't open output outfile '%s'\n", outname);
        exit(EXIT_FAILURE);
    }

    if (initstring) fprintf(outfile, "%s\n", initstring);

    in = fftw_malloc(sizeof(*in) * sfinfo.channels * length);
    out = fftw_malloc(sizeof(*out) * sfinfo.channels * length);
    if (in && out)
    {
        int n[1] = { length };

        /* One transform per channel on the interleaved frame buffer:
         * stride = channel count, distance = 1. */
        plan = fftw_plan_many_dft_r2c(1, n, sfinfo.channels,
                                    in, NULL, sfinfo.channels, 1,
                                    out, NULL, sfinfo.channels, 1,
                                    FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
    }
    if (plan == NULL)
    {
        fprintf(stderr, "couldn't initialise fftw.\n");
        exit(EXIT_FAILURE);
    }

    seek += rint(seek_sec * sfinfo.samplerate);
    step += rint(step_sec * sfinfo.samplerate);

    do
    {
        int r, c;

        sf_seek(infile, (sf_count_t)rint(seek), SEEK_SET);
        r = sf_readf_double(infile, in, length) * sfinfo.channels;
        if (r <= 0)
            break;
        /* Short read: this is the last window, zero-pad the remainder. */
        if (r < length * sfinfo.channels)
            step = 0;
        while (r < length * sfinfo.channels)
            in[r++] = 0.0;

        fftw_execute(plan);

        if (preamble) fprintf(outfile, "%s\n", preamble);

        for (r = 0; r * 2 < length; r += decimate)
        {
            double f = (double)r * sfinfo.samplerate / length;

            fprintf(outfile, "%lf", f);

            for (c = 0; c < sfinfo.channels; c++)
            {
                double x = 0.0;
                int i;
                /* Peak squared magnitude over the group. The r2c transform
                 * only fills bins 0..length/2, so never look past that. */
                for (i = 0; i < decimate && (uint32_t)(r + i) <= length / 2; i++)
                {
                    fftw_complex *p = &out[(r + i) * sfinfo.channels + c];
                    double y = p[0][0] * p[0][0] + p[0][1] * p[0][1];
                    if (x < y)
                        x = y;
                }
                x = log10(x) / 2.0 * 20.0 - log10(length * 0.5) * 20.0;
                fprintf(outfile, " %lf", x);
            }
            fprintf(outfile, "\n");
        }

        if (postamble) fprintf(outfile, "%s\n", postamble);

        seek += step;
    } while (step > 0);

    fftw_destroy_plan(plan);
    fftw_free(in);
    fftw_free(out);
    sf_close(infile);
    if (outname != NULL)
        fclose(outfile);

    return EXIT_SUCCESS;
}
示例#12
0
/*
 * Create the FFTW plans used by gfft (r2c_2d, c2r_2d, r2cfft_2Dslice,
 * r2c_1d, c2r_1d), call init_transpose() and reset fft_timer.
 *
 * Planning is done on temporary arrays that are freed before returning.
 */
void init_gfft() {
	// This will init the plans needed by gfft
	// Transform of NY/NPROC arrays of (logical) size [NX, NZ]
	// The physical size is [NX, NZ+2]
	// We use in-place transforms
	int i;
	double complex *wi1, *whi1;
	double *wir1, *whir1;
	
	const int n_size2D[2] = {NX, NZ};
	const int n_size1D[1] = {NY_COMPLEX};
	

	wi1 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (wi1 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for wi1 allocation");
	whi1 = (double complex *) fftw_malloc( sizeof(double complex) * NX*(NY/2+1));
	// Report the buffer that actually failed (the message used to say wi1).
	if (whi1 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for whi1 allocation");

	// Real views of the same memory, for the in-place real transforms
	wir1 = (double *) wi1;
	whir1= (double *) whi1;
	
	for(i = 0 ; i < NTOTAL_COMPLEX; i++) {
		wi1[i]=1.0;
	}
	
#ifdef _OPENMP
	fftw_plan_with_nthreads( nthreads );
#endif
	r2c_2d = fftw_plan_many_dft_r2c(2, n_size2D, NY / NPROC, wir1, NULL, 1, (NZ+2)*NX,
															 wi1,  NULL, 1, (NZ+2)*NX/2, FFT_PLANNING);
	if (r2c_2d == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW R2C_2D plan creation failed");
														   
	c2r_2d = fftw_plan_many_dft_c2r(2, n_size2D, NY / NPROC, wi1,  NULL, 1, (NZ+2)*NX/2,
														    wir1, NULL, 1, (NZ+2)*NX  , FFT_PLANNING);
	if (c2r_2d == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW C2R_2D plan creation failed");
	
	r2cfft_2Dslice = fftw_plan_dft_r2c_2d(NX,NY,wrh3,wh3,FFT_PLANNING); //,whir1,whi1
	if (r2cfft_2Dslice == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW r2c slice plan creation failed");
															 
	// 1D transforms: these are actually c2c transforms, but are used for global 3D transforms.
	// We will transform forward and backward an array of logical size [NX/NPROC, NY, (NZ+2)/2] along the 2nd dimension
	// We will do NZ_COMPLEX transforms along Y. Will need a loop on NX/NPROC
	// We use &w1[NZ_COMPLEX] so that the alignment check is done properly (see SIMD in the FFTW documentation)
	
#ifdef _OPENMP	
	fftw_plan_with_nthreads( 1 );
#endif	
	r2c_1d = fftw_plan_many_dft(1, n_size1D, NZ_COMPLEX, &wi1[NZ_COMPLEX], NULL, NZ_COMPLEX, 1,
														 &wi1[NZ_COMPLEX], NULL, NZ_COMPLEX, 1, FFTW_FORWARD, FFT_PLANNING);
	if (r2c_1d == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW R2C_1D plan creation failed");
																			  
	c2r_1d = fftw_plan_many_dft(1, n_size1D, NZ_COMPLEX, &wi1[NZ_COMPLEX], NULL, NZ_COMPLEX, 1,
														 &wi1[NZ_COMPLEX], NULL, NZ_COMPLEX, 1, FFTW_BACKWARD, FFT_PLANNING);
	if (c2r_1d == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW C2R_1D plan creation failed");

	// init transpose routines
	init_transpose();
	// Let's see which method is faster (with or without threads)
		
	fftw_free(wi1); fftw_free(whi1);
	
	fft_timer=0.0;

	
	return;
}
示例#13
0
/*
 * Self test comparing the parallel r2c transform with a serial FFTW one.
 *
 * Every rank generates the same random master array (same SEED), transforms
 * all of it serially, transforms its own slice with the parallel plan, and
 * compares that slice of the serial result with the parallel result.
 *
 * Exits with EXIT_SUCCESS only if the largest absolute difference is below
 * 1e-6 and MPI shuts down cleanly.
 */
int main(int argc, char **argv)
{
  // Error handling scheme: this function has failed until proven otherwise.
  int ret = EXIT_FAILURE;

  if(MPI_Init(&argc, &argv) != MPI_SUCCESS) {
    // Theoretically, an error at this point will abort the program, and this
    // code path is never followed. This is here for completeness.
    fprintf(stderr, "unable to initialize MPI\n");
    goto die_immed;
  }

  // Install the MPI error handler that returns error codes, so we can perform
  // the usual process suicide ritual.
  if(MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN)
      != MPI_SUCCESS) {
    // Again, theoretically, the previous error handler (MPI_Abort) gets called
    // instead of reaching this fail point.
    fprintf(stderr, "unable to reset MPI error handler\n");
    goto die_finalize_mpi;
  }

  int size, rank;
  if(MPI_Comm_size(MPI_COMM_WORLD, &size) != MPI_SUCCESS ||
      MPI_Comm_rank(MPI_COMM_WORLD, &rank) != MPI_SUCCESS) {
    fprintf(stderr, "unable to determine rank and size\n");
    goto die_finalize_mpi;
  }

  dsfmt_t *prng = malloc(sizeof(dsfmt_t));
  if(prng == NULL) {
    fprintf(stderr, "unable to allocate PRNG\n");
    goto die_finalize_mpi;
  }
  dsfmt_init_gen_rand(prng, SEED);

  const int master_elems = proc_elems * size;

  double *const master = fftw_malloc(VL*master_elems*sizeof(double));
  if(master == NULL) {
    fprintf(stderr, "unable to allocate master array\n");
    goto die_free_prng;
  }
  for(int i = 0; i < master_elems*VL; ++i) {
    master[i] = 2*dsfmt_genrand_open_close(prng) - 1;
  }

  /* Allocate the array holding the serial result */
  double complex *const serial = fftw_malloc(VL*master_elems*sizeof(*serial));
  if(serial == NULL) {
    fprintf(stderr, "unable to allocate serial array\n");
    goto die_free_master;
  }

  /* Plan the serial transform */
  fftw_plan serial_plan = fftw_plan_many_dft_r2c(1, &master_elems, VL,
      master, NULL, VL, 1, serial, NULL, VL, 1, FFTW_ESTIMATE);
  if(serial_plan == NULL) {
    fprintf(stderr, "unable to construct forward transform plan\n");
    goto die_free_serial;
  }

  /* Perform the serial transform, and complete it */
  fftw_execute(serial_plan);
  fft_r2c_1d_vec_finish(serial, master_elems, VL);

  /* Allocate space to hold the parallel transform result */
  double complex *const parallel = fftw_malloc(
      proc_elems*VL*sizeof(double complex));
  if(parallel == NULL) {
    fprintf(stderr, "unable to allocate space for parallel array\n");
    goto die_destroy_serial_plan;
  }

  /* Create the parallel plan */
  fft_par_plan par_plan = fft_par_plan_r2c_1d(MPI_COMM_WORLD, proc_elems, VL,
      master + rank*proc_elems*VL, parallel, NULL);
  if(par_plan == NULL) {
    fprintf(stderr, "unable to create parallel transform plan\n");
    goto die_free_parallel;
  }

  /* Execute the parallel transform */
  if(fft_par_execute_fwd(par_plan) != MPI_SUCCESS) {
    fprintf(stderr, "unable to execute parallel transform\n");
    goto die_destroy_par_plan;
  }

  /* Compare values. The running maximum must be a double: stored in an int
   * it would truncate every error below 1 to 0 and the check would always
   * pass. */
  double sup = 0.0;
  for(int i = 0; i < proc_elems*VL; ++i) {
    sup = fmax(sup, cabs(serial[rank*proc_elems*VL + i] - parallel[i]));
  }
  if(sup < 1.0e-6) {
    ret = EXIT_SUCCESS;
  }

die_destroy_par_plan:
  fft_par_plan_destroy(par_plan);
die_free_parallel:
  fftw_free(parallel);
die_destroy_serial_plan:
  fftw_destroy_plan(serial_plan);
die_free_serial:
  fftw_free(serial);
die_free_master:
  fftw_free(master);
die_free_prng:
  free(prng);
die_finalize_mpi:
  if(MPI_Finalize() != MPI_SUCCESS) {
    fprintf(stderr, "unable to finalize MPI\n");
    ret = EXIT_FAILURE;
  }
die_immed:
  fftw_cleanup();
  return ret;
}