Пример #1
0
/**
 * Constructs the transform object.
 *
 * Forwards configSettings to the Data base class, allocates one set of
 * scratch buffers per thread (NUM_THREADS of them), fills the tables
 * produced by genWigAll / genWigAllTrans / makeweights2, and finally
 * creates the two FFTW plans p1 (FFTW_BACKWARD) and p2 (FFTW_FORWARD).
 *
 * NOTE(review): none of the malloc / fftw_malloc results are checked.
 */
SO3_trans::SO3_trans(const Config & configSettings)
  : Data(configSettings)
{
  // Tables holding one scratch pointer per thread.
  workspace_cx  = static_cast<fftw_complex**>(malloc(sizeof(fftw_complex*)*NUM_THREADS));
  workspace_cx2 = static_cast<fftw_complex**>(malloc(sizeof(fftw_complex*)*NUM_THREADS));
  workspace_re  = static_cast<double**>(malloc(sizeof(double*)*NUM_THREADS));

  // Two complex buffers of n3 elements and one real buffer per thread.
  for(int t = 0; t < NUM_THREADS; ++t)
  {
    workspace_cx[t]  = static_cast<fftw_complex*>(fftw_malloc(sizeof(fftw_complex) * n3));
    workspace_cx2[t] = static_cast<fftw_complex*>(fftw_malloc(sizeof(fftw_complex) * n3));
    workspace_re[t]  = static_cast<double*>(malloc(sizeof(double)*(24*bw + 2*bw*bw)));
  }

  // Storage for the two precomputed tables and for the weights.
  wignerSpace  = ((bw*bw)*(2+3*bw+bw*bw))/3 ;
  wigners      = static_cast<double*>(malloc(sizeof(double) * wignerSpace));
  wignersTrans = static_cast<double*>(malloc(sizeof(double) * wignerSpace));
  weights      = static_cast<double*>(malloc(sizeof(double) * (2*bw)));

  // The generators use the first thread's real workspace as scratch.
  genWigAll( bw, wigners, workspace_re[0] );
  genWigAllTrans( bw, wignersTrans, workspace_re[0]);
  makeweights2( bw, weights );

  {
    // Geometry shared by both plans.
    int fftDims[2];
    int inShape[2];
    int outShape[2];

    fftDims[0]  = 1;
    fftDims[1]  = n;
    inShape[0]  = n;
    inShape[1]  = n*n;
    outShape[0] = n;
    outShape[1] = n*n;

    const int fftRank    = 2;
    const int batchCount = n*n;
    const int unitStride = 1;
    const int batchDist  = n;

    // p1: workspace_cx2[0] -> workspace_cx[0], backward direction.
    p1 = fftw_plan_many_dft( fftRank, fftDims, batchCount,
        workspace_cx2[0], inShape, unitStride, batchDist,
        workspace_cx[0], outShape, unitStride, batchDist,
        FFTW_BACKWARD, FFTW_MEASURE );

    // p2: workspace_cx[0] -> workspace_cx2[0], forward direction.
    p2 = fftw_plan_many_dft( fftRank, fftDims, batchCount,
        workspace_cx[0], inShape, unitStride, batchDist,
        workspace_cx2[0], outShape, unitStride, batchDist,
        FFTW_FORWARD, FFTW_MEASURE );
  }
}
Пример #2
0
/*
   MatApply_USFFT_Private - currently a stub.

   Everything except PetscFunctionBegin / PetscFunctionReturn(0) is wrapped in
   `#if 0`, so as compiled the function ignores A, plan, direction, x and y and
   returns 0.

   The disabled code sketches the intended behaviour: resample x, then create
   an FFTW plan on first use (or reuse *plan) and execute it into y.

   NOTE(review): the disabled code is not compilable as written:
     - the declaration `Mat_USFFT* = ...` has no variable name, while the body
       refers to `usfft`;
     - `x_array` is used but only `r_array` and `y_array` are declared;
     - VecGetArray is called on usfft->resample (into r_array) while the
       matching VecRestoreArray is called on x (with x_array);
     - the switch tests usfft->dim while the other branches use usfft->ndim.
   These need to be resolved before the `#if 0` guards are removed.
*/
PetscErrorCode MatApply_USFFT_Private(Mat A, fftw_plan *plan, int direction, Vec x,Vec y)
{
#if 0
  PetscErrorCode ierr;
  PetscScalar    *r_array, *y_array;
  Mat_USFFT* = (Mat_USFFT*)(A->data);
#endif

  PetscFunctionBegin;
#if 0
  /* resample x to usfft->resample */
  ierr = MatResample_USFFT_Private(A, x);CHKERRQ(ierr);

  /* NB: for now we use outdim for both x and y; this will change once a full USFFT is implemented */
  ierr = VecGetArray(usfft->resample,&r_array);CHKERRQ(ierr);
  ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
  if (!*plan) { /* create a plan then execute it*/
    if (usfft->dof == 1) {
#if defined(PETSC_DEBUG_USFFT)
      ierr = PetscPrintf(PetscObjectComm((PetscObject)A), "direction = %d, usfft->ndim = %d\n", direction, usfft->ndim);CHKERRQ(ierr);
      for (int ii = 0; ii < usfft->ndim; ++ii) {
        ierr = PetscPrintf(PetscObjectComm((PetscObject)A), "usfft->outdim[%d] = %d\n", ii, usfft->outdim[ii]);CHKERRQ(ierr);
      }
#endif

      /* pick the dimension-specific planner for 1-3 dimensions, the generic one otherwise */
      switch (usfft->dim) {
      case 1:
        *plan = fftw_plan_dft_1d(usfft->outdim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,direction,usfft->p_flag);
        break;
      case 2:
        *plan = fftw_plan_dft_2d(usfft->outdim[0],usfft->outdim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,direction,usfft->p_flag);
        break;
      case 3:
        *plan = fftw_plan_dft_3d(usfft->outdim[0],usfft->outdim[1],usfft->outdim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,direction,usfft->p_flag);
        break;
      default:
        *plan = fftw_plan_dft(usfft->ndim,usfft->outdim,(fftw_complex*)x_array,(fftw_complex*)y_array,direction,usfft->p_flag);
        break;
      }
      fftw_execute(*plan);
    } /* if (dof == 1) */
    else { /* if (dof > 1) */
      /* dof interleaved transforms: element stride is dof, consecutive transforms start 1 apart */
      *plan = fftw_plan_many_dft(/*rank*/usfft->ndim, /*n*/usfft->outdim, /*howmany*/usfft->dof,
                                 (fftw_complex*)x_array, /*nembed*/usfft->outdim, /*stride*/usfft->dof, /*dist*/1,
                                 (fftw_complex*)y_array, /*nembed*/usfft->outdim, /*stride*/usfft->dof, /*dist*/1,
                                 /*sign*/direction, /*flags*/usfft->p_flag);
      fftw_execute(*plan);
    } /* if (dof > 1) */
  } /* if (!*plan) */
  else {  /* if (*plan) */
    /* use existing plan */
    fftw_execute_dft(*plan,(fftw_complex*)x_array,(fftw_complex*)y_array);
  }
  ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
  ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
#endif
  /* with the code above disabled this is the only effect of the function */
  PetscFunctionReturn(0);
} /* MatApply_USFFT_Private() */
Пример #3
0
/* Create the forward and backward FFTW plans for every direction that has
   a layout.

   size : byte size of one site datum; size/sizeof(complex) gives the
          number of complex values per site.
   ftd  : supplies the input (ftd->data) and output (ftd->tmp) arrays the
          plans are created for.

   Results are stored in the file-level fwd_plan[] / bck_plan[] arrays.
   The time spent is reported through start_timing / print_timing. */
void make_fftw_plans(int size, ft_data *ftd){
  double t_start = start_timing();
  unsigned plan_flags = FFTW_ESTIMATE;  /* Could try FFTW_MEASURE */
  int fft_rank = 1;
  int in_dist = 1, out_dist = 1;
  /* Number of complex values in a 4D site datum */
  int values_per_site = size/sizeof(complex);
  int mu;

  for(mu = 0; mu < NDIM; mu++){
    int len[1], in_embed[1], out_embed[1];
    int extent, batch, in_stride, out_stride;

    /* Directions without a layout get no plan */
    if(layout[mu] == NULL) continue;

    /* The FT dimension */
    extent = layout[mu]->nxfm;
    len[0] = extent;
    in_embed[0] = extent;
    out_embed[0] = extent;

    /* Number of contiguous complex values per 1D coordinate being
       transformed; it is also the stride between successive
       elements of one transform */
    batch = (sites_on_node*values_per_site)/extent;
    in_stride = batch;
    out_stride = batch;

    fwd_plan[mu] = fftw_plan_many_dft(fft_rank, len, batch,
                                      ftd->data, in_embed, in_stride, in_dist,
                                      ftd->tmp, out_embed, out_stride, out_dist,
                                      FFTW_FORWARD, plan_flags);
    bck_plan[mu] = fftw_plan_many_dft(fft_rank, len, batch,
                                      ftd->data, in_embed, in_stride, in_dist,
                                      ftd->tmp, out_embed, out_stride, out_dist,
                                      FFTW_BACKWARD, plan_flags);
  }

  print_timing(t_start, "make FFTW plans");
}
Пример #4
0
void ifft(int N, double *in, int stride) {
	int i;
	fftw_plan p;
	p = fftw_plan_many_dft(1,&N, 1, (fftw_complex *)in, NULL, stride, 0, (fftw_complex *)in, NULL, stride, 0, FFTW_BACKWARD, FFTW_ESTIMATE);
	fftw_execute(p);
	fftw_destroy_plan(p);
	for (i=0; i<N; i++) {
		in[i*2*stride] /= N+0.0;
		in[i*2*stride+1] /= N+0.0;
	}
}	
Пример #5
0
  //Fourier transform of a vector along the selected directions.
  //
  //  out        destination vector ("in" is first copied into it unless the two coincide)
  //  in         source vector
  //  ext_dirs   one flag per direction (NDIM entries): non-zero means "transform along it"
  //  ncpp       number of complex values per site
  //  sign       sign handed to fftw_plan_many_dft
  //  normalize  if non-zero, divide the result by the product of the global
  //             sizes of the transformed directions
  //
  //For each selected direction the data is remapped with remap_lx_vector_to_locd,
  //transformed in place in a temporary buffer, and remapped back.
  THREADABLE_FUNCTION_6ARG(fft4d, complex*,out, complex*,in, int*,ext_dirs, int,ncpp, double,sign, int,normalize)
  {
    GET_THREAD_ID();
    
    //first of all put in to out
    if(out!=in) vector_copy(out,in);
    
    //list all dirs
    int dirs[NDIM],ndirs=0;
    for(int mu=0;mu<NDIM;mu++) if(ext_dirs[mu]) dirs[ndirs++]=mu;
    verbosity_lv2_master_printf("Going to FFT: %d dimensions in total\n",ndirs);
    
    if(ndirs)
      {
        //allocate buffer
        complex *buf=nissa_malloc("buf",max_locd_size*ncpp,complex);
        
        //allocate plans: only the master thread creates them, the others wait at the barrier
        fftw_plan *plans=nissa_malloc("plans",ndirs,fftw_plan);
        if(IS_MASTER_THREAD)
          for(int idir=0;idir<ndirs;idir++)
            plans[idir]=fftw_plan_many_dft(1,glb_size+dirs[idir],ncpp,buf,NULL,ncpp,1,buf,NULL,ncpp,1,sign,FFTW_ESTIMATE);
        THREAD_BARRIER();
        
        //transpose each dir in turn and take fft
        for(int idir=0;idir<ndirs;idir++)
          {
            int mu=dirs[idir];
            verbosity_lv2_master_printf("FFT-ing dimension %d/%d=%d\n",idir+1,ndirs,mu);
            remap_lx_vector_to_locd(buf,out,ncpp*sizeof(complex),mu);
            
            //makes all the fourier transform
            NISSA_PARALLEL_LOOP(ioff,0,locd_perp_size_per_dir[mu])
              fftw_execute_dft(plans[idir],buf+ioff*glb_size[mu]*ncpp,buf+ioff*glb_size[mu]*ncpp);
            THREAD_BARRIER();
            
            remap_locd_vector_to_lx(out,buf,ncpp*sizeof(complex),mu);
          }
        
        //destroy plans
        if(IS_MASTER_THREAD) for(int idir=0;idir<ndirs;idir++) fftw_destroy_plan(plans[idir]);
        
        //put normalisation
        if(normalize)
          {
            //the factor is the product of the sizes of the directions that were
            //actually transformed, so glb_size must be indexed through dirs[]
            //(indexing it with idir directly is only right when dirs[idir]==idir)
            double norm=1;
            for(int idir=0;idir<ndirs;idir++) norm*=glb_size[dirs[idir]];
            double_vector_prod_double((double*)out,(double*)out,1/norm,2*ncpp*loc_vol);
          }
        
        nissa_free(buf);
        nissa_free(plans);
      }
  }
Пример #6
0
// Computes the K tapered spectra of x and stores them in Jkx.
//
//   x            input samples (n of them)
//   tapers       K tapers of length n, stored one after another
//   lambda       not used in this function
//   remove_mean  when true, a per-taper weighted mean is subtracted first
//   N            FFT length; row k of Jkx occupies Jkx[k*N .. k*N+N-1]
//   Jkx          output, K*N complex values, transformed in place
//
// NOTE(review): T is not declared in the visible signature; presumably a
// template parameter introduced on a line outside this snippet.
void eigencoeffs(T *x, uint_t n, const double *tapers, const double *lambda, uint_t K, bool remove_mean, uint_t N, fftw_complex *Jkx){

    const T plain_mean=tmath::mean<T>(x,n);   // ordinary (unweighted) mean
    const int fft_len=N;

    double taper_sum;   // sum of the current taper
    T offset;           // value subtracted from x for the current taper

    // Step 1: copy x into each row of Jkx with the chosen offset removed
    // (removing the weighted average forces a zero DC component)
    for ( uint_t k=0; k<K; k++){
        const double *taper=&tapers[k*n];

        if (remove_mean!=true){
            offset=0;
        }
        else {
            taper_sum=tmath::sum<double>(taper,n);

            if (  tmath::abs<double>(taper_sum) > ZERO_TOL ){
                offset=tmath::dot_mult<T,double>(x,taper,n);
                offset=offset/taper_sum;
            }
            else {
                // the taper sums to (nearly) zero, as odd sequences do up to
                // round-off: fall back to the ordinary mean
                offset=plain_mean;
            }
        }

        for (uint_t j=0; j<n; j++){
            Jkx[k*N+j]=x[j]-offset;       // staged for the in-place FFT
        }
    }

    // Step 2: apply the taper to each row and zero-pad it up to N
    for ( uint_t k=0; k<K; k++){
        tmath::pw_mult<fftw_complex,double>(&Jkx[k*N], &tapers[k*n], n, &Jkx[k*N] );
        for (uint_t j=n; j<N; j++){
            Jkx[k*N+j]=0;
        }
    }

    // Step 3: one batched plan performs all K forward FFTs in place
    fftw_load(); //potentially load fftw3
    fftw_plan batch = fftw_plan_many_dft(1, &fft_len, K,
                                         Jkx, NULL, 1, N,
                                         Jkx, NULL, 1, N,
                                         FFTW_FORWARD, FFTW_ESTIMATE);
    fftw_execute(batch);
    fftw_destroy_plan(batch);

}
Пример #7
0
bool do_ifft_1d_c2r(int M, int N, float* out, float* in)
{
    /*
	if (num_threads>1) {
		fftw_init_threads();
		fftw_plan_with_nthreads(num_threads);
	}
	*/

    int MN = M * N;

    fftw_complex* in2 = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * MN);
    fftw_complex* out2 = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * MN);
    for (int ii = 0; ii < MN; ii++) {
        in2[ii][0] = in[ii * 2];
        in2[ii][1] = in[ii * 2 + 1];
    }

    fftw_plan p;
    int rank = 1;
    int n[] = { N };
    int howmany = M;
    int* inembed = n;
    int istride = M;
    int idist = 1;
    int* onembed = n;
    int ostride = M;
    int odist = 1;
    int sign = FFTW_BACKWARD;
    unsigned flags = FFTW_ESTIMATE;
#pragma omp critical
    p = fftw_plan_many_dft(rank, n, howmany, in2, inembed, istride, idist, out2, onembed, ostride, odist, sign, flags);
    //p=fftw_plan_dft_1d(N,in2,out2,FFTW_BACKWARD,FFTW_ESTIMATE);

    fftw_execute(p);
    for (int ii = 0; ii < MN; ii++) {
        out[ii] = out2[ii][0];
    }
    fftw_free(in2);
    fftw_free(out2);

/*
	if (num_threads>1) {
		fftw_cleanup_threads();
	}
	*/

#pragma omp critical
    fftw_destroy_plan(p);

    return true;
}
Пример #8
0
/**
 * fft performs, in place on mem, one forward 1D FFT of length Z for each
 * of the N*N knots, i.e. through all layers. Elements of one transform
 * are N*N apart. M is not used.
 */
static void fft(int N,int M,int Z, fftw_complex *mem)
{
  const int knots = N*N;
  fftw_plan through_layers = fftw_plan_many_dft(1, &Z, knots,
                                                mem, NULL, knots, 1,
                                                mem, NULL, knots, 1,
                                                FFTW_FORWARD, FFTW_ESTIMATE);

  fftw_execute(through_layers);
  fftw_destroy_plan(through_layers);
}
Пример #9
0
/* Create a batched 2D FFTW plan over spinor data.

   spinor_in / spinor_out : arrays, reinterpreted as fftw_complex
   dim0, dim1             : extents of the 2D transform
   howmany_wospin         : batch count before the factor 12 applied here
   forward                : non-zero selects FFTW_FORWARD, zero FFTW_BACKWARD
   fftw_flags             : planner flags; -1 means FFTW_ESTIMATE

   12*howmany_wospin transforms are planned; that same number is the stride
   between elements of one transform, and successive transforms start 1
   apart. Returns whatever fftw_plan_many_dft returns. */
fftw_plan spinor_fftw_plan2d(spinor *spinor_in,spinor *spinor_out,int dim0,int dim1,int howmany_wospin,unsigned int forward,int fftw_flags){

  int extents[2];
  int components=12*howmany_wospin;
  int direction;

  extents[0]=dim0;
  extents[1]=dim1;

  if(fftw_flags==-1)
    fftw_flags=FFTW_ESTIMATE;

  direction = forward ? FFTW_FORWARD : FFTW_BACKWARD;

  return fftw_plan_many_dft(2, extents, components,
                            (fftw_complex*)spinor_in, NULL, components, 1,
                            (fftw_complex*)spinor_out, NULL, components, 1,
                            direction, fftw_flags);

}
Пример #10
0
// Allocates the FFT buffer b for n batches of GJP.Vol() complex values and
// creates two in-place batched 3D plans over it: p1 (FFTW_BACKWARD) and
// p2 (FFTW_FORWARD). The FFTW precision variant is selected by
// USE_SINGLE / USE_DOUBLE / USE_LONG_DOUBLE.
Fourier::Fourier(int n)
{

  const char* fname = "void Fourier::Initialize()";

  VRB.Debug(fname, "Allocating memory and creating plans for FFTW.");

  batch_size = n;

  // Buffer allocation, one variant per precision
#ifdef USE_SINGLE
  b = (fftComplex*) fftwf_malloc(batch_size*GJP.Vol()*sizeof(fftComplex));
#endif
#ifdef USE_DOUBLE
  b = (fftComplex*) fftw_malloc(batch_size*GJP.Vol()*sizeof(fftComplex));
#endif
#ifdef USE_LONG_DOUBLE
  b = (fftComplex*) fftwl_malloc(batch_size*GJP.Vol()*sizeof(fftComplex));
#endif

  // Below needs to be adjusted for batch ffts; double check in place
  const int site_count = GJP.Vol();
  int extents[3];
  extents[0] = GJP.Xsites();
  extents[1] = GJP.Ysites();
  extents[2] = GJP.Zsites();

  // Each batch is contiguous (stride 1); batches are site_count apart
#ifdef USE_SINGLE
  p1 = fftwf_plan_many_dft(3, extents, batch_size,
                           b, NULL, 1, site_count,
                           b, NULL, 1, site_count,
                           FFTW_BACKWARD, FFTW_EXHAUSTIVE);
  p2 = fftwf_plan_many_dft(3, extents, batch_size,
                           b, NULL, 1, site_count,
                           b, NULL, 1, site_count,
                           FFTW_FORWARD, FFTW_EXHAUSTIVE);
#endif
#ifdef USE_DOUBLE
  p1 = fftw_plan_many_dft(3, extents, batch_size,
                          b, NULL, 1, site_count,
                          b, NULL, 1, site_count,
                          FFTW_BACKWARD, FFTW_EXHAUSTIVE);
  p2 = fftw_plan_many_dft(3, extents, batch_size,
                          b, NULL, 1, site_count,
                          b, NULL, 1, site_count,
                          FFTW_FORWARD, FFTW_EXHAUSTIVE);
#endif
#ifdef USE_LONG_DOUBLE
  p1 = fftwl_plan_many_dft(3, extents, batch_size,
                           b, NULL, 1, site_count,
                           b, NULL, 1, site_count,
                           FFTW_BACKWARD, FFTW_EXHAUSTIVE);
  p2 = fftwl_plan_many_dft(3, extents, batch_size,
                           b, NULL, 1, site_count,
                           b, NULL, 1, site_count,
                           FFTW_FORWARD, FFTW_EXHAUSTIVE);
#endif


}
Пример #11
0
/**
 * Entry point: reconstruct_data_gridding FILENAME N M Z ITER WEIGHTS
 *
 * Allocates an N*N*Z complex volume, lets reconstruct() fill it, applies a
 * backward 1D FFT of length Z through the layers for each of the N*N
 * positions, and hands the result to print().
 *
 * argv[5] (ITER) is required by the argument count check but is not read
 * here; argv[6] (WEIGHTS) is forwarded to reconstruct().
 *
 * Returns 0 on success and 1 on a usage or setup error.
 */
int main(int argc, char **argv)
{
  fftw_complex *mem;
  fftw_plan plan;
  int N,M,Z;

  if (argc <= 6) {
    printf("usage: ./reconstruct_data_gridding FILENAME N M Z ITER WEIGHTS\n");
    return 1;
  }

  N=atoi(argv[2]);
  M=atoi(argv[3]);
  Z=atoi(argv[4]);

  /* atoi yields 0 for non-numeric input; a non-positive extent cannot be
     allocated or planned meaningfully */
  if (N <= 0 || Z <= 0) {
    fprintf(stderr, "reconstruct_data_gridding: N and Z must be positive integers\n");
    return 1;
  }

  /* Allocate memory to hold every slice in memory after the
  2D-infft */
  mem = (fftw_complex*) nfft_malloc(sizeof(fftw_complex) * N * N * Z);

  /* Create plan for the 1d-ifft.
     This is done before reconstruct() fills mem because planning with
     FFTW_MEASURE may overwrite the arrays it is given. */
  plan = fftw_plan_many_dft(1, &Z, N*N,
                                  mem, NULL,
                                  N*N, 1,
                                  mem, NULL,
                                  N*N,1 ,
                                  FFTW_BACKWARD, FFTW_MEASURE);
  if (plan == NULL) {
    fprintf(stderr, "reconstruct_data_gridding: could not create the FFTW plan\n");
    nfft_free(mem);
    return 1;
  }

  /* execute the 2d-nfft's */
  reconstruct(argv[1],N,M,Z,atoi(argv[6]),mem);

  /* execute the 1d-fft's */
  fftw_execute(plan);

  /* write the memory back in files */
  print(N,M,Z, mem);

  /* release the plan and the volume */
  fftw_destroy_plan(plan);
  nfft_free(mem);

  /* success is reported with exit status 0 */
  return 0;
}
Пример #12
0
Файл: fft.c Проект: darien0/cow
/*
 * Forward 3D transform of a strided real input.
 *
 * Reads fx[stride*i + start] for every zone i, divides it by the global
 * zone count, uses it as the real part (imaginary part 0) and transforms.
 *
 * Returns a malloc'ed FFT_DATA array that the caller must free. NULL is
 * returned when cow_mpirunning() is true but COW_MPI is not compiled in.
 */
FFT_DATA *_fwd(cow_dfield *f, double *fx, int start, int stride)
{
  FFT_DATA *spectrum = NULL;
  FFT_DATA *samples = NULL;
  if (!cow_mpirunning()) {
    /* serial path: a single FFTW 3D transform over the local interior zones */
    int nzones = cow_domain_getnumlocalzonesinterior(f->domain, COW_ALL_DIMS);
    long long nglobal = cow_domain_getnumglobalzones(f->domain, COW_ALL_DIMS);
    samples = (FFT_DATA*) malloc(nzones * sizeof(FFT_DATA));
    spectrum = (FFT_DATA*) malloc(nzones * sizeof(FFT_DATA));
    for (int i=0; i<nzones; ++i) {
      samples[i][0] = fx[stride * i + start] / nglobal;
      samples[i][1] = 0.0;
    }
    int *extent = f->domain->L_nint;
    fftw_plan fwd = fftw_plan_many_dft(3, extent, 1,
                                       samples, NULL, 1, 0,
                                       spectrum, NULL, 1, 0,
                                       FFTW_FORWARD, FFTW_ESTIMATE);
    fftw_execute(fwd);
    fftw_destroy_plan(fwd);
    free(samples);
  }
  else {
#if (COW_MPI)
    /* parallel path: delegate to the fft_3d routine */
    int nzones;
    long long nglobal = cow_domain_getnumglobalzones(f->domain, COW_ALL_DIMS);
    struct fft_plan_3d *par_plan = call_fft_plan_3d(f->domain, &nzones);
    samples = (FFT_DATA*) malloc(nzones * sizeof(FFT_DATA));
    spectrum = (FFT_DATA*) malloc(nzones * sizeof(FFT_DATA));
    for (int i=0; i<nzones; ++i) {
      samples[i][0] = fx[stride * i + start] / nglobal;
      samples[i][1] = 0.0;
    }
    fft_3d(samples, spectrum, FFT_FWD, par_plan);
    free(samples);
    fft_3d_destroy_plan(par_plan);
#endif // COW_MPI
  }
  return spectrum;
}
Пример #13
0
/*
 * Backward 3D transform of Fk, returning the real part of the result
 * divided by the global zone count.
 *
 * Returns a malloc'ed double array that the caller must free. NULL is
 * returned when cow_mpirunning() is true but COW_MPI is not compiled in.
 */
double *_rev(cow_domain *d, FFT_DATA *Fk)
{
  FFT_DATA *work = NULL;
  double *real_out = NULL;
  if (!cow_mpirunning()) {
    /* serial path: a single FFTW 3D transform over the local interior zones */
    int nzones = cow_domain_getnumlocalzonesinterior(d, COW_ALL_DIMS);
    long long nglobal = cow_domain_getnumglobalzones(d, COW_ALL_DIMS);
    real_out = (double*) malloc(nzones * sizeof(double));
    work = (FFT_DATA*) malloc(nzones * sizeof(FFT_DATA));
    int *extent = d->L_nint;
    fftw_plan bwd = fftw_plan_many_dft(3, extent, 1,
                                       Fk, NULL, 1, 0,
                                       work, NULL, 1, 0,
                                       FFTW_BACKWARD, FFTW_ESTIMATE);
    fftw_execute(bwd);
    for (int i=0; i<nzones; ++i) {
      real_out[i] = work[i][0] / nglobal;
    }
    free(work);
    fftw_destroy_plan(bwd);
  }
  else {
#if (COW_MPI)
    /* parallel path: delegate to the fft_3d routine */
    int nzones;
    long long nglobal = cow_domain_getnumglobalzones(d, COW_ALL_DIMS);
    struct fft_plan_3d *par_plan = call_fft_plan_3d(d, &nzones);
    real_out = (double*) malloc(nzones * sizeof(double));
    work = (FFT_DATA*) malloc(nzones * sizeof(FFT_DATA));
    fft_3d(Fk, work, FFT_REV, par_plan);
    for (int i=0; i<nzones; ++i) {
      real_out[i] = work[i][0] / nglobal;
    }
    free(work);
    fft_3d_destroy_plan(par_plan);
#endif // COW_MPI
  }
  return real_out;
}
Пример #14
0
/* Create an in-place FFTW plan for matrix m.
 *
 * Complex matrices get a complex DFT plan (backward when inverse is
 * non-zero). Other matrices get an r2r plan with FFTW_REDFT10 in every
 * dimension, or FFTW_REDFT01 when inverse is non-zero.
 *
 * *scale is set to 1/m->size; for the inverse r2r case it is additionally
 * halved once per dimension.
 *
 * The dimensions are handed to FFTW in reverse order of m->dim. */
fftw_plan nl_createplan (lua_State *L,
    nl_Matrix *m, int inverse, unsigned flags, lua_Number *scale) {
  fftw_plan plan;
  int i;
  nl_Buffer *shape = nl_getbuffer(L, m->ndims);

  /* shape[k] = dim[ndims-1-k], written while walking dim forwards */
  for (i = 0; i < m->ndims; i++)
    shape->data.bint[m->ndims - 1 - i] = m->dim[i];

  *scale = 1.0 / m->size;

  if (!m->iscomplex) { /* fct plan */
    nl_Buffer *kinds = nl_getbuffer(L, m->ndims);
    int transform = inverse ? FFTW_REDFT01 : FFTW_REDFT10;
    for (i = 0; i < m->ndims; i++) {
      kinds->data.bint[i] = transform;
      if (inverse)
        *scale *= 0.5;
    }
    /* in-place, howmany == 1, dist ignored, nembed == n */
    plan = fftw_plan_many_r2r(m->ndims, (const int *) shape->data.bint, 1,
        m->data, NULL, m->stride, 0,
        m->data, NULL, m->stride, 0,
        (const fftw_r2r_kind *) kinds->data.bint, flags);
    nl_freebuffer(kinds);
  }
  else { /* fft plan */
    int direction = inverse ? FFTW_BACKWARD : FFTW_FORWARD;
    /* in-place, howmany == 1, dist ignored, nembed == n */
    plan = fftw_plan_many_dft(m->ndims, (const int *) shape->data.bint, 1,
        (fftw_complex *) m->data, NULL, m->stride, 0,
        (fftw_complex *) m->data, NULL, m->stride, 0,
        direction, flags);
  }

  nl_freebuffer(shape);
  return plan;
}
Пример #15
0
/**
 * @brief Complex DFT and inverse DFT calculation wrapper
 *
 * Compute the DFT or inverse DFT of the complex matrix X of size (nvar x nobs) 
 * into the complex matrix Y of size (nvar x nobs) depending on the sign of sign
 * parameter.
 * 
 * @param[out] Y Output matrix of complex numbers (double[2]) of size (nvar x nobs)
 * @param[in] X Input matrix of complex numbers (double[2]) of size (nvar x nobs) [can be the same as Y]
 * @param[in] nvar Number of variables (rows) within X and Y
 * @param[in] nobs Number of observations (columns) within X and Y
 * @param[in] sign If -1 computes the DFT, if 1 computes the inverse DFT of X
 *
 * @return Pointer to Y or NULL if DFT fails
 */
double *_fft(double *Y, const double *X, const unsigned long nvar, const unsigned long nobs, int sign) {
  const int n = nvar;
  unsigned long i,nelem;
  
  fftw_plan plan = fftw_plan_many_dft(1, // [int rank] Rank 1 DFT
  																	&n, // [const int *n] Number of variables within input array
  																	nobs, // [int howmany] Number of observations (number of DFT to perform)
                                    (fftw_complex *) X, // [fftw_complex *in] Input array is X (it is cast to non-const but will not be modified since FFTW_DESTROY_INPUT is not set)
                                    NULL, // [const int *inembed] Distance between each rank in input array (Not used since rank=1)
                                    1, // [int istride] Distance between successive variables in input array (in unit of fftw_complex)
                                    nvar, // [int idist] Distance between 2 observations in input array (in unit of fftw_complex)
                                    (fftw_complex *) Y, // [fftw_complex *out] Output array is Y
                                    NULL, // [const int *onembed] Distance between each rank in output array (Not used since rank=1)
                                    1, // [int ostride] Distance between successive variables in output array (in unit of fftw_complex)
                                    nvar, // [int odist] Distance between 2 observations in output array (in unit of fftw_complex)
                                    sign, // sign of the exponent in the formula that defines the Fourier transform (-1 or +1)
                                    FFTW_ESTIMATE); // [unsigned flags] Quickly choose a plan without performing full benchmarks (maybe sub-optimal but take less time)
  // If plan building fails, quit
  if(!plan)
    return NULL;
  
  // Execute FFTW plan
  fftw_execute(plan);
  
  // If we compute the inverse transform (sign == 1), normalize result by nvar (FFTW compute unnormalized transform)
  if(sign == 1) {
    nelem = 2 * nvar * nobs;
    for(i = 0; i < nelem; i++)
      Y[i] /= nvar;
  }
      
  
  // Destroy FFTW plan
  fftw_destroy_plan(plan);
  
  return Y;
}
Пример #16
0
int main(int argc, char *argv[])
{
  int ret = EXIT_FAILURE;

  // Set up the PRNG
  dsfmt_t *dsfmt = malloc(sizeof(dsfmt_t));
  if(dsfmt == NULL) {
    fprintf(stdout, "unable to allocate PRNG\n");
    goto skip_deallocate_prng;
  }
  dsfmt_init_gen_rand(dsfmt, SEED);

  // Set up the source values
  double *src = fftw_malloc(N*VL*sizeof(double));
  if(src == NULL) {
    fprintf(stdout, "unable to allocate source vector\n");
    goto skip_deallocate_src;
  }
  for(unsigned int i = 0; i < N*VL; ++i) {
    src[i] = dsfmt_genrand_open_close(dsfmt);
  }

  // Allocate the FFT destination array
  double complex *fft = fftw_malloc(N*VL*sizeof(double complex));
  if(fft == NULL) {
    fprintf(stdout, "unable to allocate fft vector\n");
    goto skip_deallocate_fft;
  }

  // Execute the forward FFT
  fftw_plan fwd_plan = fftw_plan_many_dft_r2c(1, &N, VL,
      src, NULL, VL, 1, fft, NULL, VL, 1, FFTW_ESTIMATE);
  if(fwd_plan == NULL) {
    fprintf(stdout, "unable to allocate fft forward plan\n");
    goto skip_deallocate_fwd_plan;
  }
  fftw_execute(fwd_plan);

  // Fill in the rest of the destination values using the Hermitian property.
  fft_r2c_1d_vec_finish(fft, N, VL);

  // Allocate the reverse FFT destination array
  double complex *dst = fftw_malloc(N*VL*sizeof(double complex));
  if(dst == NULL) {
    fprintf(stdout, "unable to allocate dst vector\n");
    goto skip_deallocate_dst;
  }

  // Perform the reverse FFT
  fftw_plan rev_plan = fftw_plan_many_dft(1, &N, VL, fft, NULL, VL, 1,
      dst, NULL, VL, 1, FFTW_BACKWARD, FFTW_ESTIMATE);
  if(rev_plan == NULL) {
    fprintf(stdout, "unable to allocate fft reverse plan\n");
    goto skip_deallocate_rev_plan;
  }
  fftw_execute(rev_plan);

  // Compare the two vectors by sup norm
  double norm = 0.0;
  for(unsigned int i = 0; i < N*VL; ++i) {
    // Divide the resulting by N, because FFTW computes the un-normalized DFT:
    // the forward followed by reverse transform scales the data by N.
    norm = fmax(norm, cabs(dst[i]/N - src[i]));
  }
  if(norm <= 1e-6) {
    ret = EXIT_SUCCESS;
  }

  fftw_destroy_plan(rev_plan);
skip_deallocate_rev_plan:
  fftw_free(dst);
skip_deallocate_dst:
  fftw_destroy_plan(fwd_plan);
skip_deallocate_fwd_plan:
  fftw_free(fft);
skip_deallocate_fft:
  fftw_free(src);
skip_deallocate_src:
  free(dsfmt);
skip_deallocate_prng:
  // Keep valgrind happy by having fftw clean up its internal structures. This
  // helps ensure we aren't leaking memory.
  fftw_cleanup();
  return ret;
}
Пример #17
0
/* Run total/length contiguous, in-place 1d FFTs of the given length over
   data. A plan is created with FFTW_ESTIMATE, executed and destroyed. */
static void fft_3d_batch_1d(FFT_DATA *data, int total, int length, int sign)
{
  int n = length;
  fftw_plan batch = fftw_plan_many_dft(1, &n, total/length,
                                       data, NULL,
                                       1, length,
                                       data, NULL,
                                       1, length,
                                       sign, FFTW_ESTIMATE);
  fftw_execute(batch);
  fftw_destroy_plan(batch);
}

/* Perform a 3d FFT as three rounds of batched 1d FFTs separated by remaps.

   in   : input data
   out  : output data (always receives the final result)
   flag : +1 selects FFTW_FORWARD, anything else FFTW_BACKWARD
   plan : remap plans, targets, sizes and scaling information

   When flag == -1 and plan->scaled is set, the first plan->normnum output
   values are multiplied by plan->norm. */
void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan)
{
  const int sign = (flag == +1) ? FFTW_FORWARD : FFTW_BACKWARD;
  FFT_DATA *data, *copy;
  double norm;
  int i, num;

  /* optional pre-remap; its result goes either to out or to plan->copy */
  if (!plan->pre_plan) {
    data = in;
  }
  else {
    copy = (plan->pre_target == 0) ? out : plan->copy;
    remap_3d((double *) in, (double *) copy, (double *) plan->scratch,
             plan->pre_plan);
    data = copy;
  }

  /* first round of 1d FFTs (presumably along the fast axis -- the original
     comment labelled both this round and the next one "mid axis") */
  fft_3d_batch_1d(data, plan->total1, plan->length1, sign);

  /* 1st mid-remap to prepare for the 2nd round */
  copy = (plan->mid1_target == 0) ? out : plan->copy;
  remap_3d((double *) data, (double *) copy, (double *) plan->scratch,
           plan->mid1_plan);
  data = copy;

  /* second round: 1d FFTs along the mid axis */
  fft_3d_batch_1d(data, plan->total2, plan->length2, sign);

  /* 2nd mid-remap to prepare for the 3rd round */
  copy = (plan->mid2_target == 0) ? out : plan->copy;
  remap_3d((double *) data, (double *) copy, (double *) plan->scratch,
           plan->mid2_plan);
  data = copy;

  /* third round: 1d FFTs along the slow axis */
  fft_3d_batch_1d(data, plan->total3, plan->length3, sign);

  /* optional post-remap into the output format; destination is always out */
  if (plan->post_plan)
    remap_3d((double *) data, (double *) out, (double *) plan->scratch,
             plan->post_plan);

  /* scaling if required */
  if (flag == -1 && plan->scaled) {
    norm = plan->norm;
    num = plan->normnum;
    for (i = 0; i < num; i++) {
      out[i][0] *= norm;
      out[i][1] *= norm;
    }
  }
}
Пример #18
0
int dfft_init(double **data, 
	      int *local_mesh_dim, int *local_mesh_margin, 
	      int* global_mesh_dim, double *global_mesh_off,
	      int *ks_pnum)
{
  int i,j;
  /* helpers */
  int mult[3];

  int n_grid[4][3]; /* The four node grids. */
  int my_pos[4][3]; /* The position of this_node in the node grids. */
  int *n_id[4];     /* linear node identity lists for the node grids. */
  int *n_pos[4];    /* positions of nodes in the node grids. */
  /* FFTW WISDOM stuff. */
  char wisdom_file_name[255];
  FILE *wisdom_file;
  int wisdom_status;

  FFT_TRACE(fprintf(stderr,"%d: dipolar dfft_init():\n",this_node));


  dfft.max_comm_size=0; dfft.max_mesh_size=0;
  for(i=0;i<4;i++) {
    n_id[i]  = (int *) malloc(1*n_nodes*sizeof(int));
    n_pos[i] = (int *) malloc(3*n_nodes*sizeof(int));
  }

  /* === node grids === */
  /* real space node grid (n_grid[0]) */
  for(i=0;i<3;i++) {
    n_grid[0][i] = node_grid[i];
    my_pos[0][i] = node_pos[i];
  }
  for(i=0;i<n_nodes;i++) {
    map_node_array(i,&(n_pos[0][3*i+0]));
    n_id[0][get_linear_index( n_pos[0][3*i+0],n_pos[0][3*i+1],n_pos[0][3*i+2], n_grid[0])] = i;
  }
    
  /* FFT node grids (n_grid[1 - 3]) */
  calc_2d_grid(n_nodes,n_grid[1]);
  /* resort n_grid[1] dimensions if necessary */
  dfft.plan[1].row_dir = map_3don2d_grid(n_grid[0], n_grid[1], mult);
  dfft.plan[0].n_permute = 0;
  for(i=1;i<4;i++) dfft.plan[i].n_permute = (dfft.plan[1].row_dir+i)%3;
  for(i=0;i<3;i++) {
    n_grid[2][i] = n_grid[1][(i+1)%3];
    n_grid[3][i] = n_grid[1][(i+2)%3];
  }
  dfft.plan[2].row_dir = (dfft.plan[1].row_dir-1)%3;
  dfft.plan[3].row_dir = (dfft.plan[1].row_dir-2)%3;



  /* === communication groups === */
  /* copy local mesh off real space charge assignment grid */
  for(i=0;i<3;i++) dfft.plan[0].new_mesh[i] = local_mesh_dim[i];
  for(i=1; i<4;i++) {
    dfft.plan[i].g_size=fft_find_comm_groups(n_grid[i-1], n_grid[i], n_id[i-1], n_id[i], 
					dfft.plan[i].group, n_pos[i], my_pos[i]);
    if(dfft.plan[i].g_size==-1) {
      /* try permutation */
      j = n_grid[i][(dfft.plan[i].row_dir+1)%3];
      n_grid[i][(dfft.plan[i].row_dir+1)%3] = n_grid[i][(dfft.plan[i].row_dir+2)%3];
      n_grid[i][(dfft.plan[i].row_dir+2)%3] = j;
      dfft.plan[i].g_size=fft_find_comm_groups(n_grid[i-1], n_grid[i], n_id[i-1], n_id[i], 
					  dfft.plan[i].group, n_pos[i], my_pos[i]);
      if(dfft.plan[i].g_size==-1) {
	fprintf(stderr,"%d: dipolar INTERNAL ERROR: fft_find_comm_groups error\n", this_node);
	errexit();
      }
    }

    dfft.plan[i].send_block = (int *)realloc(dfft.plan[i].send_block, 6*dfft.plan[i].g_size*sizeof(int));
    dfft.plan[i].send_size  = (int *)realloc(dfft.plan[i].send_size, 1*dfft.plan[i].g_size*sizeof(int));
    dfft.plan[i].recv_block = (int *)realloc(dfft.plan[i].recv_block, 6*dfft.plan[i].g_size*sizeof(int));
    dfft.plan[i].recv_size  = (int *)realloc(dfft.plan[i].recv_size, 1*dfft.plan[i].g_size*sizeof(int));

    dfft.plan[i].new_size = fft_calc_local_mesh(my_pos[i], n_grid[i], global_mesh_dim,
					   global_mesh_off, dfft.plan[i].new_mesh, 
					   dfft.plan[i].start);  
    permute_ifield(dfft.plan[i].new_mesh,3,-(dfft.plan[i].n_permute));
    permute_ifield(dfft.plan[i].start,3,-(dfft.plan[i].n_permute));
    dfft.plan[i].n_ffts = dfft.plan[i].new_mesh[0]*dfft.plan[i].new_mesh[1];

    /* === send/recv block specifications === */
    for(j=0; j<dfft.plan[i].g_size; j++) {
      int k, node;
      /* send block: this_node to comm-group-node i (identity: node) */
      node = dfft.plan[i].group[j];
      dfft.plan[i].send_size[j] 
	= fft_calc_send_block(my_pos[i-1], n_grid[i-1], &(n_pos[i][3*node]), n_grid[i],
			      global_mesh_dim, global_mesh_off, &(dfft.plan[i].send_block[6*j]));
      permute_ifield(&(dfft.plan[i].send_block[6*j]),3,-(dfft.plan[i-1].n_permute));
      permute_ifield(&(dfft.plan[i].send_block[6*j+3]),3,-(dfft.plan[i-1].n_permute));
      if(dfft.plan[i].send_size[j] > dfft.max_comm_size) 
	dfft.max_comm_size = dfft.plan[i].send_size[j];
      /* First plan send blocks have to be adjusted, since the CA grid
	 may have an additional margin outside the actual domain of the
	 node */
      if(i==1) {
	for(k=0;k<3;k++) 
	  dfft.plan[1].send_block[6*j+k  ] += local_mesh_margin[2*k];
      }
      /* recv block: this_node from comm-group-node i (identity: node) */
      dfft.plan[i].recv_size[j] 
	= fft_calc_send_block(my_pos[i], n_grid[i], &(n_pos[i-1][3*node]), n_grid[i-1],
			      global_mesh_dim, global_mesh_off,&(dfft.plan[i].recv_block[6*j]));
      permute_ifield(&(dfft.plan[i].recv_block[6*j]),3,-(dfft.plan[i].n_permute));
      permute_ifield(&(dfft.plan[i].recv_block[6*j+3]),3,-(dfft.plan[i].n_permute));
      if(dfft.plan[i].recv_size[j] > dfft.max_comm_size) 
	dfft.max_comm_size = dfft.plan[i].recv_size[j];
    }

    for(j=0;j<3;j++) dfft.plan[i].old_mesh[j] = dfft.plan[i-1].new_mesh[j];
    if(i==1) 
      dfft.plan[i].element = 1; 
    else {
      dfft.plan[i].element = 2;
      for(j=0; j<dfft.plan[i].g_size; j++) {
	dfft.plan[i].send_size[j] *= 2;
	dfft.plan[i].recv_size[j] *= 2;
      }
    }
    /* DEBUG */
    for(j=0;j<n_nodes;j++) {
      /* MPI_Barrier(comm_cart); */
      if(j==this_node) FFT_TRACE(fft_print_fft_plan(dfft.plan[i]));
    }
  }

  /* Factor 2 for complex fields */
  dfft.max_comm_size *= 2;
  dfft.max_mesh_size = (local_mesh_dim[0]*local_mesh_dim[1]*local_mesh_dim[2]);
  for(i=1;i<4;i++) 
    if(2*dfft.plan[i].new_size > dfft.max_mesh_size) dfft.max_mesh_size = 2*dfft.plan[i].new_size;

  FFT_TRACE(fprintf(stderr,"%d: dfft.max_comm_size = %d, dfft.max_mesh_size = %d\n",
		    this_node,dfft.max_comm_size,dfft.max_mesh_size));

  /* === pack function === */
  for(i=1;i<4;i++) {
    dfft.plan[i].pack_function = fft_pack_block_permute2; 
    FFT_TRACE(fprintf(stderr,"%d: forw plan[%d] permute 2 \n",this_node,i));
  }
  (*ks_pnum)=6;
  if(dfft.plan[1].row_dir==2) {
    dfft.plan[1].pack_function = fft_pack_block;
    FFT_TRACE(fprintf(stderr,"%d: forw plan[%d] permute 0 \n",this_node,1));
    (*ks_pnum)=4;
  }
  else if(dfft.plan[1].row_dir==1) {
    dfft.plan[1].pack_function = fft_pack_block_permute1;
    FFT_TRACE(fprintf(stderr,"%d: forw plan[%d] permute 1 \n",this_node,1));
    (*ks_pnum)=5;
  }
  
  /* Factor 2 for complex numbers */
  dfft.send_buf = (double *)realloc(dfft.send_buf, dfft.max_comm_size*sizeof(double));
  dfft.recv_buf = (double *)realloc(dfft.recv_buf, dfft.max_comm_size*sizeof(double));
  (*data)  = (double *)realloc((*data), dfft.max_mesh_size*sizeof(double));
  dfft.data_buf = (double *)realloc(dfft.data_buf, dfft.max_mesh_size*sizeof(double));
  if(!(*data) || !dfft.data_buf || !dfft.recv_buf || !dfft.send_buf) {
    fprintf(stderr,"%d: Could not allocate FFT data arays\n",this_node);
    errexit();
  }

  fftw_complex *c_data     = (fftw_complex *) (*data);

  /* === FFT Routines (Using FFTW / RFFTW package)=== */
  for(i=1;i<4;i++) {
    dfft.plan[i].dir = FFTW_FORWARD;   
    /* FFT plan creation. 
       Attention: destroys contents of c_data/data and c_data_buf/data_buf. */
    wisdom_status   = FFTW_FAILURE;
    sprintf(wisdom_file_name,"dfftw3_1d_wisdom_forw_n%d.file",
	    dfft.plan[i].new_mesh[2]);
    if( (wisdom_file=fopen(wisdom_file_name,"r"))!=NULL ) {
      wisdom_status = fftw_import_wisdom_from_file(wisdom_file);
      fclose(wisdom_file);
    }
    if(dfft.init_tag==1) fftw_destroy_plan(dfft.plan[i].our_fftw_plan);
//printf("dfft.plan[%d].n_ffts=%d\n",i,dfft.plan[i].n_ffts);
    dfft.plan[i].our_fftw_plan =
      fftw_plan_many_dft(1,&dfft.plan[i].new_mesh[2],dfft.plan[i].n_ffts,
                         c_data,NULL,1,dfft.plan[i].new_mesh[2],
                         c_data,NULL,1,dfft.plan[i].new_mesh[2],
                         dfft.plan[i].dir,FFTW_PATIENT);
    if( wisdom_status == FFTW_FAILURE && 
	(wisdom_file=fopen(wisdom_file_name,"w"))!=NULL ) {
      fftw_export_wisdom_to_file(wisdom_file);
      fclose(wisdom_file);
    }
    dfft.plan[i].fft_function = fftw_execute;        
  }

  /* === The BACK Direction === */
  /* this is needed because slightly different functions are used */
  for(i=1;i<4;i++) {
    dfft.back[i].dir = FFTW_BACKWARD;
    wisdom_status   = FFTW_FAILURE;
    sprintf(wisdom_file_name,"dfftw3_1d_wisdom_back_n%d.file",
	    dfft.plan[i].new_mesh[2]);
    if( (wisdom_file=fopen(wisdom_file_name,"r"))!=NULL ) {
      wisdom_status = fftw_import_wisdom_from_file(wisdom_file);
      fclose(wisdom_file);
    }    
    if(dfft.init_tag==1) fftw_destroy_plan(dfft.back[i].our_fftw_plan);
    dfft.back[i].our_fftw_plan =
      fftw_plan_many_dft(1,&dfft.plan[i].new_mesh[2],dfft.plan[i].n_ffts,
                         c_data,NULL,1,dfft.plan[i].new_mesh[2],
                         c_data,NULL,1,dfft.plan[i].new_mesh[2],
                         dfft.back[i].dir,FFTW_PATIENT);
    if( wisdom_status == FFTW_FAILURE && 
	(wisdom_file=fopen(wisdom_file_name,"w"))!=NULL ) {
      fftw_export_wisdom_to_file(wisdom_file);
      fclose(wisdom_file);
    }
    dfft.back[i].fft_function = fftw_execute;
    dfft.back[i].pack_function = fft_pack_block_permute1;
    FFT_TRACE(fprintf(stderr,"%d: back plan[%d] permute 1 \n",this_node,i));
  }
  if(dfft.plan[1].row_dir==2) {
    dfft.back[1].pack_function = fft_pack_block;
    FFT_TRACE(fprintf(stderr,"%d: back plan[%d] permute 0 \n",this_node,1));
  }
  else if(dfft.plan[1].row_dir==1) {
    dfft.back[1].pack_function = fft_pack_block_permute2;
    FFT_TRACE(fprintf(stderr,"%d: back plan[%d] permute 2 \n",this_node,1));
  }
  dfft.init_tag=1;
  /* free(data); */
  for(i=0;i<4;i++) { free(n_id[i]); free(n_pos[i]); }
  return dfft.max_mesh_size; 
}
Пример #19
0
// FFT-based convolution of `signal` with `filter`.
//
// Both operands are copied into a single complex scratch array ("packed"):
// the signal batches first, followed by the zero-padded filter batches. One
// batched in-place forward transform covers both, the spectra are multiplied,
// one batched in-place inverse transform follows, and reorderOutput() writes
// the final result.
//
// `baseDim`, `isDouble`, `convT` and `roundOut` are not declared in this
// block; they come from the enclosing template declaration.
//
// expand : when true the output is grown to sd + fd - 1 along the convolved
//          dimensions (and along every dimension for ONE2ONE / ONE2MANY);
//          otherwise the output has the signal's dimensions (with the batch
//          dimensions taken from the filter for ONE2MANY).
// kind   : batch mode; for ONE2MANY the product is stored in (and later read
//          from) the filter half of the scratch array, otherwise the signal
//          half.
Array<T> fftconvolve(Array<T> const& signal, Array<T> const& filter,
                     const bool expand, ConvolveBatchKind kind)
{
    const af::dim4 sigDims = signal.dims();
    const af::dim4 fltDims = filter.dims();

    af::dim4 packDims;
    int fftDims[baseDim];
    dim_t fftScale = 1;

    // Dimensions of the shared scratch array. Convolved dimensions are padded
    // to a power of two, the first batch dimension holds signal and filter
    // batches back to back, everything beyond that is 1.
    // fftDims is filled in reverse order and dimension 0 is halved because
    // two real samples are packed into one complex element.
    for (dim_t d = 0; d < 4; d++) {
        if (d < baseDim) {
            packDims[d] = nextpow2((unsigned)(sigDims[d] + fltDims[d] - 1));
            fftDims[baseDim-d-1] = (d == 0) ? packDims[d] / 2 : packDims[d];
            fftScale *= fftDims[baseDim-d-1];
        }
        else if (d == baseDim) {
            packDims[d] = sigDims[d] + fltDims[d];
        }
        else {
            packDims[d] = 1;
        }
    }

    Array<convT> packed = createEmptyArray<convT>(packDims);
    convT *packed_ptr = packed.get();

    const af::dim4 packStrides = packed.strides();

    // Dims/strides of the two sub-views living inside the scratch array.
    af::dim4 sigTmpDims, sigTmpStrides;
    af::dim4 fltTmpDims, fltTmpStrides;

    fltTmpDims[0]    = packDims[0];
    sigTmpDims[0]    = fltTmpDims[0];
    fltTmpStrides[0] = 1;
    sigTmpStrides[0] = fltTmpStrides[0];

    for (dim_t d = 1; d < 4; d++) {
        // Convolved dimensions use the padded extent, batch dimensions keep
        // the extent of the respective input.
        const bool convolved = (d < baseDim);
        sigTmpDims[d] = convolved ? packDims[d] : sigDims[d];
        fltTmpDims[d] = convolved ? packDims[d] : fltDims[d];

        sigTmpStrides[d] = sigTmpStrides[d - 1] * sigTmpDims[d - 1];
        fltTmpStrides[d] = fltTmpStrides[d - 1] * fltTmpDims[d - 1];
    }

    // The signal view starts at the beginning of the scratch array and the
    // filter view begins right after the last signal batch.
    convT *sig_tmp_ptr    = packed_ptr;
    convT *filter_tmp_ptr = packed_ptr + sigTmpStrides[3] * sigTmpDims[3];

    // Number of packed complex elements in dimension 0
    dim_t sig_half_d0 = divup(sigDims[0], 2);

    // Signal: packed two-reals-per-complex and zero padded.
    packData<convT, T>(sig_tmp_ptr, sigTmpDims, sigTmpStrides, signal);

    // Filter: zero padded.
    padArray<convT, T>(filter_tmp_ptr, fltTmpDims, fltTmpStrides, filter);

    // Parameters shared by all four plan constructions below.
    const dim_t batchCount = packDims[baseDim];
    const dim_t elemStride = packStrides[0];
    const dim_t batchDist  = packStrides[baseDim] / 2;

    // Forward transform of every batch (signal and filter) in place.
    if (!isDouble) {
        fftwf_plan fwd = fftwf_plan_many_dft(baseDim, fftDims, batchCount,
                                             (fftwf_complex*)packed.get(), NULL,
                                             elemStride, batchDist,
                                             (fftwf_complex*)packed.get(), NULL,
                                             elemStride, batchDist,
                                             FFTW_FORWARD, FFTW_ESTIMATE);

        fftwf_execute(fwd);
        fftwf_destroy_plan(fwd);
    }
    else {
        fftw_plan fwd = fftw_plan_many_dft(baseDim, fftDims, batchCount,
                                           (fftw_complex*)packed.get(), NULL,
                                           elemStride, batchDist,
                                           (fftw_complex*)packed.get(), NULL,
                                           elemStride, batchDist,
                                           FFTW_FORWARD, FFTW_ESTIMATE);

        fftw_execute(fwd);
        fftw_destroy_plan(fwd);
    }

    // Pointwise product of the spectra; the destination half depends on the
    // batch mode.
    if (kind != ONE2MANY) {
        complexMultiply<convT>(sig_tmp_ptr, sigTmpDims, sigTmpStrides,
                               sig_tmp_ptr, sigTmpDims, sigTmpStrides,
                               filter_tmp_ptr, fltTmpDims, fltTmpStrides,
                               kind);
    }
    else {
        complexMultiply<convT>(filter_tmp_ptr, fltTmpDims, fltTmpStrides,
                               sig_tmp_ptr, sigTmpDims, sigTmpStrides,
                               filter_tmp_ptr, fltTmpDims, fltTmpStrides,
                               kind);
    }

    // Inverse transform of every batch in place.
    if (!isDouble) {
        fftwf_plan inv = fftwf_plan_many_dft(baseDim, fftDims, batchCount,
                                             (fftwf_complex*)packed.get(), NULL,
                                             elemStride, batchDist,
                                             (fftwf_complex*)packed.get(), NULL,
                                             elemStride, batchDist,
                                             FFTW_BACKWARD, FFTW_ESTIMATE);

        fftwf_execute(inv);
        fftwf_destroy_plan(inv);
    }
    else {
        fftw_plan inv = fftw_plan_many_dft(baseDim, fftDims, batchCount,
                                           (fftw_complex*)packed.get(), NULL,
                                           elemStride, batchDist,
                                           (fftw_complex*)packed.get(), NULL,
                                           elemStride, batchDist,
                                           FFTW_BACKWARD, FFTW_ESTIMATE);

        fftw_execute(inv);
        fftw_destroy_plan(inv);
    }

    // Output dimensions.
    dim4 outDims(1);
    if (!expand) {
        outDims = sigDims;
        if (kind==ONE2MANY) {
            for (dim_t d=baseDim; d<4; ++d)
                outDims[d] = fltDims[d];
        }
    } else {
        const bool growEveryDim = (kind==ONE2ONE || kind==ONE2MANY);
        for (dim_t d=0; d<4; ++d) {
            outDims[d] = (growEveryDim || d<baseDim) ? sigDims[d]+fltDims[d]-1
                                                     : sigDims[d];
        }
    }

    Array<T> out = createEmptyArray<T>(outDims);
    T* out_ptr = out.get();
    const af::dim4 out_dims = out.dims();
    const af::dim4 out_strides = out.strides();

    // Copy the result out of whichever half of the scratch array received
    // the product above.
    if (kind != ONE2MANY) {
        reorderOutput<T, convT, roundOut>
            (out_ptr, out_dims, out_strides,
             sig_tmp_ptr, sigTmpDims, sigTmpStrides,
             fltDims, sig_half_d0, baseDim, fftScale, expand);
    }
    else {
        reorderOutput<T, convT, roundOut>
            (out_ptr, out_dims, out_strides,
             filter_tmp_ptr, fltTmpDims, fltTmpStrides,
             fltDims, sig_half_d0, baseDim, fftScale, expand);
    }

    return out;
}
Пример #20
0
void fft(int N, double *in, int stride) {
	fftw_plan p;
	p = fftw_plan_many_dft(1,&N, 1, (fftw_complex *)in, NULL, stride, 0, (fftw_complex *)in, NULL, stride, 0, FFTW_FORWARD, FFTW_ESTIMATE);
	fftw_execute(p);
	fftw_destroy_plan(p);	
}
Пример #21
0
// Creates the FFTW plans used by gfft and resets fft_timer.
//
// Plans stored in file-level variables by this function:
//   r2c_2d, c2r_2d   batched 2D real<->complex transforms (NY/NPROC batches)
//   r2cfft_2Dslice   single NX x NY real->complex transform on wrh3 / wh3
//   r2c_1d, c2r_1d   batched 1D complex transforms of length NY_COMPLEX
// Temporary work arrays are allocated for planning and released before return.
void init_gfft() {
	// This will init the plans needed by gfft
	// Transform of NY/NPROC arrays of (logical) size [NX, NZ]
	// The physical size is [NX, NZ+2]
	// We use in-place transforms
	int i;
	double complex *wi1, *whi1;
	double *wir1;
	
	const int n_size2D[2] = {NX, NZ};
	const int n_size1D[1] = {NY_COMPLEX};
	

	wi1 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (wi1 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for wi1 allocation");
	// NOTE: whi1 is allocated and freed here but not passed to any plan below.
	whi1 = (double complex *) fftw_malloc( sizeof(double complex) * NX*(NY/2+1));
	if (whi1 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for whi1 allocation");

	// Real-valued view of the same buffer, used for the in-place r2c/c2r plans.
	wir1 = (double *) wi1;
	
	// Give the planning buffer defined contents.
	for(i = 0 ; i < NTOTAL_COMPLEX; i++) {
		wi1[i]=1.0;
	}
	
#ifdef _OPENMP
	fftw_plan_with_nthreads( nthreads );
#endif
	r2c_2d = fftw_plan_many_dft_r2c(2, n_size2D, NY / NPROC, wir1, NULL, 1, (NZ+2)*NX,
															 wi1,  NULL, 1, (NZ+2)*NX/2, FFT_PLANNING);
	if (r2c_2d == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW R2C_2D plan creation failed");
														   
	c2r_2d = fftw_plan_many_dft_c2r(2, n_size2D, NY / NPROC, wi1,  NULL, 1, (NZ+2)*NX/2,
														    wir1, NULL, 1, (NZ+2)*NX  , FFT_PLANNING);
	if (c2r_2d == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW C2R_2D plan creation failed");
	
	// Planned on wrh3 / wh3, which are defined outside this function.
	r2cfft_2Dslice = fftw_plan_dft_r2c_2d(NX,NY,wrh3,wh3,FFT_PLANNING); //,whir1,whi1
	if (r2cfft_2Dslice == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW r2c slice plan creation failed");
															 
	// 1D transforms: These are actually c2c transforms, but are used for global 3D transforms.
	// We will transform forward and backward an array of logical size [NX/NPROC, NY, (NZ+2)/2] along the 2nd dimension
	// We will do NZ_COMPLEX transforms along Y. Will need a loop on NX/NPROC
	// We use &wi1[NZ_COMPLEX] so that the alignment check is done properly (see SIMD in the FFTW documentation)
	
#ifdef _OPENMP	
	fftw_plan_with_nthreads( 1 );
#endif	
	r2c_1d = fftw_plan_many_dft(1, n_size1D, NZ_COMPLEX, &wi1[NZ_COMPLEX], NULL, NZ_COMPLEX, 1,
														 &wi1[NZ_COMPLEX], NULL, NZ_COMPLEX, 1, FFTW_FORWARD, FFT_PLANNING);
	if (r2c_1d == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW R2C_1D plan creation failed");
																			  
	c2r_1d = fftw_plan_many_dft(1, n_size1D, NZ_COMPLEX, &wi1[NZ_COMPLEX], NULL, NZ_COMPLEX, 1,
														 &wi1[NZ_COMPLEX], NULL, NZ_COMPLEX, 1, FFTW_BACKWARD, FFT_PLANNING);
	if (c2r_1d == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW C2R_1D plan creation failed");

	// init transpose routines
	init_transpose();
	// Let's see which method is faster (with or without threads)
		
	fftw_free(wi1); fftw_free(whi1);
	
	fft_timer=0.0;

	
	return;
}
Пример #22
0
//squares the symbol by autoconvolving the fourier coefficients
//size of input FQ: nx*nz*k, size of output FQ2: nx*nz*(2k-1)
//
// Layout: for each of the nx*nz cells (cell = m*nz+n) the coefficients are
// stored contiguously, k per cell in FQ and 2k-1 per cell in FQ2.
// Method: each cell's k coefficients are zero-padded to length 2k-1,
// transformed forward, squared pointwise (scaled by 1/(2k-1)) and
// transformed backward into FQ2.
void Qsquare(complex const *FQ,complex *FQ2,int nx,int nz,int k){

  const int len    = 2*k-1;   // padded transform length per cell
  const int ncells = nx*nz;   // number of independent 1D transforms

  // Zero-initialised scratch: entries k..2k-2 of every cell stay zero.
  complex *FQ1;
  FQ1 = (complex *) calloc(len*ncells,sizeof(complex));

  int cell,l,idx;
  for (cell=0;cell<ncells;cell++)
    {
      for (l=0;l<k;l++)
	{
	  FQ1[l+len*cell] = FQ[l+k*cell];
	}
    }

  // The size array lives on the stack: FFTW does not keep a reference to it
  // once the plan has been created, so no heap allocation (which was
  // previously leaked on every call) is needed.
  int rank = 1;
  int dims[1];
  dims[0] = len;
  int howmany = ncells;
  //inembed=onembed=dims

  int istride = 1;
  int ostride = 1;
  int idist = len;
  int odist = idist;

  // Forward transform is in place on FQ1; backward goes from FQ1 to FQ2.
  fftw_plan plan_forward,plan_backward;
  plan_forward = fftw_plan_many_dft(rank,dims,howmany,FQ1,NULL,istride,idist,FQ1,NULL,ostride,odist,FFTW_FORWARD,FFTW_ESTIMATE);
  plan_backward = fftw_plan_many_dft(rank,dims,howmany,FQ1,NULL,istride,idist,FQ2,NULL,ostride,odist,FFTW_BACKWARD,FFTW_ESTIMATE);

  fftw_execute(plan_forward);

  // Pointwise square; the 1/len factor compensates for FFTW's unnormalised
  // forward+backward pair. Every element is independent, so one flat pass
  // over the buffer is sufficient.
  for (idx=0;idx<len*ncells;idx++)
    {
      FQ1[idx] = FQ1[idx]*FQ1[idx]/len;
    }

  fftw_execute(plan_backward);

  //cleaning
  fftw_destroy_plan(plan_forward);
  fftw_destroy_plan(plan_backward);
  free(FQ1);


}
Пример #23
0
/*
 * dgt_fac: factorization-based transform of the columns of f with the
 * pre-factorized data gf, written to cout (presumably a discrete Gabor
 * transform, per the name -- confirm against the caller).
 *
 * Parameters, as used by the code below:
 *   f      input, read as interleaved re/im doubles; W columns of L complex
 *          samples each (the read pointer advances by 2*L per column).
 *   gf     second operand of the per-frequency products, read as interleaved
 *          doubles (presumably the factorized window(s) -- confirm).
 *   L      length of each input column.
 *   W      number of input columns.
 *   R      multiplicity of gf (presumably the number of windows -- confirm).
 *   a, M   N = L/a and b = L/M are derived from these; the final FFT has
 *          length M.
 *   cout   output; the final plan treats it as N*R*W consecutive columns of
 *          M complex values.
 *   dotime if non-zero, timing information is printed with printf.
 *
 * Steps for every r in [0, c), with c = gcd(a, M):
 *   1. gather d samples of f, run a length-d forward FFT, scale by scalconst
 *      and store into ff;
 *   2. multiply ff by the complex conjugate of gf (summing over p) into cf;
 *   3. run a length-d backward FFT on the entries of cf and scatter the
 *      results into cout.
 * After the loop, one batched in-place forward FFT of length M is applied to
 * cout.
 */
void dgt_fac(ltfat_complex *f, ltfat_complex *gf, const int L, const int W,
	     const int R, const int a, const int M, ltfat_complex *cout, int dotime)
{

   /*  --------- initial declarations -------------- */

   int b, N, c, d, p, q, h_a, h_m;
   
   /* Moving read/write cursors used by the multiplication step. */
   double *gbase, *fbase, *cbase;

   int l, k, r, s, u, w, rw, nm, mm, km;
   int ld2a, ld1b, ld3b;
   int ld4c, ld5c;
   int rem;

   fftw_plan p_before, p_after, p_veryend;
   double *ff, *cf, *ffp, *sbuf, *fp, *cfp;

   double scalconst;
   
   /* Time stamps; only assigned and read when dotime is set. */
   double st0, st1, st6, st7;

   /*  ----------- calculation of parameters and plans -------- */
   
   if (dotime)
   {
      st0=ltfat_time();
   }

   b=L/M;
   N=L/a;
   
   /* c = gcd(a, M); h_a and h_m are filled in by gcd() (presumably the
      Bezout coefficients -- confirm against gcd's definition). */
   c=gcd(a, M,&h_a, &h_m);
   p=a/c;
   q=M/c;
   d=b/p;

   /* Sign flipped once here; h_a is used with a leading minus in the index
      computations below. */
   h_a=-h_a;

   /* Scaling constant needed because of FFTWs normalization. */
   scalconst=1.0/((double)d*sqrt((double)M));

   /* Work buffers, sized in doubles (2 per complex value):
      ff   d*p*q*W   complex values (step 1 output),
      cf   d*q*q*W*R complex values (step 2 output),
      sbuf d         complex values (buffer for the length-d FFTs). */
   ff = (double*)ltfat_malloc(2*d*p*q*W*sizeof(double));
   cf = (double*)ltfat_malloc(2*d*q*q*W*R*sizeof(double));
   sbuf = (double*)ltfat_malloc(2*d*sizeof(double));

   /* Create plans. In-place. */

   p_before = fftw_plan_dft_1d(d, (ltfat_complex*)sbuf, (ltfat_complex*)sbuf,
			       FFTW_FORWARD, FFTW_MEASURE);

   p_after  = fftw_plan_dft_1d(d, (ltfat_complex*)sbuf, (ltfat_complex*)sbuf,
			       FFTW_BACKWARD, FFTW_MEASURE);
   
  /* Create plan. In-place. N*R*W transforms of length M on cout. */
   p_veryend = fftw_plan_many_dft(1, &M, N*R*W,
				  cout, NULL,
				  1, M,
				  cout, NULL,
				  1, M,
				  FFTW_FORWARD, FFTW_OPTITYPE);
      

   if (dotime)
   {
      st1=ltfat_time();
      printf("DGT_FAC_7: Planning phase %f\n",st1-st0);
   }


   /* Leading dimensions of the 4dim array. */
   ld2a=2*p*q*W;

   /* Leading dimensions of cf */
   ld1b=q*R;
   ld3b=2*q*R*q*W;
   
   /* --------- main loop begins here ------------------- */
   for (r=0;r<c;r++)
   {
      
      
      /*  ---------- compute signal factorization ----------- */
      /* ffp walks through ff; fp points at sample r of the first column. */
      ffp=ff;
      fp=(double*)f+2*r;
      if (p==1)

	 /* Integer oversampling case */
      {
	 
	 for (w=0;w<W;w++)
	 {
	    for (l=0;l<q;l++)
	    {
	       /* Gather d samples (index taken modulo L) into sbuf. */
	       for (s=0;s<d;s++)
	       {		  
		  rem = 2*((s*M+l*a)%L);
		  sbuf[2*s]   = fp[rem];
		  sbuf[2*s+1] = fp[rem+1];
	       }
	       
	       fftw_execute(p_before);
	       
	       /* Scatter the scaled spectrum into ff with stride ld2a. */
	       for (s=0;s<d;s++)
	       {		  
		 ffp[s*ld2a]   = sbuf[2*s]*scalconst;
		 ffp[s*ld2a+1] = sbuf[2*s+1]*scalconst;
	       }
	       ffp+=2;
	    }
	    /* Next input column. */
	    fp+=2*L;
	 }
	 /* Rewind fp to the first column. */
	 fp-=2*L*W;
	 
      }
      else
      {      
	 /* rational sampling case */

	 for (w=0;w<W;w++)
	 {
	    for (l=0;l<q;l++)
	    {
	       for (k=0;k<p;k++)
	       {
		  /* Gather d samples; positiverem keeps the index in range
		     for the possibly negative argument. */
		  for (s=0;s<d;s++)
		  {		  
		     rem = 2*positiverem(k*M+s*p*M-l*h_a*a, L);
		     sbuf[2*s]   = fp[rem];
		     sbuf[2*s+1] = fp[rem+1];
		  }
		  
		  fftw_execute(p_before);
		  
		  /* Scatter the scaled spectrum into ff with stride ld2a. */
		  for (s=0;s<d;s++)
		  {		  
		     ffp[s*ld2a]   = sbuf[2*s]*scalconst;
		     ffp[s*ld2a+1] = sbuf[2*s+1]*scalconst;
		  }
		  ffp+=2;
	       }
	    }
	    /* Next input column. */
	    fp+=2*L;
	 }
	 /* Rewind fp to the first column. */
	 fp-=2*L*W;
      }

      /* ----------- compute matrix multiplication ----------- */

      /* Do the matmul  */
      if (p==1)
      {
	 /* Integer oversampling case */
	 

	 /* Same product as in the rational branch below, but with p==1 the
	    inner sum has a single term, so each output is one
	    conj(g)*f product. */
	 for (s=0;s<d;s++)
	 {	
	    gbase=(double*)gf+2*(r+s*c)*q*R;
	    fbase=ff+2*s*q*W;
	    cbase=cf+2*s*q*q*W*R;
	    
	    for (nm=0;nm<q*W;nm++)
	    {
	       for (mm=0;mm<q*R;mm++)
	       {
		  /* c = conj(g) * f, written out in real arithmetic. */
		  cbase[0]=gbase[0]*fbase[0]+gbase[1]*fbase[1];
		  cbase[1]=gbase[0]*fbase[1]-gbase[1]*fbase[0];
		  gbase+=2;
		  cbase+=2;
	       }			       
	       /* Rewind g to the start of this block, advance f by one. */
	       gbase-=2*q*R;
	       fbase+=2;
	    }
	    cbase-=2*q*R*q*W;
	 }




      }
      else
      {

	 /* Rational oversampling case */
	 for (s=0;s<d;s++)
	 {	
	    gbase=(double*)gf+2*(r+s*c)*p*q*R;
	    fbase=ff+2*s*p*q*W;
	    cbase=cf+2*s*q*q*W*R;
	    
	    for (nm=0;nm<q*W;nm++)
	    {
	       for (mm=0;mm<q*R;mm++)
	       {
		  /* c = sum over km of conj(g) * f. */
		  cbase[0]=0.0;
		  cbase[1]=0.0;
		  for (km=0;km<p;km++)
		  {
		     cbase[0]+=gbase[0]*fbase[0]+gbase[1]*fbase[1];
		     cbase[1]+=gbase[0]*fbase[1]-gbase[1]*fbase[0];
		     gbase+=2;
		     fbase+=2;
		  }
		  /* Reuse the same p entries of f for the next mm. */
		  fbase-=2*p;
		  cbase+=2;
	       }			       
	       /* Rewind g to the start of this block, advance f by p. */
	       gbase-=2*q*R*p;
	       fbase+=2*p;
	    }
	    cbase-=2*q*R*q*W;
	    fbase-=2*p*q*W;
	 }
      }



      /*  -------  compute inverse coefficient factorization ------- */
      /* cfp walks through cf; ld4c / ld5c are the strides of the rw and w
	 indices in cout. */
      cfp=cf;
      ld4c=M*N;
      ld5c=M*N*R;

      /* Cover both integer and rational sampling case */
      for (w=0;w<W;w++)
      {
	 /* Complete inverse fac of coefficients */
	 for (l=0;l<q;l++)
	 {
	    for (rw=0;rw<R;rw++)
	    {
	       for (u=0;u<q;u++)
	       {	       	       
		  /* Gather d values from cf with stride ld3b. */
		  for (s=0;s<d;s++)	       
		  {	
		     sbuf[2*s]   = cfp[s*ld3b];
		     sbuf[2*s+1] = cfp[s*ld3b+1];
		  }
		  cfp+=2;
		  
		  /* Do inverse fft of length d */
		  fftw_execute(p_after);
		  
		  /* Scatter the d results to their positions in cout. */
		  for (s=0;s<d;s++)	       
		  {	
		     rem= r+l*c+positiverem(u+s*q-l*h_a,N)*M+rw*ld4c+w*ld5c;
		     cout[rem][0]=sbuf[2*s];
		     cout[rem][1]=sbuf[2*s+1];
		  }		    
	       }
	    }
	 }
      }            

      
      /* ----------- Main loop ends here ------------------------ */
   }     

   if (dotime)
   {
      st6=ltfat_time();
      printf("DGT_FAC_7: Main loop done %f\n",st6-st1);
   }

   /* FFT to modulate the coefficients. */
   fftw_execute(p_veryend);   

   if (dotime)
   {
      st7=ltfat_time();
      printf("DGT_FAC_7: Final FFT %f\n",st7-st6);
      printf("DGT_FAC_7: Total time %f\n",st7-st0);
   }

    /* -----------  Clean up ----------------- */   
   fftw_destroy_plan(p_before);
   fftw_destroy_plan(p_after);
   fftw_destroy_plan(p_veryend);

   ltfat_free(sbuf);
   ltfat_free(ff);
   ltfat_free(cf);
   
}
Пример #24
0
// This is an operator that applies (A - shift*B) on a vector using the FFT
void SPB::BandSolver_Ez::OpForw(size_t n, const complex_t &shift, const complex_t *x, complex_t *y) const{
	const int Ngrid = res[0]*res[1];
	complex_t *t = (complex_t*)fftw_malloc(sizeof(complex_t)*n);
	// Data layout: Hx, Hy, Ez, divH
	fftw_plan plan_forward = fftw_plan_many_dft(
		2/*rank*/, res, 4 /*howmany*/,
		(fftw_complex*)t, NULL/*inembed*/,
		1/*istride*/, Ngrid/*idist*/,
		(fftw_complex*)t, NULL/*onembed*/,
		1/*ostride*/, Ngrid/*odist*/,
		FFTW_BACKWARD, FFTW_ESTIMATE);
	fftw_plan plan_backward = fftw_plan_many_dft(
		2/*rank*/, res, 4 /*howmany*/,
		(fftw_complex*)t, NULL/*inembed*/,
		1/*istride*/, Ngrid/*idist*/,
		(fftw_complex*)t, NULL/*onembed*/,
		1/*ostride*/, Ngrid/*odist*/,
		FFTW_FORWARD, FFTW_ESTIMATE);
	
	const double kshiftsign = 1.0;
	
	RNP::TBLAS::Copy(n, x,1, t,1);
	for(int i = 0; i < res[0]; ++i){
		for(int j = 0; j < res[1]; ++j){
			double phase = kshiftsign*2*M_PI*(last_k[0]*((double)i)/res[0] + last_k[1]*((double)j)/res[1]);
			t[HX_OFF + IDX(i,j)] *= complex_t(cos(phase), sin(phase));
			t[HY_OFF + IDX(i,j)] *= complex_t(cos(phase), sin(phase));
			t[EZ_OFF + IDX(i,j)] *= complex_t(cos(phase), sin(phase));
			t[DIVH_OFF+IDX(i,j)] *= complex_t(cos(phase), sin(phase));
		}
	}
	fftw_execute(plan_forward);

	for(int i = 0; i < res[0]; ++i){
		const int fi = (i > res[0]/2 ? i-res[0] : i);
		for(int j = 0; j < res[1]; ++j){
			const int fj = (j > res[1]/2 ? j-res[1] : j);
			double kpG[2] = {
				(L.Lk[0]*(last_k[0]+fi) + L.Lk[2]*(last_k[1]+fj)),
				(L.Lk[1]*(last_k[0]+fi) + L.Lk[3]*(last_k[1]+fj))
			};
			kpG[0] *= 2*M_PI;
			kpG[1] *= 2*M_PI;
			const double klen2 = kpG[0]*kpG[0] + kpG[1]*kpG[1];
			const double klen = sqrt(klen2);
			if(klen < std::numeric_limits<double>::epsilon() * L.CharacteristicKLength()){ continue; }
			
			// [ -q mu     k x     k       0         0     ] [ H ]   [ H ]
			// [  -k x   -q eps    0       0       -i wp   ] [ E ] = [ E ]
			// [   k.       0      0       0         0     ] [dvH] = [dvH]
			// [   0        0      0    -q eta      i w0   ] [ P ]   [ P ]
			// [   0      i wp     0   -i w0 eta   -q eta  ] [ V ]   [ V ]
			//                                               given
/*
// Forward and backward differences
#define FDIFF(VEC,D) ((std::exp(complex_t(0,-(VEC)[D]/res[D]))-1.) * (double)res[D])
#define BDIFF(VEC,D) ((1.-std::exp(complex_t(0,(VEC)[D]/res[D]))) * (double)res[D])
			static const complex_t I(0.,1.);
			y[HX_OFF + IDX(i,j)] = -I*FDIFF(kpG,1)*t[EZ_OFF + IDX(i,j)] + I*BDIFF(kpG,0)*t[DIVH_OFF+IDX(i,j)];
			y[HX_OFF + IDX(i,j)] /= ((double)Ngrid);
			y[HY_OFF + IDX(i,j)] =  I*FDIFF(kpG,0)*t[EZ_OFF + IDX(i,j)] + I*BDIFF(kpG,1)*t[DIVH_OFF+IDX(i,j)];
			y[HY_OFF + IDX(i,j)] /= ((double)Ngrid);
			y[EZ_OFF + IDX(i,j)] = -I*BDIFF(kpG,1)*t[HX_OFF + IDX(i,j)] + I*BDIFF(kpG,0)*t[HY_OFF + IDX(i,j)];
			y[EZ_OFF + IDX(i,j)] /= ((double)Ngrid);
			y[DIVH_OFF+IDX(i,j)] =  I*FDIFF(kpG,0)*t[HX_OFF + IDX(i,j)] + I*FDIFF(kpG,1)*t[HY_OFF + IDX(i,j)];
			y[DIVH_OFF+IDX(i,j)] /= ((double)Ngrid);
			
			y[HX_OFF + IDX(i,j)] -= shift*t[HX_OFF + IDX(i,j)]/((double)Ngrid);
			y[HY_OFF + IDX(i,j)] -= shift*t[HY_OFF + IDX(i,j)]/((double)Ngrid);
			y[EZ_OFF + IDX(i,j)] -= shift*t[EZ_OFF + IDX(i,j)]/((double)Ngrid);
*/
			static const complex_t I(0.,1.);
			const size_t n_res = 0;
			const size_t nh = 4+2*n_res;
			complex_t *A = new complex_t[nh*nh+2*nh];
			complex_t *b = A+nh*nh;
			complex_t *c = b+nh;
			memset(A, 0, sizeof(complex_t)*nh*nh);
			
			A[0+2*nh] = -I*FDIFF(kpG,1);
			A[0+3*nh] = I*BDIFF(kpG,0);
			A[1+2*nh] = I*FDIFF(kpG,0);
			A[1+3*nh] = I*BDIFF(kpG,1);
			A[2+0*nh] = -I*BDIFF(kpG,1);
			A[2+1*nh] =  I*BDIFF(kpG,0);
			A[3+0*nh] = I*FDIFF(kpG,0);
			A[3+1*nh] = I*FDIFF(kpG,1);
			
			A[0+0*nh] = -shift;
			A[1+1*nh] = -shift;
			A[2+2*nh] = -shift;
			
			b[0] = t[HX_OFF + IDX(i,j)];
			b[1] = t[HY_OFF + IDX(i,j)];
			b[2] = t[EZ_OFF + IDX(i,j)];
			b[3] = t[DIVH_OFF+IDX(i,j)];
			
			RNP::TBLAS::MultMV<'N'>(nh,nh, 1.,A,nh, b,1, 0.,c,1);
			
			y[HX_OFF + IDX(i,j)] = c[0] / ((double)Ngrid);
			y[HY_OFF + IDX(i,j)] = c[1] / ((double)Ngrid);
			y[EZ_OFF + IDX(i,j)] = c[2] / ((double)Ngrid);
			y[DIVH_OFF+IDX(i,j)] = c[3] / ((double)Ngrid);
			
			delete [] A;
		}
	}
	RNP::TBLAS::Copy(n, y,1, t,1);
	fftw_execute(plan_backward);
	fftw_destroy_plan(plan_forward);
	fftw_destroy_plan(plan_backward);
	RNP::TBLAS::Copy(n, t,1, y,1);
	
	for(int i = 0; i < res[0]; ++i){
		for(int j = 0; j < res[1]; ++j){
			double phase = -kshiftsign*2*M_PI*(last_k[0]*((double)i)/res[0] + last_k[1]*((double)j)/res[1]);
			y[HX_OFF + IDX(i,j)] *= complex_t(cos(phase), sin(phase));
			y[HY_OFF + IDX(i,j)] *= complex_t(cos(phase), sin(phase));
			y[EZ_OFF + IDX(i,j)] *= complex_t(cos(phase), sin(phase));
			y[DIVH_OFF+IDX(i,j)] *= complex_t(cos(phase), sin(phase));
			/*
			y[HX_OFF + IDX(i,j)] -= shift*x[HX_OFF + IDX(i,j)];
			y[HY_OFF + IDX(i,j)] -= shift*x[HY_OFF + IDX(i,j)];
			y[EZ_OFF + IDX(i,j)] -= shift*x[EZ_OFF + IDX(i,j)];*/
		}
	}
	fftw_free(t);
}
Пример #25
0
int SPB::BandSolver_Ez::SolveK(const double *k){
	SPB_VERB(1, "Solving k-point (%.14g, %.14g)\n", k[0], k[1]);
	ClearSolution();
	
	last_k[0] = k[0];
	last_k[1] = k[1];

	// Prepare the indexing
	const size_t Ngrid = res[0] * res[1];
	
	if(impl->structure_changed_since_last_solve){
		free(impl->ind);
		impl->ind = (int*)malloc(sizeof(int) * 2*Ngrid);

		fftw_free(impl->eps_z_fft);
		impl->eps_z_fft = (complex_t*)fftw_malloc(sizeof(complex_t)*Ngrid);
		
		size_t next_index = 0;
		for(int i = 0; i < res[0]; ++i){
			const double fi = ((double)i/(double)res[0]) - 0.5;
			for(int j = 0; j < res[1]; ++j){
				const double fj = ((double)j/(double)res[1]) - 0.5;
				impl->ind[2*IDX(i,j)+0] = 4*Ngrid+next_index;
				
				// get material of this cell (simple pointwise check)
				int tag, num_poles;
				//if(2 == dim){
					double p[2] = {
						L.Lr[0]*fi + L.Lr[2]*fj,
						L.Lr[1]*fi + L.Lr[3]*fj
					};
					if(!shapeset.QueryPt(p, &tag)){
						tag = -1;
					}
				/*}else{
					double p[3] = {
						L.Lr[0]*fi + L.Lr[3]*fj + L.Lr[6]*fk,
						L.Lr[1]*fi + L.Lr[4]*fj + L.Lr[7]*fk,
						L.Lr[2]*fi + L.Lr[5]*fj + L.Lr[8]*fk
					};
					if(ShapeSet3_query_pt(shapeset.d3, p, NULL, &tag)){
					}else{
						tag = -1;
					}
				}*/
				if(-1 == tag){
					num_poles = 0;
					impl->eps_z_fft[IDX(i,j)] = 1.;
				}else{
					num_poles = material[tag].poles.size();
					impl->eps_z_fft[IDX(i,j)] = material[tag].eps_inf.value[8];
std::cout << i << "\t" << j << "\t" << impl->eps_z_fft[IDX(i,j)] << "\t" << num_poles << std::endl;
				}
				impl->ind[2*IDX(i,j)+1] = tag;
				// update next index
				next_index += 2*num_poles;
			}
		}
		//impl->N = 4*Ngrid + 3*zero_constraint + next_index;
		impl->N = 4*Ngrid + next_index;


		/*
		switch(pol){
		case 1:
			// Hx,Hy,Ez, divH
			N = (3+1)*Ngrid + 3*zero_constraint + next_index;
			break;
		case 2:
			// Hz,Ex,Ey (Hz is already div-free)
			N = (3+0)*Ngrid + 3*zero_constraint + next_index;
			break;
		default:
			// Hx,Hy,Hz,Ex,Ey,Ez, divH
			N = (6+1)*Ngrid + 6*zero_constraint + next_index;
			break;
		}*/
		
		fftw_plan plan_eps = fftw_plan_many_dft(
			2/*rank*/, res, 1 /*howmany*/,
			(fftw_complex*)impl->eps_z_fft, NULL/*inembed*/,
			1/*istride*/, Ngrid/*idist*/,
			(fftw_complex*)impl->eps_z_fft, NULL/*onembed*/,
			1/*ostride*/, Ngrid/*odist*/,
			FFTW_BACKWARD, FFTW_ESTIMATE);
		fftw_execute(plan_eps);
		fftw_destroy_plan(plan_eps);
		impl->structure_changed_since_last_solve = false;
	}
	
	sparse_t::entry_map_t Amap;
	sparse_t::entry_map_t Bmap;
	
	{
		const double Lrl[2] = {
			hypot(L.Lr[0], L.Lr[1]),
			hypot(L.Lr[2], L.Lr[3])
		};
		const double idr[2] = {
			(double)res[0] / Lrl[0],
			(double)res[1] / Lrl[1]
		};
				
		const complex_t Bloch[2] = {
			complex_t(cos(k[0]*2*M_PI), sin(k[0]*2*M_PI)),
			complex_t(cos(k[1]*2*M_PI), sin(k[1]*2*M_PI))
		};
		for(int i = 0; i < res[0]; ++i){
			for(int j = 0; j < res[1]; ++j){
				size_t row, col;
				complex_t coeff;
				
				const int curmat = impl->ind[2*IDX(i,j)+1];
				complex_t eps_z(1.);
				if(curmat >= 0){
					eps_z = material[curmat].eps_inf.value[8];
				}
				
#define ASET(ROW,COL,COEFF) Amap[sparse_t::index_t((ROW),(COL))] = (COEFF)
#define BSET(ROW,COL,COEFF) Bmap[sparse_t::index_t((ROW),(COL))] = (COEFF)
				// divH ~ dx Hx + dy Hy + dz Hz
				// E ~ -i wp V
				// V ~ +i wp E - i G V - i w0 P
				// P ~ +i w0 V
			
				//for(size_t idbg=0;idbg<ne+nh+1;++idbg){
					//ASET(row0+idbg,row0+idbg,1); // for debugging
				//}
			
				// Hx ~ -i dy Ez
				// Hy ~ +i dx Ez
				// Ez ~ -i dy Hx + i dx Hy

				// Hx = complex_t(0,-idr[1]) * (Ez[i,j+1,k] - Ez[i,j,k])
				row = HX_OFF + IDX(i,j);
				coeff = complex_t(0,-idr[1]);
				col = EZ_OFF + IDX(i,j); // Ez
				ASET(row,col, -coeff);
				if(j+1 == res[1]){
					col = EZ_OFF + IDX(i,0); // Ez
					ASET(row,col, coeff/Bloch[1]);
				}else{
					col = EZ_OFF + IDX(i,j+1); // Ez
					ASET(row,col, coeff);
				}
				BSET(row,row, 1);
				
				// Hy = complex_t(0, idr[0]) * (Ez[i+1,j,k] - Ez[i,j,k])
				row = HY_OFF + IDX(i,j);
				coeff = complex_t(0, idr[0]);
				col = EZ_OFF + IDX(i,j); // Ez
				ASET(row,col, -coeff);
				if(i+1 == res[0]){
					col = EZ_OFF + IDX(0,j); // Ez
					ASET(row,col, coeff/Bloch[0]);
				}else{
					col = EZ_OFF + IDX(i+1,j); // Ez
					ASET(row,col, coeff);
				}
				BSET(row,row, 1);
				
				// divH = idr[0] * (Hx[i+1,j,k] - Hx[i,j,k])
				//      + idr[1] * (Hy[i,j+1,k] - Hx[i,j,k])
				row = DIVH_OFF + IDX(i,j);
				coeff = complex_t(0,idr[0]);
				col = HX_OFF + IDX(i,j); // Hx
				ASET(row,col, -coeff);
				ASET(col,row, -std::conj(coeff));
				if(i+1 == res[0]){
					col = HX_OFF + IDX(0,j); // Hx
					ASET(row,col, coeff/Bloch[0]);
					ASET(col,row, std::conj(coeff/Bloch[0]));
				}else{
					col = HX_OFF + IDX(i+1,j); // Hx
					ASET(row,col, coeff);
					ASET(col,row, std::conj(coeff));
				}
				
				coeff = complex_t(0,idr[1]);
				col = HY_OFF + IDX(i,j); // Hy
				ASET(row,col, -coeff);
				ASET(col,row, -std::conj(coeff));
				if(j+1 == res[1]){
					col = HY_OFF + IDX(i,0); // Hy
					ASET(row,col, coeff/Bloch[1]);
					ASET(col,row, std::conj(coeff/Bloch[1]));
				}else{
					col = HY_OFF + IDX(i,j+1); // Hy
					ASET(row,col, coeff);
					ASET(col,row, std::conj(coeff));
				}
				BSET(row,row, 0);

				// Ez = complex_t(0,-idr[1]) * (Hx[i,j,k] - Hx[i,j-1,k])
				//    + complex_t(0, idr[0]) * (Hy[i,j,k] - Hy[i-1,j,k])
				row = EZ_OFF + IDX(i,j);
				
				coeff = complex_t(0,-idr[1]);
				col = HX_OFF + IDX(i,j); // Hx
				ASET(row,col, coeff);
				if(0 == j){
					col = HX_OFF + IDX(i,res[1]-1); // Hx
					ASET(row,col, -coeff*Bloch[1]);
				}else{
					col = HX_OFF + IDX(i,j-1); // Hx
					ASET(row,col, -coeff);
				}
				
				coeff = complex_t(0, idr[0]);
				col = HY_OFF + IDX(i,j); // Hy
				ASET(row,col, coeff);
				if(0 == i){
					col = HY_OFF + IDX(res[0]-1,j); // Hy
					ASET(row,col, -coeff*Bloch[0]);
				}else{
					col = HY_OFF + IDX(i-1,j); // Hy
					ASET(row,col, -coeff);
				}
				BSET(row,row, eps_z);
				
				if(curmat >= 0){
					const int row0 = impl->ind[2*IDX(i,j)+0];
					const Material &m = material[curmat];
					const size_t np = m.poles.size();
					for(size_t p = 0; p < np; ++p){
						row = row0 + 2*p + 0; // V_p
						coeff = complex_t(0, m.poles[p].omega_p) * eps_z;
						col = EZ_OFF + IDX(i,j); // E
						ASET(row,col, coeff);
						ASET(col,row, std::conj(coeff));
						
						if(0 != m.poles[p].Gamma){
							coeff = complex_t(0,-m.poles[p].Gamma) * eps_z;
							ASET(row,row, coeff);
						}
						BSET(row,row, 1);
						
						
						coeff = complex_t(0, -m.poles[p].omega_0) * eps_z;
						col = row0 + 2*p + 1; // P
						ASET(row,col, coeff);
						ASET(col,row, std::conj(coeff));
						BSET(col,col, 1);
					}
				}
				
				/*
				}else if(2 == pol){
					// Hz ~ +i dy Ex - i dx Ey
					// Ex ~ +i dy Hz
					// Ey ~ -i dx Hz
				}else{
					// Hx ~ +i dz Ey - i dy Ez
					// Hy ~ -i dz Ex + i dx Ez
					// Hz ~ +i dy Ex - i dx Ey
					// Ex ~ -i dz Hy + i dy Hz
					// Ey ~ +i dz Hx - i dx Hz
					// Ez ~ -i dy Hx + i dx Hy
				}*/
			}
		}
	}
	impl->A = new sparse_t(impl->N,impl->N, Amap);
	impl->B = new sparse_t(impl->N,impl->N, Bmap);
	
	if(0){
		std::cout << "A="; RNP::Sparse::PrintSparseMatrix(*(impl->A)) << ";" << std::endl;
		std::cout << "B="; RNP::Sparse::PrintSparseMatrix(*(impl->B)) << ";" << std::endl;
		exit(0);
	}
	/*
	complex_t *tmp = new complex_t[4*Ngrid];
	complex_t *tmp2 = new complex_t[16*Ngrid*Ngrid];
	for(size_t i = 0; i < res[0]; ++i){
		for(size_t j = 0; j < res[1]; ++j){
			tmp[IDX(i,j)] = 0;
		}
	}
	for(size_t i = 0; i < res[0]; ++i){
		for(size_t j = 0; j < res[1]; ++j){
			tmp[IDX(i,j)] = 1;
			Precond(tmp, &tmp2[0+IDX(i,j)*Ngrid]);
			tmp[IDX(i,j)] = 0;
		}
	}
	delete [] tmp2;
	delete [] tmp;
	*/
	return solver->Solve();
	/*
	{
		const size_t n = 4*Ngrid;
		complex_t *x = (complex_t*)fftw_malloc(sizeof(complex_t)*n);
		complex_t *y = (complex_t*)fftw_malloc(sizeof(complex_t)*n);
		complex_t *z = (complex_t*)fftw_malloc(sizeof(complex_t)*n);
		const double theta = 0.6;
		
		memset(x, 0, sizeof(complex_t)*n);
		for(int i = 0; i < n; ++i){
			x[i] = frand();
		}
		std::cout << "x = "; RNP::IO::PrintVector(n, x, 1) << std::endl;
		
		Aop(x, y);
		Bop(x, z);
		RNP::TBLAS::Axpy(n, -theta, z,1, y,1);
		// At this point y = A*x-theta*B*x
		std::cout << "y = "; RNP::IO::PrintVector(n, y, 1) << std::endl;
		
		Op(n, theta, y, z);
		// At this point z should be the same as x
		
		std::cout << "z = "; RNP::IO::PrintVector(n, z, 1) << std::endl;
		
		RNP::TBLAS::Axpy(n, -1., x,1,z,1);
		std::cout << "diff = "; RNP::IO::PrintVector(n, z, 1) << std::endl;
		
		fftw_free(z);
		fftw_free(y);
		fftw_free(x);
	}*/
	
	
	/*
	size_t n_wanted = 10;
	size_t ncv = 2*n_wanted+1;
	SPB::complex_t *w = new SPB::complex_t[n_wanted+ncv*4*Ngrid];
	SPB::complex_t *v = w+n_wanted;
	int nconv = RNP::IRA::ShiftInvert(
		4*Ngrid, 0.0, &op_, &bv_,
		n_wanted, ncv, &RNP::IRA::LargestMagnitude,
		w, v, 4*Ngrid,
		NULL,
		NULL,
		(void*)this,
		(void*)this);
	for(size_t i = 0; i < n_wanted;++i){
		std::cout << w[i] << std::endl;
	}
	*/
}
Пример #26
0
/**
 * Computes an in-place 1D complex DFT of X along a single dimension via FFTW.
 *
 * The contents of X are gathered into a temporary FFTW buffer, transformed as
 * a batch of length-X.size(dim-1) 1D FFTs, and scattered back into X. If X is
 * not already of complex type it is converted with X.convertToComplex()
 * before the results are written back.
 *
 * @param X                    array to transform; modified in place
 * @param dim                  1-based index of the dimension to transform
 * @param do_inverse_transform non-zero selects FFTW_BACKWARD and divides the
 *                             result by the length of the transformed
 *                             dimension; zero selects FFTW_FORWARD (unscaled)
 * @return true on success; false if X is empty, dim is out of range, or the
 *         FFTW work buffer / plan could not be created
 */
bool internal_fftw(Mda &X, integer dim, integer do_inverse_transform) {
	const int d1=(int)dim-1;
	const qint64 N=X.size();
	if (!N) return false;

	if ((d1>=X.dimCount())||(d1<0)) {
		qWarning() << "Dimension out of range";
		return false;
	}

	fftw_complex *in = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * N);
	if (!in) {
		qWarning() << "Unable to allocate FFTW work buffer";
		return false;
	}

	// Batch of 1D transforms: each one covers idist consecutive elements of
	// the work buffer, and there are N/idist of them.
	int n[1];
	n[0]=X.size(d1);
	const int istride=1;
	const int idist=X.size(d1);
	const int howmany=N/idist;

	// fftw_plan_many_dft returns NULL when it cannot create a plan; executing
	// a NULL plan would crash, so bail out cleanly instead.
	fftw_plan p = fftw_plan_many_dft(1, n, howmany,
							in, NULL,istride, idist,
							in, NULL,istride, idist,
							do_inverse_transform ? FFTW_BACKWARD : FFTW_FORWARD,
							FFTW_ESTIMATE);
	if (!p) {
		qWarning() << "Unable to create FFTW plan";
		fftw_free(in);
		return false;
	}

	// factor1: number of elements spanned by the dimensions before d1.
	// factor2: number of elements spanned by d1 and all later dimensions.
	// 64-bit counters so that large arrays do not overflow a 32-bit long.
	qint64 factor1=1;
	qint64 factor2=1;
	for (int dimind=0; dimind<d1; dimind++) {
		factor1*=X.size(dimind);
	}
	for (int dimind=d1; dimind<X.dimCount(); dimind++) {
		factor2*=X.size(dimind);
	}

	// Gather: source index j1+factor1*j2 is written to j1*factor2+j2, so that
	// j2 (which starts with dimension d1) becomes the fastest-varying index
	// of the work buffer.
	if (X.data_real) {
		real *D=X.data_real;
		qint64 ct=0;
		for (qint64 j1=0; j1<factor1; j1++) {
		for (qint64 j2=0; j2<factor2; j2++) {
			in[ct][0]=D[j1+factor1*j2];
			in[ct][1]=0;
			ct++;
		}}
	}
	else if (X.data_complex) {
		complex_struct *D=X.data_complex;
		qint64 ct=0;
		for (qint64 j1=0; j1<factor1; j1++) {
		for (qint64 j2=0; j2<factor2; j2++) {
			in[ct][0]=D[j1+factor1*j2].re;
			in[ct][1]=D[j1+factor1*j2].im;
			ct++;
		}}
	}
	else {
		qint64 ct=0;
		for (qint64 j1=0; j1<factor1; j1++) {
		for (qint64 j2=0; j2<factor2; j2++) {
			in[ct][0]=X[j1+factor1*j2].re();
			in[ct][1]=X[j1+factor1*j2].im();
			ct++;
		}}
	}

	fftw_execute(p);

	// The result is complex, so make sure X can hold it.
	if (X.dataType()!=MDA_TYPE_COMPLEX) {
		X.convertToComplex();
	}

	// Scatter: inverse of the gather permutation above.
	if (X.data_complex) {
		complex_struct *D=X.data_complex;
		qint64 ct=0;
		for (qint64 j2=0; j2<factor2; j2++) {
		for (qint64 j1=0; j1<factor1; j1++) {
			D[ct].re=in[j2+factor2*j1][0];
			D[ct].im=in[j2+factor2*j1][1];
			ct++;
		}}
	}
	else {
		qint64 ct=0;
		for (qint64 j2=0; j2<factor2; j2++) {
		for (qint64 j1=0; j1<factor1; j1++) {
			X[ct]=Complex(in[j2+factor2*j1][0],in[j2+factor2*j1][1]);
			ct++;
		}}
	}

	fftw_destroy_plan(p);
	fftw_free(in);

	// FFTW transforms are unnormalized; apply the 1/n factor for the inverse.
	if (do_inverse_transform) {
		real factor=1.0F/X.size(d1);
		const qint64 total=X.size();
		if (X.data_complex) {
			complex_struct *D=X.data_complex;
			for (qint64 j=0; j<total; j++) {
				D[j].re*=factor;
				D[j].im*=factor;
			}
		}
		else {
			for (qint64 j=0; j<total; j++)
				X[j]=X[j]*factor;
		}
	}

	return true;
}
Пример #27
0
/* Create a plan for a distributed 3d FFT built from three sets of 1d FFTs
   (fast, mid, slow axis) with data remaps in between.

   comm                 MPI communicator the plan is built on
   nfast,nmid,nslow     global grid size along the three axes
   in_ilo ... in_khi    inclusive index bounds of the input data as passed to
                        the first remap (presumably the portion owned by the
                        calling proc -- confirm against remap_3d_create_plan)
   out_ilo ... out_khi  inclusive index bounds of the output data
   scaled               0 = no normalization, otherwise plan->norm is set to
                        1/(nfast*nmid*nslow)
   permute              forwarded to the final remap as (permute+1)%3;
                        permute == 2 allows the final remaps to be skipped
                        when the layouts already match
   nbuf                 set on return to copy_size + scratch_size

   Returns the new plan, or NULL if an allocation or a remap plan fails.
   NOTE(review): the failure paths return NULL without releasing what was
   already created (plan struct, earlier remap plans, copy buffer). */

struct fft_plan_3d *fft_3d_create_plan(
       MPI_Comm comm, int nfast, int nmid, int nslow,
       int in_ilo, int in_ihi, int in_jlo, int in_jhi,
       int in_klo, int in_khi,
       int out_ilo, int out_ihi, int out_jlo, int out_jhi,
       int out_klo, int out_khi,
       int scaled, int permute, int *nbuf)

{
  struct fft_plan_3d *plan;
  int me,nprocs;
  int flag,remapflag;
  int first_ilo,first_ihi,first_jlo,first_jhi,first_klo,first_khi;
  int second_ilo,second_ihi,second_jlo,second_jhi,second_klo,second_khi;
  int third_ilo,third_ihi,third_jlo,third_jhi,third_klo,third_khi;
  int out_size,first_size,second_size,third_size,copy_size,scratch_size;
  int np1,np2,ip1,ip2;

/* query MPI info */

  MPI_Comm_rank(comm,&me);
  MPI_Comm_size(comm,&nprocs);

/* compute division of procs in 2 dimensions not on-processor */

  bifactor(nprocs,&np1,&np2);
  ip1 = me % np1;
  ip2 = me/np1;

/* allocate memory for plan data struct */

  plan = (struct fft_plan_3d *) malloc(sizeof(struct fft_plan_3d));
  if (plan == NULL) return NULL;

/* remap from initial distribution to layout needed for 1st set of 1d FFTs
   not needed if all procs own entire fast axis initially
   first indices = distribution after 1st set of FFTs */

  if (in_ilo == 0 && in_ihi == nfast-1)
    flag = 0;
  else
    flag = 1;

  /* the remap is skipped only if every proc agrees it is unnecessary */
  MPI_Allreduce(&flag,&remapflag,1,MPI_INT,MPI_MAX,comm);

  if (remapflag == 0) {
    first_ilo = in_ilo;
    first_ihi = in_ihi;
    first_jlo = in_jlo;
    first_jhi = in_jhi;
    first_klo = in_klo;
    first_khi = in_khi;
    plan->pre_plan = NULL;
  }
  else {
    first_ilo = 0;
    first_ihi = nfast - 1;
    first_jlo = ip1*nmid/np1;
    first_jhi = (ip1+1)*nmid/np1 - 1;
    first_klo = ip2*nslow/np2;
    first_khi = (ip2+1)*nslow/np2 - 1;
    plan->pre_plan =
      remap_3d_create_plan(comm,in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi,
                           first_ilo,first_ihi,first_jlo,first_jhi,
                           first_klo,first_khi,
                           FFT_PRECISION,0,0,2);
    if (plan->pre_plan == NULL) return NULL;
  }

/* 1d FFTs along fast axis */

  plan->length1 = nfast;
  plan->total1 = nfast * (first_jhi-first_jlo+1) * (first_khi-first_klo+1);

/* remap from 1st to 2nd FFT
   choose which axis is split over np1 vs np2 to minimize communication
   second indices = distribution after 2nd set of FFTs */

  second_ilo = ip1*nfast/np1;
  second_ihi = (ip1+1)*nfast/np1 - 1;
  second_jlo = 0;
  second_jhi = nmid - 1;
  second_klo = ip2*nslow/np2;
  second_khi = (ip2+1)*nslow/np2 - 1;
  plan->mid1_plan =
      remap_3d_create_plan(comm,
                           first_ilo,first_ihi,first_jlo,first_jhi,
                           first_klo,first_khi,
                           second_ilo,second_ihi,second_jlo,second_jhi,
                           second_klo,second_khi,
                           FFT_PRECISION,1,0,2);
  if (plan->mid1_plan == NULL) return NULL;

/* 1d FFTs along mid axis */

  plan->length2 = nmid;
  plan->total2 = (second_ihi-second_ilo+1) * nmid * (second_khi-second_klo+1);

/* remap from 2nd to 3rd FFT
   if final distribution is permute=2 with all procs owning entire slow axis
     then this remapping goes directly to final distribution
   third indices = distribution after 3rd set of FFTs */

  if (permute == 2 && out_klo == 0 && out_khi == nslow-1)
    flag = 0;
  else
    flag = 1;

  MPI_Allreduce(&flag,&remapflag,1,MPI_INT,MPI_MAX,comm);

  if (remapflag == 0) {
    third_ilo = out_ilo;
    third_ihi = out_ihi;
    third_jlo = out_jlo;
    third_jhi = out_jhi;
    third_klo = out_klo;
    third_khi = out_khi;
  }
  else {
    third_ilo = ip1*nfast/np1;
    third_ihi = (ip1+1)*nfast/np1 - 1;
    third_jlo = ip2*nmid/np2;
    third_jhi = (ip2+1)*nmid/np2 - 1;
    third_klo = 0;
    third_khi = nslow - 1;
  }

  plan->mid2_plan =
    remap_3d_create_plan(comm,
                         second_jlo,second_jhi,second_klo,second_khi,
                         second_ilo,second_ihi,
                         third_jlo,third_jhi,third_klo,third_khi,
                         third_ilo,third_ihi,
                         FFT_PRECISION,1,0,2);
  if (plan->mid2_plan == NULL) return NULL;

/* 1d FFTs along slow axis */

  plan->length3 = nslow;
  plan->total3 = (third_ihi-third_ilo+1) * (third_jhi-third_jlo+1) * nslow;

/* remap from 3rd FFT to final distribution
   not needed if permute = 2 and third indices = out indices on all procs */

  if (permute == 2 &&
      out_ilo == third_ilo && out_ihi == third_ihi &&
      out_jlo == third_jlo && out_jhi == third_jhi &&
      out_klo == third_klo && out_khi == third_khi)
    flag = 0;
  else
    flag = 1;

  MPI_Allreduce(&flag,&remapflag,1,MPI_INT,MPI_MAX,comm);

  if (remapflag == 0)
    plan->post_plan = NULL;
  else {
    plan->post_plan =
      remap_3d_create_plan(comm,
                           third_klo,third_khi,third_ilo,third_ihi,
                           third_jlo,third_jhi,
                           out_klo,out_khi,out_ilo,out_ihi,
                           out_jlo,out_jhi,
                           FFT_PRECISION,(permute+1)%3,0,2);
    if (plan->post_plan == NULL) return NULL;
  }

/* configure plan memory pointers and allocate work space
   out_size = amount of memory given to FFT by user
   first/second/third_size = amount of memory needed after pre,mid1,mid2 remaps
   copy_size = amount needed internally for extra copy of data
   scratch_size = amount needed internally for remap scratch space
   for each remap:
     use out space for result if big enough, else require copy buffer
     accumulate largest required remap scratch space */

  out_size = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * (out_khi-out_klo+1);
  first_size = (first_ihi-first_ilo+1) * (first_jhi-first_jlo+1) *
    (first_khi-first_klo+1);
  second_size = (second_ihi-second_ilo+1) * (second_jhi-second_jlo+1) *
    (second_khi-second_klo+1);
  third_size = (third_ihi-third_ilo+1) * (third_jhi-third_jlo+1) *
    (third_khi-third_klo+1);

  copy_size = 0;
  scratch_size = 0;

  if (plan->pre_plan) {
    if (first_size <= out_size)
      plan->pre_target = 0;
    else {
      plan->pre_target = 1;
      copy_size = MAX(copy_size,first_size);
    }
    scratch_size = MAX(scratch_size,first_size);
  }

  if (plan->mid1_plan) {
    if (second_size <= out_size)
      plan->mid1_target = 0;
    else {
      plan->mid1_target = 1;
      copy_size = MAX(copy_size,second_size);
    }
    scratch_size = MAX(scratch_size,second_size);
  }

  if (plan->mid2_plan) {
    if (third_size <= out_size)
      plan->mid2_target = 0;
    else {
      plan->mid2_target = 1;
      copy_size = MAX(copy_size,third_size);
    }
    scratch_size = MAX(scratch_size,third_size);
  }

  if (plan->post_plan)
    scratch_size = MAX(scratch_size,out_size);

  *nbuf = copy_size + scratch_size;

  if (copy_size) {
    plan->copy = (FFT_DATA *) malloc(copy_size*sizeof(FFT_DATA));
    if (plan->copy == NULL) return NULL;
  }
  else plan->copy = NULL;

  if (scratch_size) {
    plan->scratch = (FFT_DATA *) malloc(scratch_size*sizeof(FFT_DATA));
    if (plan->scratch == NULL) return NULL;
  }
  else plan->scratch = NULL;

/* system specific pre-computation of 1d FFT coeffs
   and scaling normalization

   an FFTW3 "many" plan has its batch count (howmany = total/length) baked in,
   so a plan may only be reused for another axis when BOTH the 1d length and
   the total element count match; sharing on length alone would run the
   wrong number of 1d FFTs whenever the per-proc totals differ */

  plan->plan_fast_forward =
    fftw_plan_many_dft(1,&(plan->length1),plan->total1/plan->length1,
                       plan->scratch,NULL,1,plan->length1,plan->scratch,
                       NULL,1,plan->length1,FFTW_FORWARD,FFTW_ESTIMATE);
  plan->plan_fast_backward =
    fftw_plan_many_dft(1,&(plan->length1),plan->total1/plan->length1,
                       plan->scratch,NULL,1,plan->length1,plan->scratch,
                       NULL,1,plan->length1,FFTW_BACKWARD,FFTW_ESTIMATE);

  if (plan->length2 == plan->length1 && plan->total2 == plan->total1) {
    plan->plan_mid_forward = plan->plan_fast_forward;
    plan->plan_mid_backward = plan->plan_fast_backward;
  }
  else {
    plan->plan_mid_forward =
      fftw_plan_many_dft(1,&(plan->length2),plan->total2/plan->length2,
                         plan->scratch,NULL,1,plan->length2,plan->scratch,
                         NULL,1,plan->length2,FFTW_FORWARD,FFTW_ESTIMATE);
    plan->plan_mid_backward =
      fftw_plan_many_dft(1,&(plan->length2),plan->total2/plan->length2,
                         plan->scratch,NULL,1,plan->length2,plan->scratch,
                         NULL,1,plan->length2,FFTW_BACKWARD,FFTW_ESTIMATE);
  }

  if (plan->length3 == plan->length1 && plan->total3 == plan->total1) {
    plan->plan_slow_forward = plan->plan_fast_forward;
    plan->plan_slow_backward = plan->plan_fast_backward;
  }
  else if (plan->length3 == plan->length2 && plan->total3 == plan->total2) {
    plan->plan_slow_forward = plan->plan_mid_forward;
    plan->plan_slow_backward = plan->plan_mid_backward;
  }
  else {
    plan->plan_slow_forward =
      fftw_plan_many_dft(1,&(plan->length3),plan->total3/plan->length3,
                         plan->scratch,NULL,1,plan->length3,plan->scratch,
                         NULL,1,plan->length3,FFTW_FORWARD,FFTW_ESTIMATE);
    plan->plan_slow_backward =
      fftw_plan_many_dft(1,&(plan->length3),plan->total3/plan->length3,
                         plan->scratch,NULL,1,plan->length3,plan->scratch,
                         NULL,1,plan->length3,FFTW_BACKWARD,FFTW_ESTIMATE);
  }

  if (scaled == 0)
    plan->scaled = 0;
  else {
    plan->scaled = 1;
    /* form the product in double: nfast*nmid*nslow can overflow an int */
    plan->norm = 1.0/((double) nfast * (double) nmid * (double) nslow);
    plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) *
      (out_khi-out_klo+1);
  }

  return plan;
}
Пример #28
0
/**
 * Forward DFT of M interleaved real series of length N.
 *
 * The real input is promoted to complex (imaginary part 0) and transformed
 * with a single batched FFTW plan. Despite the "r2c" name, the full complex
 * spectrum is produced (no half-spectrum packing).
 *
 * Layout (FFTW advanced interface: element j of transform k lives at
 * j*stride + k*dist, with stride = M and dist = 1 here):
 *   in[j*M + k]                      sample j of series k
 *   out[2*(j*M + k)], out[2*(j*M + k) + 1]
 *                                    real / imaginary part of bin j, series k
 *
 * @param M   number of series (batch count); must be > 0
 * @param N   length of each series; must be > 0
 * @param out receives 2*M*N floats (interleaved re/im)
 * @param in  M*N real input samples
 * @return true on success; false on invalid arguments or if FFTW could not
 *         allocate its buffers or create the plan
 */
bool do_fft_1d_r2c(int M, int N, float* out, float* in)
{
    // A non-positive size cannot be planned by FFTW (the plan would be NULL
    // and executing it would crash), so reject it up front.
    if (M <= 0 || N <= 0 || in == NULL || out == NULL)
        return false;

    // Computed in size_t so that M*N cannot overflow an int.
    const size_t MN = (size_t)M * (size_t)N;

    fftw_complex* in2 = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * MN);
    fftw_complex* out2 = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * MN);
    if (in2 == NULL || out2 == NULL) {
        if (in2 != NULL)
            fftw_free(in2);
        if (out2 != NULL)
            fftw_free(out2);
        return false;
    }

    // Promote the real samples to complex values.
    for (size_t ii = 0; ii < MN; ii++) {
        in2[ii][0] = in[ii];
        in2[ii][1] = 0;
    }

    fftw_plan p;
    int rank = 1;
    int n[] = { N };
    int howmany = M;
    int* inembed = n;
    int istride = M;
    int idist = 1;
    int* onembed = n;
    int ostride = M;
    int odist = 1;
    int sign = FFTW_FORWARD;
    unsigned flags = FFTW_ESTIMATE;

    // Plan creation is serialized across OpenMP threads; only fftw_execute
    // is run outside the critical section.
#pragma omp critical
    p = fftw_plan_many_dft(rank, n, howmany, in2, inembed, istride, idist, out2, onembed, ostride, odist, sign, flags);

    if (p == NULL) {
        fftw_free(in2);
        fftw_free(out2);
        return false;
    }

    fftw_execute(p);

    // Narrow the double-precision result to interleaved floats.
    for (size_t ii = 0; ii < MN; ii++) {
        out[ii * 2] = out2[ii][0];
        out[ii * 2 + 1] = out2[ii][1];
    }

    // Plan destruction is serialized just like plan creation.
#pragma omp critical
    fftw_destroy_plan(p);

    fftw_free(in2);
    fftw_free(out2);

    return true;
}
Пример #29
0
// Applies the solver's inverse operator to x and stores the result in y.
//
// Steps, in order:
//   1. copy the first 4*Ngrid entries of x into y;
//   2. for every grid cell with a material tag >= 0, overwrite the two
//      auxiliary entries of each pole in y and accumulate into that cell's
//      Ez entry;
//   3. multiply the four field blocks by a per-cell phase, FFT them in place,
//      solve a 4x4 system per Fourier index, FFT back and undo the phase;
//   4. correct the first auxiliary entry of each pole with the new Ez value.
//
// shift : not referenced anywhere in the body (the inline comments below
//         speak of the zero-shift case).
// x     : input vector; read at indices [0, 4*Ngrid) and at the pole rows
//         row0 + 2*p + {0,1} taken from impl->ind.
// y     : output vector; written at the same indices. NOTE(review): only
//         4*Ngrid entries are copied up front, the pole rows are assigned
//         explicitly in step 2 -- presumably they lie beyond 4*Ngrid; confirm.
void SPB::BandSolver_Ez::ShiftInv(const complex_t &shift, const complex_t *x, complex_t *y) const{
	const int Ngrid = res[0]*res[1];
	size_t n = 4*Ngrid;
	
	RNP::TBLAS::Copy(n, x,1, y,1);
	
	// Invert V and P fields
	// For zero shift, the main E/H field block matrix inverse is unchanged
	// v = inv(D) h
	// u = inv(C - W^H inv(D) W) (g - W^H v)
	for(int i = 0; i < res[0]; ++i){
		for(int j = 0; j < res[1]; ++j){
			// ind[2*cell+1] holds the material tag (negative: no material here),
			// ind[2*cell+0] the first pole row of this cell.
			const int tag = impl->ind[2*IDX(i,j)+1];
			if(tag < 0){ continue; }
			const int row0 = impl->ind[2*IDX(i,j)+0];
			const Material &mat = material[tag];
			const int np = mat.poles.size();
			for(int p = 0; p < np; ++p){
				const LorentzPole &pole = mat.poles[p];
				// eps_inf.value[8] is the same tensor entry that is used as eps_z
				// elsewhere in this file.
				const complex_t iwp = complex_t(0,pole.omega_p) * mat.eps_inf.value[8];
				const complex_t i_w0 = complex_t(0,1./pole.omega_0) / mat.eps_inf.value[8];
				const complex_t iG_w0w0 = i_w0 * pole.Gamma / pole.omega_0;
				// Order matters: the Ez update below reads the y entry that was
				// just written two lines above it.
				y[row0 + 2*p + 0] = -i_w0 * x[row0 + 2*p + 1];
				y[row0 + 2*p + 1] = i_w0 * x[row0 + 2*p + 0] + iG_w0w0 * x[row0 + 2*p + 1];
				y[EZ_OFF + IDX(i,j)] += iwp * y[row0 + 2*p + 0];
			}
		}
	}
	
	// Data layout: divH, Ez, Hx, Hy
	// Two in-place batched 2D plans over the four Ngrid-sized blocks of y.
	// Note the naming: plan_forward is built with FFTW_BACKWARD and
	// plan_backward with FFTW_FORWARD.
	fftw_plan plan_forward = fftw_plan_many_dft(
		2/*rank*/, res, 4 /*howmany*/,
		(fftw_complex*)y, NULL/*inembed*/,
		1/*istride*/, Ngrid/*idist*/,
		(fftw_complex*)y, NULL/*onembed*/,
		1/*ostride*/, Ngrid/*odist*/,
		FFTW_BACKWARD, FFTW_ESTIMATE);
	fftw_plan plan_backward = fftw_plan_many_dft(
		2/*rank*/, res, 4 /*howmany*/,
		(fftw_complex*)y, NULL/*inembed*/,
		1/*istride*/, Ngrid/*idist*/,
		(fftw_complex*)y, NULL/*onembed*/,
		1/*ostride*/, Ngrid/*odist*/,
		FFTW_FORWARD, FFTW_ESTIMATE);
	// Multiply every field entry by exp(+i*2*pi*(k0*i/res0 + k1*j/res1))
	// before transforming; the opposite phase is applied after the solve.
	const double kshiftsign = 1.0;
	for(int i = 0; i < res[0]; ++i){
		for(int j = 0; j < res[1]; ++j){
			double phase = kshiftsign*2*M_PI*(last_k[0]*(double)i/res[0] + last_k[1]*(double)j/res[1]);
			y[HX_OFF + IDX(i,j)] *= complex_t(cos(phase), sin(phase));
			y[HY_OFF + IDX(i,j)] *= complex_t(cos(phase), sin(phase));
			y[EZ_OFF + IDX(i,j)] *= complex_t(cos(phase), sin(phase));
			y[DIVH_OFF+IDX(i,j)] *= complex_t(cos(phase), sin(phase));
		}
	}
	fftw_execute(plan_forward);

	// Per-Fourier-index 4x4 system; b is ordered (divH, Ez, Hx, Hy).
	// NOTE(review): ipiv is declared but not used in this function.
	complex_t A[4*4], b[4];
	size_t ipiv[4];
	for(int i = 0; i < res[0]; ++i){
		// Map the FFT index to a signed frequency index (indices above res/2
		// wrap to negative values).
		const int fi = (i > res[0]/2 ? i-res[0] : i);
		for(int j = 0; j < res[1]; ++j){
			const int fj = (j > res[1]/2 ? j-res[1] : j);
			// k + G: L.Lk applied to (last_k + frequency index), scaled by 2*pi.
			double kpG[2] = {
				(L.Lk[0]*(last_k[0]+fi) + L.Lk[2]*(last_k[1]+fj)),
				(L.Lk[1]*(last_k[0]+fi) + L.Lk[3]*(last_k[1]+fj))
			};
			kpG[0] *= 2*M_PI;
			kpG[1] *= 2*M_PI;
			const double klen2 = kpG[0]*kpG[0] + kpG[1]*kpG[1];
			const double klen = sqrt(klen2);
			
			// At the Gamma point, project out the constant basis vector
			if(klen < std::numeric_limits<double>::epsilon() * L.CharacteristicKLength()){
				y[DIVH_OFF+IDX(i,j)] = 0;
				y[EZ_OFF + IDX(i,j)] = 0;
				y[HX_OFF + IDX(i,j)] = 0;
				y[HY_OFF + IDX(i,j)] = 0;
				continue;
			}
			
			// [   0      0      k.        0         0     ] [dvH] = [dvH]
			// [   0   -q eps   -k x       0       -i wp   ] [ E ] = [ E ]
			// [   k     k x   -q mu       0         0     ] [ H ]   [ H ]
			// [   0      0      0      -q eta      i w0   ] [ P ]   [ P ]
			// [   0    i wp     0     -i w0 eta   -q eta  ] [ V ]   [ V ]
			//                                                       given
			memset(A, 0, sizeof(complex_t)*4*4);
			
// Forward and backward differences
// (these macros are defined at this point and are not #undef'd in this
// function, so they stay visible to the code that follows)
#define FDIFF(VEC,D) ((std::exp(complex_t(0,-(VEC)[D]/res[D]))-1.) * (double)res[D])
#define BDIFF(VEC,D) ((1.-std::exp(complex_t(0,(VEC)[D]/res[D]))) * (double)res[D])
			static const complex_t I(0.,1.);
			
			// Only the eight entries below are non-zero; A is passed to the
			// solver with leading dimension 4.
			// NOTE(review): presumably A[r + c*4] is row r, column c
			// (column-major) -- confirm against RNP::LinearSolve.
			A[2+1*4] = -I*FDIFF(kpG,1);
			A[2+0*4] = I*BDIFF(kpG,0);
			A[3+1*4] = I*FDIFF(kpG,0);
			A[3+0*4] = I*BDIFF(kpG,1);
			A[1+2*4] = -I*BDIFF(kpG,1);
			A[1+3*4] =  I*BDIFF(kpG,0);
			A[0+2*4] = I*FDIFF(kpG,0);
			A[0+3*4] = I*FDIFF(kpG,1);
			
			b[0] = y[DIVH_OFF+IDX(i,j)];
			b[1] = y[EZ_OFF + IDX(i,j)];
			b[2] = y[HX_OFF + IDX(i,j)];
			b[3] = y[HY_OFF + IDX(i,j)];
			
			RNP::LinearSolve<'N'>(4,1, A,4, b,4);
			
			// FFTW transforms are unnormalized; dividing by Ngrid here makes the
			// transform pair executed around this loop an identity.
			y[DIVH_OFF+IDX(i,j)] = b[0] / ((double)Ngrid);
			y[EZ_OFF + IDX(i,j)] = b[1] / ((double)Ngrid);
			y[HX_OFF + IDX(i,j)] = b[2] / ((double)Ngrid);
			y[HY_OFF + IDX(i,j)] = b[3] / ((double)Ngrid);
		}
	}
	fftw_execute(plan_backward);
	fftw_destroy_plan(plan_forward);
	fftw_destroy_plan(plan_backward);
	
	// Undo the phase factor applied before the first transform.
	for(int i = 0; i < res[0]; ++i){
		for(int j = 0; j < res[1]; ++j){
			double phase = -kshiftsign*2*M_PI*(last_k[0]*(double)i/res[0] + last_k[1]*(double)j/res[1]);
			y[DIVH_OFF+IDX(i,j)] *= complex_t(cos(phase), sin(phase));
			y[EZ_OFF + IDX(i,j)] *= complex_t(cos(phase), sin(phase));
			y[HX_OFF + IDX(i,j)] *= complex_t(cos(phase), sin(phase));
			y[HY_OFF + IDX(i,j)] *= complex_t(cos(phase), sin(phase));
		}
	}
	
	// v += inv(D) W u
	// Only the first auxiliary row of each pole (row0 + 2*p + 0) is updated,
	// using the Ez value computed above.
	for(int i = 0; i < res[0]; ++i){
		for(int j = 0; j < res[1]; ++j){
			const int tag = impl->ind[2*IDX(i,j)+1];
			if(tag < 0){ continue; }
			const int row0 = impl->ind[2*IDX(i,j)+0];
			const Material &mat = material[tag];
			const int np = mat.poles.size();
			for(int p = 0; p < np; ++p){
				const LorentzPole &pole = mat.poles[p];
				y[row0 + 2*p + 0] -= (pole.omega_p/pole.omega_0) * y[EZ_OFF + IDX(i,j)];
			}
		}
	}
}
Пример #30
0
/* \brief Build the FFTW plan used to transform the stored grid values
 *
 * Any plan created earlier is destroyed first. Every dimension flagged in
 * bDimTrans contributes one entry to the transform size array (visited from
 * the last grid dimension down to the first), divides the batch count and
 * multiplies the distance between consecutive transforms. The plan works
 * in place on _dVal with unit stride.
 *
 * \param nSign     transform sign handed to FFTW unchanged
 * \param bDimTrans one flag per grid dimension; true selects the dimension
 *                  for transformation
 */
void CFFTScalar::CreatePlan(int nSign, bool* bDimTrans)
{
	// A plan left over from an earlier call is released before replacement
	if(_bPlanSet)
	{
		fftw_destroy_plan(_plan);
	}

	// Defaults: rank zero, unit extents, unit strides and distances
	_nRank=0;
	_nN[0]=1;
	_nN[1]=1;
	_nN[2]=1;
	_nInembed=NULL;
	_nOnembed=NULL;
	_nIstride=1;
	_nIdist=1;
	_nOstride=1;
	_nOdist=1;

	// Start with all grid points and divide out each transformed extent,
	// which leaves the number of independent transforms
	_nHowMany=_myGrid.GetTotal();
	int nSlot{0};
	for(int iDim=_myGrid.GetDim()-1;iDim>=0;--iDim)
	{
		if(!bDimTrans[iDim])
		{
			// untransformed dimension: nothing to record
			// (a stride update used to be sketched here but was disabled)
			continue;
		}
		_nHowMany/=_myGrid.GetSize(iDim);
		_nN[nSlot]=_myGrid.GetSize(iDim);
		++nSlot;
		_nIdist*=_myGrid.GetSize(iDim);
		++_nRank;
	}

	// Output geometry mirrors the input geometry (in-place transform);
	// the embedding arrays equal the logical size, as in the FFTW manual
	_nOstride=_nIstride;
	_nOdist=_nIdist;
	_nOnembed=_nN;
	_nInembed=_nN;

	// Create the plan itself
	_plan=fftw_plan_many_dft(_nRank, _nN, _nHowMany,
							 _dVal, _nInembed, _nIstride, _nIdist,
							 _dVal, _nOnembed, _nOstride, _nOdist,
							 nSign, FFTW_ESTIMATE);
	_bPlanSet=true;
	}