Beispiel #1
1
/*
 * Allocate the FFT work buffers and create the two batched FFTW plans.
 *
 * Reads conf.nreceivers and conf.sync_len into the file-level nreceivers and
 * corrlen variables and derives the transform lengths from them:
 *   fft1n = 2 * corrlen                      (forward transforms, one per receiver)
 *   fft2n = fft1n * CORRELATION_OVERSAMPLE   (backward transforms, nreceivers-1 of them)
 * All four buffers are zero-filled before the plans are created.
 *
 * Returns 0 on success. Returns -1 if the configuration is unusable
 * (too many receivers, fewer than one receiver, non-positive sync length),
 * if a buffer cannot be allocated, or if a plan cannot be created. On failure
 * everything allocated by this call is released and the corresponding
 * file-level pointers are reset to NULL.
 */
int sync_init() {
	int i, arraysize;
	nreceivers = conf.nreceivers;
	corrlen = conf.sync_len;
	if(nreceivers > NRECEIVERS_MAX) return -1;
	/* the buffer sizes below are products of these values, so a zero or
	 * negative value would produce a negative allocation size */
	if(nreceivers < 1 || corrlen < 1) return -1;

	/* start from a known state so the failure path can release
	 * exactly what this call has created */
	fft1in = NULL;
	fft1out = NULL;
	fft2in = NULL;
	fft2out = NULL;
	fft1plan = NULL;
	fft2plan = NULL;

	/* half of fft input will be zero-padded */
	fft1n = corrlen*2;
	fft2n = fft1n * CORRELATION_OVERSAMPLE;

	arraysize = nreceivers * fft1n;
	fft1in  = fftwf_malloc(arraysize * sizeof(*fft1in));
	fft1out = fftwf_malloc(arraysize * sizeof(*fft1out));
	if(fft1in == NULL || fft1out == NULL) goto fail;
	for(i = 0; i < arraysize; i++)
		fft1in[i] = fft1out[i] = 0;

	arraysize = (nreceivers-1) * fft2n;
	fft2in  = fftwf_malloc(arraysize * sizeof(*fft2in));
	fft2out = fftwf_malloc(arraysize * sizeof(*fft2out));
	/* with a single receiver the size is 0 and the allocator may
	 * legitimately return NULL, so only a non-empty request can fail */
	if(arraysize > 0 && (fft2in == NULL || fft2out == NULL)) goto fail;
	for(i = 0; i < arraysize; i++)
		fft2in[i] = fft2out[i] = 0;

	fft1plan = fftwf_plan_many_dft(
		1, &fft1n, nreceivers,
		fft1in,  NULL, 1, fft1n,
		fft1out, NULL, 1, fft1n,
		FFTW_FORWARD, FFTW_ESTIMATE);
	if(fft1plan == NULL) goto fail;

	fft2plan = fftwf_plan_many_dft(
		1, &fft2n, nreceivers-1,
		fft2in,  NULL, 1, fft2n,
		fft2out, NULL, 1, fft2n,
		FFTW_BACKWARD, FFTW_ESTIMATE);
	/* an empty batch (single receiver) is not treated as an error */
	if(fft2plan == NULL && nreceivers > 1) goto fail;
	return 0;

fail:
	if(fft1plan != NULL) fftwf_destroy_plan(fft1plan);
	if(fft2plan != NULL) fftwf_destroy_plan(fft2plan);
	fft1plan = NULL;
	fft2plan = NULL;
	fftwf_free(fft1in);
	fftwf_free(fft1out);
	fftwf_free(fft2in);
	fftwf_free(fft2out);
	fft1in = NULL;
	fft1out = NULL;
	fft2in = NULL;
	fft2out = NULL;
	return -1;
}
Beispiel #2
0
// Builds a batched, in-place 3-D FFT helper.
//
// n is stored as batch_size. One buffer `b` holding batch_size * GJP.Vol()
// complex values is allocated with the FFTW allocator of the compiled
// precision (USE_SINGLE / USE_DOUBLE / USE_LONG_DOUBLE), and two plans over
// that buffer are created: p1 (FFTW_BACKWARD) and p2 (FFTW_FORWARD).
// Each plan transforms batch_size contiguous volumes of extent
// Xsites x Ysites x Zsites, consecutive volumes being GJP.Vol() elements apart.
Fourier::Fourier(int n)
{
  // Routine name handed to the verbosity logger.
  const char* fname = "void Fourier::Initialize()";
  VRB.Debug(fname, "Allocating memory and creating plans for FFTW.");

  batch_size = n;

  // Transform buffer, shared by both plans (the transforms are in place).
#if defined(USE_SINGLE)
  b = (fftComplex*) fftwf_malloc(batch_size*GJP.Vol()*sizeof(fftComplex));
#endif
#if defined(USE_DOUBLE)
  b = (fftComplex*) fftw_malloc(batch_size*GJP.Vol()*sizeof(fftComplex));
#endif
#if defined(USE_LONG_DOUBLE)
  b = (fftComplex*) fftwl_malloc(batch_size*GJP.Vol()*sizeof(fftComplex));
#endif

  // Below needs to be adjusted for batch ffts; double check in place
  int volume = GJP.Vol();                                       // distance between batches
  int extent[3] = { GJP.Xsites(), GJP.Ysites(), GJP.Zsites() }; // per-transform shape

#if defined(USE_SINGLE)
  p1 = fftwf_plan_many_dft(3, extent, batch_size,
                           b, NULL, 1, volume,
                           b, NULL, 1, volume,
                           FFTW_BACKWARD, FFTW_EXHAUSTIVE);
  p2 = fftwf_plan_many_dft(3, extent, batch_size,
                           b, NULL, 1, volume,
                           b, NULL, 1, volume,
                           FFTW_FORWARD, FFTW_EXHAUSTIVE);
#endif
#if defined(USE_DOUBLE)
  p1 = fftw_plan_many_dft(3, extent, batch_size,
                          b, NULL, 1, volume,
                          b, NULL, 1, volume,
                          FFTW_BACKWARD, FFTW_EXHAUSTIVE);
  p2 = fftw_plan_many_dft(3, extent, batch_size,
                          b, NULL, 1, volume,
                          b, NULL, 1, volume,
                          FFTW_FORWARD, FFTW_EXHAUSTIVE);
#endif
#if defined(USE_LONG_DOUBLE)
  p1 = fftwl_plan_many_dft(3, extent, batch_size,
                           b, NULL, 1, volume,
                           b, NULL, 1, volume,
                           FFTW_BACKWARD, FFTW_EXHAUSTIVE);
  p2 = fftwl_plan_many_dft(3, extent, batch_size,
                           b, NULL, 1, volume,
                           b, NULL, 1, volume,
                           FFTW_FORWARD, FFTW_EXHAUSTIVE);
#endif
}
Beispiel #3
0
/*
 * Qsquaref - square the spectrum of every length-k row of FQ.
 *
 * FQ holds nx*nz rows of k complex samples; row r starts at FQ[r*k].
 * Each row is copied into a scratch row of length 2*k-1 and zero-padded,
 * transformed forward, squared element-wise and divided by 2*k-1, then
 * transformed backward into FQ2. By the convolution theorem this is the
 * linear self-convolution of each input row.
 *
 * FQ2 must have room for (2*k-1)*nx*nz complex values; row r of the result
 * starts at FQ2[r*(2*k-1)].
 *
 * The function returns without touching FQ2 when k, nx or nz is smaller
 * than 1. It calls abort() if the scratch buffer or an FFTW plan cannot be
 * created, because the void signature leaves no way to report the failure.
 */
void Qsquaref(fftwf_complex const *FQ,fftwf_complex *FQ2,int nx,int nz,int k){

  /* nothing to transform; also keeps the size arithmetic below positive */
  if (k < 1 || nx < 1 || nz < 1) return;

  const int padded = 2*k-1;   /* length of one zero-padded row */
  const int howmany = nx*nz;  /* number of rows */
  const size_t total = (size_t)padded*(size_t)howmany;
  size_t i;
  int row, l;

  fftwf_complex *FQ1 = (fftwf_complex *) fftwf_malloc(total*sizeof(fftwf_complex));
  if (FQ1 == NULL) abort();

  /* Copy each row and explicitly zero its padding: fftwf_malloc does not
     clear memory, so the tail [k, 2k-1) would otherwise be uninitialised. */
  for (row=0;row<howmany;row++)
    {
      fftwf_complex *dst = FQ1 + (size_t)row*padded;
      fftwf_complex const *src = FQ + (size_t)row*k;
      for (l=0;l<k;l++)
	dst[l] = src[l];
      for (l=k;l<padded;l++)
	dst[l] = 0;
    }

  /* rank-1 transforms of length 2k-1, rows stored back to back */
  int dims[1];
  dims[0] = padded;

  fftwf_plan plan_forward,plan_backward;
  plan_forward = fftwf_plan_many_dft(1,dims,howmany,FQ1,NULL,1,padded,FQ1,NULL,1,padded,FFTW_FORWARD,FFTW_ESTIMATE);
  plan_backward = fftwf_plan_many_dft(1,dims,howmany,FQ1,NULL,1,padded,FQ2,NULL,1,padded,FFTW_BACKWARD,FFTW_ESTIMATE);
  if (plan_forward == NULL || plan_backward == NULL)
    {
      if (plan_forward != NULL) fftwf_destroy_plan(plan_forward);
      if (plan_backward != NULL) fftwf_destroy_plan(plan_backward);
      fftwf_free(FQ1);
      abort();
    }

  fftwf_execute(plan_forward);

  /* square the spectrum; dividing by the transform length here compensates
     for FFTW's unnormalised forward/backward pair */
  for (i=0;i<total;i++)
    FQ1[i] = FQ1[i]*FQ1[i]/padded;

  fftwf_execute(plan_backward);

  /* cleaning */
  fftwf_destroy_plan(plan_forward);
  fftwf_destroy_plan(plan_backward);
  fftwf_free(FQ1);
}
Beispiel #4
0
/*
 * Complex FFT along a single axis of a multi-dimensional complex dataset.
 *
 * The dataset is viewed as n3 slabs of shape [nx or nk][n1], where n1 is the
 * product of the dimensions before `axis` and n3 the product of those after
 * it. Forward mode (inv=n) zero-pads the axis from nx to nk samples and
 * writes nk wavenumbers starting at k0 = -0.5/dx; inverse mode (inv=y) reads
 * nk wavenumbers and writes the first nx samples. The original axis length,
 * origin and label are carried in the fft3_n#, fft3_o#, fft3_label# header
 * keys so that the inverse can restore them.
 *
 * Built with SF_HAS_FFTW the transform is one strided in-place FFTW plan;
 * otherwise it runs KISS FFT column by column through a scratch trace.
 */
int main (int argc, char **argv)
{
    int n1, nx, n3, dim, n[SF_MAX_DIM];     /* dimensions */
    int i1, ix, i3, j;       /* loop counters */
    int nk;                  /* number of wavenumbers */
    int npad;                /* padding */

    float dx;                /* space sampling interval */
    float dk;                /* wavenumber sampling interval */
    float x0;                /* staring space */
    float k0;                /* starting wavenumber */
    float wt;                /* Fourier scaling */

    kiss_fft_cpx **cp;       /* frequency-wavenumber */

    bool inv;                /* forward or inverse */
    bool sym;                /* symmetric scaling */
    bool opt;                /* optimal padding */
    int sign;                /* transform sign */
    int axis;                /* transform axis */

    char varname[12];        /* variable name */
    char *label;             /* transformed axis label */

#ifdef SF_HAS_FFTW
    fftwf_plan cfg;
#else 
    kiss_fft_cpx *ctrace;
    kiss_fft_cfg cfg;
#endif

    sf_file in=NULL, out=NULL;

    sf_init(argc,argv);
    in  = sf_input ( "in");
    out = sf_output("out");

    if (SF_COMPLEX != sf_gettype(in)) sf_error ("Need complex input");

    if (!sf_getbool("inv",&inv)) inv = false;
    /* if y, perform inverse transform */

    if (!sf_getbool("sym",&sym)) sym=false;
    /* if y, apply symmetric scaling to make the FFT operator Hermitian */

    if (!sf_getint("sign",&sign)) sign = inv? 1: 0;
    /* transform sign (0 or 1) */

    if (!sf_getbool("opt",&opt)) opt=true;
    /* if y, determine optimal size for efficiency */

    if (!sf_getint("axis",&axis)) axis=2;
    /* Axis to transform */

    dim = sf_filedims(in,n);

    /* collapse the axes before the transform axis into n1 and the axes
       after it into n3 (axis is 1-based, n[] is 0-based) */
    n1=n3=1;
    for (j=0; j < dim; j++) {
	if      (j < axis-1) n1 *= n[j];
	else if (j > axis-1) n3 *= n[j]; 
    }

    if (inv) {
	/* inverse: the input axis is wavenumber; restore the spatial axis
	   from the fft3_* keys, falling back to nk samples starting at 0 */
	sprintf(varname,"n%d",axis);
	if (!sf_histint  (in,varname,&nk)) sf_error("No %s= in input",varname);
	sprintf(varname,"d%d",axis);
	if (!sf_histfloat(in,varname,&dk)) sf_error("No %s= in input",varname);
	
	sprintf(varname,"fft3_n%d",axis);
	if (!sf_histint  (in,varname,&nx)) nx=nk;
	sprintf(varname,"fft3_o%d",axis);
	if (!sf_histfloat(in,varname,&x0)) x0 = 0.; 
	sprintf(varname,"fft3_label%d",axis);
	label = sf_histstring(in,varname);
	
	dx = 1./(nk*dk);
	
	sprintf(varname,"n%d",axis);
	sf_putint (out,varname,nx);
	sprintf(varname,"d%d",axis);
	sf_putfloat (out,varname,dx);
	sprintf(varname,"o%d",axis);
	sf_putfloat (out,varname,x0);
	sprintf(varname,"label%d",axis);
	/* prefer the saved label; otherwise let sf_fft_label derive one
	   from the current label of the input axis */
	if (NULL != label) {
	    sf_putstring(out,varname,label);
	} else if (NULL != (label = sf_histstring(in,varname))) {
	    (void) sf_fft_label(axis,label,out);
	}
    } else {
	/* forward: remember the spatial axis in the fft3_* keys, then
	   describe the padded wavenumber axis */
	sprintf(varname,"n%d",axis);
	if (!sf_histint  (in,varname,&nx)) sf_error("No %s= in input",varname);
	sprintf(varname,"d%d",axis);
	if (!sf_histfloat(in,varname,&dx)) sf_error("No %s= in input",varname);
	sprintf(varname,"o%d",axis);
	if (!sf_histfloat(in,varname,&x0)) x0 = 0.;
	sprintf(varname,"label%d",axis);
	label = sf_histstring(in,varname);
	
	sprintf(varname,"fft3_n%d",axis);
	sf_putint(out,varname,nx);
	sprintf(varname,"fft3_o%d",axis);
	sf_putfloat(out,varname,x0);
	if (NULL != label) {
	    sprintf(varname,"fft3_label%d",axis);
	    sf_putstring(out,varname,label);
	}
	
	if (!sf_getint("pad",&npad)) npad=2;
	/* padding factor */
	
	/* determine wavenumber sampling */
	nk = opt? kiss_fft_next_fast_size(nx*npad): nx*npad;
	if (nk != nx) sf_warning("padded to %d",nk);
	
	dk = 1./(nk*dx);
	k0 = -0.5/dx;
	
	sprintf(varname,"n%d",axis);
	sf_putint (out,varname,nk);
	sprintf(varname,"d%d",axis);
	sf_putfloat (out,varname,dk);
	sprintf(varname,"o%d",axis);
	sf_putfloat (out,varname,k0);
	/* fall back to a generic label when sf_fft_label reports
	   that it did not set one */
	if (NULL != label && !sf_fft_label(axis,label,out)) {
	    sprintf(varname,"label%d",axis);
	    sf_putstring(out,varname,"Wavenumber");
	}
    }
    sprintf(varname,"unit%d",axis);
    sf_fft_unit(axis,sf_histstring(in,varname),out);

    /* one slab, indexed cp[ix][i1] with ix < nk and i1 < n1; in both
       directions the I/O below treats cp[0] as one contiguous buffer */
    cp     = (kiss_fft_cpx**) sf_complexalloc2(n1,nk);

#ifdef SF_HAS_FFTW
    /* ix is borrowed to hold the transform length for the planner.
       n1 transforms of length nk, in place: consecutive samples of one
       transform are n1 elements apart, consecutive transforms 1 apart. */
    ix = nk;
    cfg = fftwf_plan_many_dft(1, &ix, n1,
			      (fftwf_complex*) cp[0], NULL, n1, 1,
			      (fftwf_complex*) cp[0], NULL, n1, 1,
			      sign? FFTW_BACKWARD: FFTW_FORWARD, 
			      FFTW_ESTIMATE);
    if (NULL == cfg) sf_error("FFTW failure.");
#else
    /* scratch trace receiving one transformed column at a time */
    ctrace = (kiss_fft_cpx*)  sf_complexalloc(nk);
    cfg = kiss_fft_alloc(nk,sign,NULL,NULL);
#endif

    /* FFT scaling */
    wt = sym? 1./sqrtf((float) nk): 1./nk;

    for (i3=0; i3<n3; i3++) {
	if (inv) {
	    /* each complex sample is read as two floats */
	    sf_floatread((float*) cp[0],n1*nk*2,in);

#ifdef SF_HAS_FFTW
	    fftwf_execute(cfg);
	    
	    /* scale and undo the centering: odd-indexed samples get the
	       opposite sign; only the first nx samples are kept */
	    for (ix=0; ix<nx; ix++) {
		for (i1=0; i1 < n1; i1++) {
		    cp[ix][i1] = sf_crmul(cp[ix][i1],ix%2? -wt: wt);
		}
	    }
#else	    
	    for (i1=0; i1 < n1; i1++) {
		/* Fourier transform k to x */
		kiss_fft_stride(cfg,cp[0]+i1,ctrace,n1);
		
		/* scale, undo the centering and store back into column i1 */
		for (ix=0; ix<nx; ix++) {
		    cp[ix][i1] = sf_crmul(ctrace[ix],ix%2? -wt: wt);
		}
	    }
#endif

	    sf_floatwrite((float*) cp[0],n1*nx*2,out);
	} else {
	    sf_floatread((float*) cp[0],n1*nx*2,in);

	    /* FFT centering */
	    /* (negates every odd-indexed sample along the transform axis) */
	    for (ix=1; ix<nx; ix+=2) {
		for (i1=0; i1<n1; i1++) {
		    cp[ix][i1] = sf_cneg(cp[ix][i1]);
		}
	    }

	    /* with symmetric scaling the forward transform is scaled by
	       1/sqrt(nk); without it the forward transform is unscaled */
	    if (sym) {
		for (ix=0; ix<nx; ix++) {
		    for (i1=0; i1 < n1; i1++) {
			cp[ix][i1] = sf_crmul(cp[ix][i1],wt);
		    }
		}
	    }

	    /* pad with zeros */
	    for (ix=nx; ix<nk; ix++) {
		for (i1=0; i1<n1; i1++) {
		    cp[ix][i1].r = 0.;
		    cp[ix][i1].i = 0.;
		}
	    }

#ifdef SF_HAS_FFTW
	    fftwf_execute(cfg);
#else
	    for (i1=0; i1 < n1; i1++) {
		/* Fourier transform x to k */
		kiss_fft_stride(cfg,cp[0]+i1,ctrace,n1);
		
		/* Transpose */
		for (ix=0; ix<nk; ix++) {
		    cp[ix][i1] = ctrace[ix];
		}
	    }
#endif

	    sf_floatwrite((float*) cp[0],n1*nk*2,out);
	}
    }


    exit (0);
}
Beispiel #5
0
int main(int argc, char* argv[])
{
    	bool verb, pow2;
    	char key[7], *mode;;
    	int n1, n2, n1padded, n2padded, num, dim, n[SF_MAX_DIM], npadded[SF_MAX_DIM], ii[SF_MAX_DIM];
	int i, j, i1, i2, index, nw, iter, niter, nthr, *pad;
    	float thr, pclip, normp;
    	float *dobs_t, *thresh, *mask;
    	fftwf_complex *mm, *dd, *dobs;
    	fftwf_plan fft1, ifft1, fftrem, ifftrem;/* execute plan for FFT and IFFT */
    	sf_file in, out, Fmask;	/* mask and I/O files*/ 

    	sf_init(argc,argv);	/* Madagascar initialization */
    	in=sf_input("in");	/* read the data to be interpolated */
    	out=sf_output("out"); 	/* output the reconstructed data */
    	Fmask=sf_input("mask");	/* read the (n-1)-D mask for n-D data */
 
    	if(!sf_getbool("verb",&verb))    	verb=false;
    	/* verbosity */
    	if(!sf_getbool("pow2",&pow2))    	pow2=false;
    	/* round up the length of each axis to be power of 2 */
    	if (!sf_getint("niter",&niter)) 	niter=100;
    	/* total number of iterations */
    	if (!sf_getfloat("pclip",&pclip)) 	pclip=10.;
    	/* starting data clip percentile (default is 10)*/
    	if ( !(mode=sf_getstring("mode")) ) 	mode = "exp";
    	/* thresholding mode: 'hard', 'soft','pthresh','exp';
	  'hard', hard thresholding;	   'soft', soft thresholding; 
	  'pthresh', generalized quasi-p;  'exp', exponential shrinkage */
    	if (pclip <=0. || pclip > 100.)	sf_error("pclip=%g should be > 0 and <= 100",pclip);
    	if (!sf_getfloat("normp",&normp)) 	normp=1.;
    	/* quasi-norm: normp<2 */
   	for (i=0; i < SF_MAX_DIM; i++) {/* dimensions */
		snprintf(key,3,"n%d",i+1);
		if (!sf_getint(key,n+i) && 
		    (NULL == in || !sf_histint(in,key,n+i))) break;
		/*( n# size of #-th axis )*/  
		sf_putint(out,key,n[i]);
    	}
    	if (0==i) sf_error("Need n1=");
    	dim=i;
    	pad=sf_intalloc (dim);
	for (i=0; i<dim; i++) pad[i]=0;
	sf_getints("pad",pad,dim); /* number of zeros to be padded for each axis */

    	n1=n[0];
    	n2=sf_leftsize(in,1);
	for (i=0; i<SF_MAX_DIM; i++) npadded[i]=1;
	npadded[0]=n1+pad[0];
	n1padded=npadded[0];
	n2padded=1;
	for (i=1; i<dim; i++){
	  npadded[i]=n[i]+pad[i];
	  if (pow2) {/* zero-padding to be power of 2 */
	    npadded[i]=nextpower2(n[i]);
	    fprintf(stderr,"n%d=%d n%dpadded=%d\n",i,n[i],i,npadded[i]);
	  }
	  n2padded*=npadded[i];
	}
	nw=npadded[0]/2+1;
	num=nw*n2padded;/* data: total number of elements in frequency domain */

    	/* allocate data and mask arrays */
	thresh=(float*)            malloc(nw*n2padded*sizeof(float));
    	dobs_t=(float*)      fftwf_malloc(n1padded*n2padded*sizeof(float));  /* time domain observation */
    	dobs=(fftwf_complex*)fftwf_malloc(nw*n2padded*sizeof(fftwf_complex));/* freq-domain observation */
    	dd=(fftwf_complex*)  fftwf_malloc(nw*n2padded*sizeof(fftwf_complex));
    	mm=(fftwf_complex*)  fftwf_malloc(nw*n2padded*sizeof(fftwf_complex));
 
    	if (NULL != sf_getstring("mask")){
		mask=sf_floatalloc(n2padded);
    	} else sf_error("mask needed!");

	/* initialize the input data and mask arrays */
	memset(dobs_t,0,n1padded*n2padded*sizeof(float));
	memset(mask,0,n2padded*sizeof(float));
	for (i=0; i<n1*n2; i+=n1){
	  sf_line2cart(dim,n,i,ii);
	  j=sf_cart2line(dim,npadded,ii);
	  sf_floatread(&dobs_t[j], n1, in);
	  sf_floatread(&mask[j/n1padded], 1, Fmask);
	}

	/* FFT for the 1st dimension and the remaining dimensions */
     	fft1=fftwf_plan_many_dft_r2c(1, &n1padded, n2padded, dobs_t, &n1padded, 1, n1padded, dobs, &n1padded, 1, nw, FFTW_MEASURE);
   	ifft1=fftwf_plan_many_dft_c2r(1, &n1padded, n2padded, dobs, &n1padded, 1, nw, dobs_t, &n1padded, 1, n1padded, FFTW_MEASURE);
	fftrem=fftwf_plan_many_dft(dim-1, &npadded[1], nw, dd, &npadded[1], nw, 1, dd, &npadded[1], nw, 1, FFTW_FORWARD, FFTW_MEASURE);
	ifftrem=fftwf_plan_many_dft(dim-1, &npadded[1], nw, dd, &npadded[1], nw, 1, dd, &npadded[1], nw, 1, FFTW_BACKWARD, FFTW_MEASURE);

	/* transform the data from time domain to frequency domain: dobs_t-->dobs */
	fftwf_execute(fft1);
	for(i=0; i<num; i++) dobs[i]/=sqrtf(n1padded);
	memset(mm,0,num*sizeof(fftwf_complex));

	/* Iterative Shrinkage-Thresholding (IST) Algorithm:
	   	mm^{k+1}=T[mm^k+A^* M^* (dobs-M A mm^k)] (M^*=M; Mdobs=dobs)
		   	=T[mm^k+A^*(dobs-M A mm^k)]; (k=0,1,...niter-1)
	   	dd^=A mm^; 
	*/
    	for(iter=0; iter<niter; iter++)
    	{
		/* dd<-- A mm^k */
		memcpy(dd, mm, num*sizeof(fftwf_complex));
		fftwf_execute(ifftrem);
		for(i=0; i<num; i++) dd[i]/=sqrtf(n2padded);

		/* apply mask: dd<--dobs-M A mm^k=dobs-M dd */
		for(i2=0; i2<n2padded; i2++)
		for(i1=0; i1<nw; i1++)
		{ 
			index=i1+nw*i2;
			dd[index]=dobs[index]-mask[i2]*dd[index];
		}

		/* mm^k += A^*(dobs-M A mm^k); dd=dobs-M A mm^k */
		fftwf_execute(fftrem);
		for(i=0; i<num; i++) mm[i]+=dd[i]/sqrtf(n2padded);		

		/* perform thresholding */
		for(i=0; i<num; i++)	thresh[i]=cabsf(mm[i]);
	   	nthr = 0.5+num*(1.-0.01*pclip);  /* round off */
	    	if (nthr < 0) nthr=0;
	    	if (nthr >= num) nthr=num-1;
		thr=sf_quantile(nthr, num, thresh);
		sf_cpthresh(mm, num, thr, normp, mode);

		if (verb) sf_warning("iteration %d;",iter+1);
    	}

	/* frequency--> time domain: dobs-->dobs_t */
	memcpy(dd, mm, num*sizeof(fftwf_complex));
	fftwf_execute(ifftrem);
	for(i=0; i<num; i++) dd[i]/=sqrtf(n2padded);
	memcpy(dobs, dd, num*sizeof(fftwf_complex));
	fftwf_execute(ifft1);
	for(i=0; i<n1padded*n2padded; i++) dobs_t[i]/=sqrtf(n1padded);
	
	for (i=0; i<n1*n2; i+=n1){
	  sf_line2cart(dim,n,i,ii);
	  j=sf_cart2line(dim,npadded,ii);
	  sf_floatwrite(&dobs_t[j],n1,out);
	}

	free(thresh);
	fftwf_free(dobs_t);
	fftwf_free(dobs);
	fftwf_free(dd);
	fftwf_free(mm);

    	exit(0);
}
Beispiel #6
0
void FFT::apply(Window im, bool transformX, bool transformY, bool transformT, bool inverse) {
    assert(im.channels % 2 == 0, "-fft requires an image with an even number of channels\n");

    if (im.width == 1) transformX = false;
    if (im.height == 1) transformY = false;
    if (im.frames == 1) transformT = false;

    // rank 0
    if (!transformX && !transformY && !transformT) return;

    if (transformX && transformY && transformT) { // rank 3
        int n[] = {im.frames, im.height, im.width};
        int nembed[] = {im.frames, im.tstride/im.ystride, im.ystride/im.xstride};
        
        fftwf_plan plan = fftwf_plan_many_dft(3, n, im.channels/2,
                                              (fftwf_complex *)im(0, 0, 0), nembed, im.channels/2, 1,
                                              (fftwf_complex *)im(0, 0, 0), nembed, im.channels/2, 1,
                                              inverse ? FFTW_BACKWARD : FFTW_FORWARD, FFTW_ESTIMATE);
        
        fftwf_execute(plan);
        fftwf_destroy_plan(plan);
    } else if (transformX && transformY) { // rank 2
        int n[] = {im.height, im.width};
        int nembed[] = {im.tstride/im.ystride, im.ystride/im.xstride};
            
        for (int t = 0; t < im.frames; t++) {
            fftwf_plan plan = fftwf_plan_many_dft(2, n, im.channels/2,
                                                  (fftwf_complex *)im(0, 0, t), nembed, im.channels/2, 1,
                                                  (fftwf_complex *)im(0, 0, t), nembed, im.channels/2, 1,
                                                  inverse ? FFTW_BACKWARD : FFTW_FORWARD, FFTW_ESTIMATE);
            
            fftwf_execute(plan);        
            fftwf_destroy_plan(plan);
        }
    } else if (transformT && transformY) { // rank 2
        int n[] = {im.frames, im.height};
        int nembed[] = {im.frames, im.tstride/im.ystride};
        
        fftwf_plan plan = fftwf_plan_many_dft(2, n, im.width*im.channels/2,
                                              (fftwf_complex *)im(0, 0, 0), nembed, im.width*im.channels/2, 1,
                                              (fftwf_complex *)im(0, 0, 0), nembed, im.width*im.channels/2, 1,
                                              inverse ? FFTW_BACKWARD : FFTW_FORWARD, FFTW_ESTIMATE);
        
        fftwf_execute(plan);        
        fftwf_destroy_plan(plan);
    } else if (transformT && transformX) { // rank 2
        int n[] = {im.frames, im.width};
        int nembed[] = {im.frames, im.tstride/im.xstride};

        for (int y = 0; y < im.height; y++) {        
            fftwf_plan plan = fftwf_plan_many_dft(2, n, im.channels/2,
                                                  (fftwf_complex *)im(0, y, 0), nembed, im.channels/2, 1,
                                                  (fftwf_complex *)im(0, y, 0), nembed, im.channels/2, 1,
                                                  inverse ? FFTW_BACKWARD : FFTW_FORWARD, FFTW_ESTIMATE);
            
            fftwf_execute(plan);        
            fftwf_destroy_plan(plan);        
        }
    } else if (transformX) { // rank 1
        int n[] = {im.width};
        int nembed[] = {im.width};

        for (int t = 0; t < im.frames; t++) {
            for (int y = 0; y < im.height; y++) {        
                fftwf_plan plan = fftwf_plan_many_dft(1, n, im.channels/2,
                                                      (fftwf_complex *)im(0, y, t), nembed, im.channels/2, 1,
                                                      (fftwf_complex *)im(0, y, t), nembed, im.channels/2, 1,
                                                      inverse ? FFTW_BACKWARD : FFTW_FORWARD, FFTW_ESTIMATE);
                
                fftwf_execute(plan);        
                fftwf_destroy_plan(plan);        
            }        
        }
    } else if (transformY) { // rank 1
        int n[] = {im.height};
        int nembed[] = {im.height};

        for (int t = 0; t < im.frames; t++) {
            fftwf_plan plan = fftwf_plan_many_dft(1, n, im.width*im.channels/2,
                                                  (fftwf_complex *)im(0, 0, t), nembed, im.ystride/2, 1,
                                                  (fftwf_complex *)im(0, 0, t), nembed, im.ystride/2, 1,
                                                  inverse ? FFTW_BACKWARD : FFTW_FORWARD, FFTW_ESTIMATE);
            
            fftwf_execute(plan);        
            fftwf_destroy_plan(plan);        
        }        
    } else if (transformT) { // rank 1
        int n[] = {im.frames};
        int nembed[] = {im.frames};
        for (int y = 0; y < im.height; y++) {
            fftwf_plan plan = fftwf_plan_many_dft(1, n, im.width*im.channels/2,
                                                  (fftwf_complex *)im(0, y, 0), nembed, im.tstride/2, 1,
                                                  (fftwf_complex *)im(0, y, 0), nembed, im.tstride/2, 1,
                                                  inverse ? FFTW_BACKWARD : FFTW_FORWARD, FFTW_ESTIMATE);
            fftwf_execute(plan);        
            fftwf_destroy_plan(plan);        
        }
    }       

    if (inverse) {
        float m = 1.0;
        if (transformX) m *= im.width;
        if (transformY) m *= im.height;
        if (transformT) m *= im.frames;
        Scale::apply(im, 1.0f/m);
    }
    

}
// FFT-based convolution of `signal` with `filter` over the first baseDim
// dimensions. Signal and filter are packed into one complex buffer, both are
// transformed by a single batched FFTW plan, multiplied in the frequency
// domain, transformed back and finally reordered into the real output array.
// `expand` selects the full (sd + fd - 1) output extent instead of the signal
// extent; `kind` selects how batches of signals and filters are paired.
// NOTE(review): T, convT, isDouble, roundOut and baseDim are not declared in
// this block; they presumably come from a template header just above it.
Array<T> fftconvolve(Array<T> const& signal, Array<T> const& filter,
                     const bool expand, ConvolveBatchKind kind)
{
    const af::dim4 sd = signal.dims();
    const af::dim4 fd = filter.dims();

    dim_t fftScale = 1;

    af::dim4 packed_dims;
    int fft_dims[baseDim];
    af::dim4 sig_tmp_dims, sig_tmp_strides;
    af::dim4 filter_tmp_dims, filter_tmp_strides;

    // Pack both signal and filter on same memory array, this will ensure
    // better use of the batched FFTW plan
    for (dim_t d = 0; d < 4; d++) {
        if (d < baseDim) {
            // convolved extent, rounded up to a power of two
            packed_dims[d] = nextpow2((unsigned)(sd[d] + fd[d] - 1));

            // FFTW expects the slowest dimension first, so the order is
            // reversed; dimension 0 holds packed pairs and is therefore halved
            const dim_t slot = baseDim - d - 1;
            fft_dims[slot] = (d == 0) ? packed_dims[d] / 2 : packed_dims[d];
            fftScale *= fft_dims[slot];
        }
        else if (d == baseDim) {
            // batch dimension: all signal batches followed by all filter batches
            packed_dims[d] = sd[d] + fd[d];
        }
        else {
            packed_dims[d] = 1;
        }
    }

    Array<convT> packed = createEmptyArray<convT>(packed_dims);
    convT *packed_ptr = packed.get();

    const af::dim4 packed_strides = packed.strides();

    // Views of the signal part and the filter part of the packed buffer
    sig_tmp_dims[0]    = filter_tmp_dims[0] = packed_dims[0];
    sig_tmp_strides[0] = filter_tmp_strides[0] = 1;

    for (dim_t d = 1; d < 4; d++) {
        const bool transformed = (d < baseDim);
        sig_tmp_dims[d]    = transformed ? packed_dims[d] : sd[d];
        filter_tmp_dims[d] = transformed ? packed_dims[d] : fd[d];

        sig_tmp_strides[d]    = sig_tmp_strides[d - 1] * sig_tmp_dims[d - 1];
        filter_tmp_strides[d] = filter_tmp_strides[d - 1] * filter_tmp_dims[d - 1];
    }

    // Calculate memory offsets for packed signal and filter
    convT *sig_tmp_ptr    = packed_ptr;
    convT *filter_tmp_ptr = packed_ptr + sig_tmp_strides[3] * sig_tmp_dims[3];

    // Number of packed complex elements in dimension 0
    dim_t sig_half_d0 = divup(sd[0], 2);

    // Pack signal in a complex matrix where first dimension is half the input
    // (allows faster FFT computation) and pad array to a power of 2 with 0s
    packData<convT, T>(sig_tmp_ptr, sig_tmp_dims, sig_tmp_strides, signal);

    // Pad filter array with 0s
    padArray<convT, T>(filter_tmp_ptr, filter_tmp_dims, filter_tmp_strides, filter);

    // Geometry shared by the forward and the inverse plan
    const int batches = static_cast<int>(packed_dims[baseDim]);
    const int stride  = static_cast<int>(packed_strides[0]);
    const int dist    = static_cast<int>(packed_strides[baseDim] / 2);

    // Compute forward FFT
    if (isDouble) {
        fftw_complex *buf = (fftw_complex*)packed_ptr;
        fftw_plan plan = fftw_plan_many_dft(baseDim, fft_dims, batches,
                                            buf, NULL, stride, dist,
                                            buf, NULL, stride, dist,
                                            FFTW_FORWARD, FFTW_ESTIMATE);

        fftw_execute(plan);
        fftw_destroy_plan(plan);
    }
    else {
        fftwf_complex *buf = (fftwf_complex*)packed_ptr;
        fftwf_plan plan = fftwf_plan_many_dft(baseDim, fft_dims, batches,
                                              buf, NULL, stride, dist,
                                              buf, NULL, stride, dist,
                                              FFTW_FORWARD, FFTW_ESTIMATE);

        fftwf_execute(plan);
        fftwf_destroy_plan(plan);
    }

    // The product is written over the filter part for ONE2MANY and over the
    // signal part otherwise; the same region is read back when reordering.
    const bool into_filter = (kind == ONE2MANY);
    convT *res_ptr        = into_filter ? filter_tmp_ptr : sig_tmp_ptr;
    af::dim4 res_dims     = into_filter ? filter_tmp_dims : sig_tmp_dims;
    af::dim4 res_strides  = into_filter ? filter_tmp_strides : sig_tmp_strides;

    // Multiply filter and signal FFT arrays
    complexMultiply<convT>(res_ptr, res_dims, res_strides,
                           sig_tmp_ptr, sig_tmp_dims, sig_tmp_strides,
                           filter_tmp_ptr, filter_tmp_dims, filter_tmp_strides,
                           kind);

    // Compute inverse FFT
    if (isDouble) {
        fftw_complex *buf = (fftw_complex*)packed_ptr;
        fftw_plan plan = fftw_plan_many_dft(baseDim, fft_dims, batches,
                                            buf, NULL, stride, dist,
                                            buf, NULL, stride, dist,
                                            FFTW_BACKWARD, FFTW_ESTIMATE);

        fftw_execute(plan);
        fftw_destroy_plan(plan);
    }
    else {
        fftwf_complex *buf = (fftwf_complex*)packed_ptr;
        fftwf_plan plan = fftwf_plan_many_dft(baseDim, fft_dims, batches,
                                              buf, NULL, stride, dist,
                                              buf, NULL, stride, dist,
                                              FFTW_BACKWARD, FFTW_ESTIMATE);

        fftwf_execute(plan);
        fftwf_destroy_plan(plan);
    }

    // Compute output dimensions
    dim4 oDims(1);
    if (!expand) {
        // same extent as the signal; for ONE2MANY the batch dimensions
        // follow the filter
        oDims = sd;
        if (kind == ONE2MANY) {
            for (dim_t d = baseDim; d < 4; ++d)
                oDims[d] = fd[d];
        }
    }
    else {
        const bool full_in_all_dims = (kind == ONE2ONE || kind == ONE2MANY);
        for (dim_t d = 0; d < 4; ++d) {
            if (full_in_all_dims || d < baseDim)
                oDims[d] = sd[d] + fd[d] - 1;
            else
                oDims[d] = sd[d];
        }
    }

    Array<T> out = createEmptyArray<T>(oDims);
    T* out_ptr = out.get();
    const af::dim4 out_dims = out.dims();
    const af::dim4 out_strides = out.strides();

    const af::dim4 filter_dims = filter.dims();

    // Reorder the output
    reorderOutput<T, convT, roundOut>
        (out_ptr, out_dims, out_strides,
         res_ptr, res_dims, res_strides,
         filter_dims, sig_half_d0, baseDim, fftScale, expand);

    return out;
}
Beispiel #8
0
/* Set up fft plans.  Need to have npol, nphase, nchan 
 * already filled in struct */
int cyclic_init_ffts(struct cyclic_work *w) {

    /* Infer lag, harmonic sizes from chan/phase */
    w->nlag = w->nchan; // Total number of lags including + and -
    w->nharm = w->nphase/2 + 1; // Only DC and positive harmonics

    /* Alloc temp arrays for planning */
    PS ps;
    CS cs;
    CC cc;
    PC pc;
    struct filter_time ft;
    struct filter_freq ff;
    struct profile_phase pp;
    struct profile_harm ph;

    ps.npol = cs.npol = cc.npol = pc.npol = w->npol;
    ps.nphase = pc.nphase = w->nphase;
    ps.nchan = cs.nchan = w->nchan;
    cs.nharm = cc.nharm = w->nharm;
    cc.nlag = pc.nlag = w->nlag;

    cyclic_alloc_ps(&ps);
    cyclic_alloc_cs(&cs);
    cyclic_alloc_cc(&cc);
    cyclic_alloc_pc(&pc);

    ft.nlag = w->nlag;
    ff.nchan = w->nchan;
    pp.nphase = w->nphase;
    ph.nharm = w->nharm;

    filter_alloc_time(&ft);
    filter_alloc_freq(&ff);
    profile_alloc_phase(&pp);
    profile_alloc_harm(&ph);

    /* FFT plans */
    int rv=0;

    /* ps2cs - r2c fft along phase (fastest) axis */
    w->ps2cs = fftwf_plan_many_dft_r2c(1, &w->nphase, w->npol*w->nchan,
            ps.data, NULL, 1, w->nphase,
            cs.data, NULL, 1, w->nharm,
            FFTW_MEASURE | FFTW_PRESERVE_INPUT);
    if (w->ps2cs == NULL) rv++; 

    /* cs2cc - c2c ifft along channel axis */
    w->cs2cc = fftwf_plan_many_dft(1, &w->nchan, w->npol*w->nharm,
            cs.data, NULL, w->nharm*w->npol, 1,
            cc.data, NULL, w->nharm*w->npol, 1,
            FFTW_BACKWARD, FFTW_MEASURE | FFTW_PRESERVE_INPUT);
    if (w->cs2cc == NULL) rv++; 
    
    /* cc2cs - c2c fft along lag axis */
    w->cc2cs = fftwf_plan_many_dft(1, &w->nlag, w->npol*w->nharm,
            cc.data, NULL, w->nharm*w->npol, 1,
            cs.data, NULL, w->nharm*w->npol, 1,
            FFTW_FORWARD, FFTW_MEASURE | FFTW_PRESERVE_INPUT);
    if (w->cc2cs == NULL) rv++; 

    /* time2freq, freq2time for filters */
    w->time2freq = fftwf_plan_dft_1d(w->nlag, ft.data, ff.data,
            FFTW_FORWARD, FFTW_MEASURE | FFTW_PRESERVE_INPUT);
    if (w->time2freq == NULL) rv++;
    w->freq2time = fftwf_plan_dft_1d(w->nchan, ff.data, ft.data,
            FFTW_BACKWARD, FFTW_MEASURE | FFTW_PRESERVE_INPUT);
    if (w->freq2time == NULL) rv++;

    /* phase2harm, harm2phase for profiles */
    w->phase2harm = fftwf_plan_dft_r2c_1d(w->nphase, pp.data, ph.data, 
            FFTW_MEASURE | FFTW_PRESERVE_INPUT);
    if (w->phase2harm == NULL) rv++;
    w->harm2phase = fftwf_plan_dft_c2r_1d(w->nphase, ph.data, pp.data, 
            FFTW_MEASURE | FFTW_PRESERVE_INPUT);
    if (w->harm2phase == NULL) rv++;

    cyclic_free_ps(&ps);
    cyclic_free_cs(&cs);
    cyclic_free_cc(&cc);
    cyclic_free_pc(&pc);

    filter_free_time(&ft);
    filter_free_freq(&ff);
    profile_free_phase(&pp);
    profile_free_harm(&ph);

    return(rv);
}