int sync_init() { int i, arraysize; nreceivers = conf.nreceivers; corrlen = conf.sync_len; if(nreceivers > NRECEIVERS_MAX) return -1; /* half of fft input will be zero-padded */ fft1n = corrlen*2; fft2n = fft1n * CORRELATION_OVERSAMPLE; arraysize = nreceivers * fft1n; fft1in = fftwf_malloc(arraysize * sizeof(*fft1in)); fft1out = fftwf_malloc(arraysize * sizeof(*fft1out)); for(i = 0; i < arraysize; i++) fft1in[i] = fft1out[i] = 0; arraysize = (nreceivers-1) * fft2n; fft2in = fftwf_malloc(arraysize * sizeof(*fft2in)); fft2out = fftwf_malloc(arraysize * sizeof(*fft2out)); for(i = 0; i < arraysize; i++) fft2in[i] = fft2out[i] = 0; fft1plan = fftwf_plan_many_dft( 1, &fft1n, nreceivers, fft1in, NULL, 1, fft1n, fft1out, NULL, 1, fft1n, FFTW_FORWARD, FFTW_ESTIMATE); fft2plan = fftwf_plan_many_dft( 1, &fft2n, nreceivers-1, fft2in, NULL, 1, fft2n, fft2out, NULL, 1, fft2n, FFTW_BACKWARD, FFTW_ESTIMATE); return 0; }
Fourier::Fourier(int n) { const char* fname = "void Fourier::Initialize()"; VRB.Debug(fname, "Allocating memory and creating plans for FFTW."); batch_size = n; #ifdef USE_SINGLE b = (fftComplex*) fftwf_malloc(batch_size*GJP.Vol()*sizeof(fftComplex)); #endif #ifdef USE_DOUBLE b = (fftComplex*) fftw_malloc(batch_size*GJP.Vol()*sizeof(fftComplex)); #endif #ifdef USE_LONG_DOUBLE b = (fftComplex*) fftwl_malloc(batch_size*GJP.Vol()*sizeof(fftComplex)); #endif // Below needs to be adjusted for batch ffts; double check in place int vol = GJP.Vol(); int dims[3] = { GJP.Xsites(), GJP.Ysites(), GJP.Zsites() }; #ifdef USE_SINGLE p1 = fftwf_plan_many_dft(3, dims, batch_size, b, NULL, 1, vol, b, NULL, 1, vol, FFTW_BACKWARD, FFTW_EXHAUSTIVE); p2 = fftwf_plan_many_dft(3, dims, batch_size, b, NULL, 1, vol, b, NULL, 1, vol, FFTW_FORWARD, FFTW_EXHAUSTIVE); #endif #ifdef USE_DOUBLE p1 = fftw_plan_many_dft(3, dims, batch_size, b, NULL, 1, vol, b, NULL, 1, vol, FFTW_BACKWARD, FFTW_EXHAUSTIVE); p2 = fftw_plan_many_dft(3, dims, batch_size, b, NULL, 1, vol, b, NULL, 1, vol, FFTW_FORWARD, FFTW_EXHAUSTIVE); #endif #ifdef USE_LONG_DOUBLE p1 = fftwl_plan_many_dft(3, dims, batch_size, b, NULL, 1, vol, b, NULL, 1, vol, FFTW_BACKWARD, FFTW_EXHAUSTIVE); p2 = fftwl_plan_many_dft(3, dims, batch_size, b, NULL, 1, vol, b, NULL, 1, vol, FFTW_FORWARD, FFTW_EXHAUSTIVE); #endif }
/*
 * Qsquaref - square a set of length-k sequences by FFT.
 *
 * FQ holds nx*nz sequences of k complex values each; sequence j = m*nz+n
 * occupies FQ[k*j .. k*j+k-1].  Every sequence is copied into a scratch
 * buffer, zero-padded to length 2*k-1, forward transformed, squared
 * element-wise (with the 1/(2*k-1) normalization folded in) and transformed
 * back.  The result is written to FQ2, which must have room for
 * (2*k-1)*nx*nz complex values laid out the same way (row length 2*k-1).
 *
 * The function is a no-op when k, nx or nz is not positive.
 *
 * NOTE(review): the void signature offers no way to report an allocation or
 * planning failure, so those results are still not checked here.
 */
void Qsquaref(fftwf_complex const *FQ,fftwf_complex *FQ2,int nx,int nz,int k){
    int l, row;
    size_t i;

    if (k < 1 || nx < 1 || nz < 1) return;

    const int padded  = 2*k-1;    /* transform length after zero padding */
    const int howmany = nx*nz;    /* number of independent sequences     */
    /* Element count in size_t so large grids do not overflow int. */
    const size_t total = (size_t)padded*(size_t)howmany;

    fftwf_complex *FQ1 = (fftwf_complex *) fftwf_malloc(total*sizeof(fftwf_complex));

    for (row=0; row<howmany; row++) {
        fftwf_complex *dst = FQ1 + (size_t)row*(size_t)padded;
        fftwf_complex const *src = FQ + (size_t)row*(size_t)k;

        for (l=0; l<k; l++) dst[l] = src[l];
        /* fftwf_malloc does not clear memory: the padding must be written
         * explicitly or the transform consumes uninitialized values. */
        for (l=k; l<padded; l++) dst[l] = 0.0f;
    }

    /* A one-element array on the stack replaces the heap array that was
     * allocated for the plan dimensions and never freed. */
    int dims[1];
    dims[0] = padded;

    fftwf_plan plan_forward, plan_backward;
    plan_forward  = fftwf_plan_many_dft(1,dims,howmany,
                                        FQ1,NULL,1,padded,
                                        FQ1,NULL,1,padded,
                                        FFTW_FORWARD,FFTW_ESTIMATE);
    plan_backward = fftwf_plan_many_dft(1,dims,howmany,
                                        FQ1,NULL,1,padded,
                                        FQ2,NULL,1,padded,
                                        FFTW_BACKWARD,FFTW_ESTIMATE);

    fftwf_execute(plan_forward);

    /* Square every spectral sample; the rows are contiguous, so one flat
     * pass covers all of them. */
    for (i=0; i<total; i++) {
        FQ1[i] = FQ1[i]*FQ1[i]/(2*k-1);
    }

    fftwf_execute(plan_backward);

    /* cleaning */
    fftwf_destroy_plan(plan_forward);
    fftwf_destroy_plan(plan_backward);
    fftwf_free(FQ1);
}
/*
 * FFT of a complex dataset along a single axis (parameter axis=, default 2).
 *
 * The data are viewed as n3 slabs of n1 x (axis length) samples, where n1 is
 * the product of the dimensions before the transformed axis and n3 the
 * product of those after it.  Each slab is read, transformed and written in
 * turn.
 *
 * Forward (inv=n): the axis of length nx is zero-padded to nk samples,
 * odd-numbered samples are negated ("FFT centering"), and the output axis is
 * described by n=nk, d=1/(nk*dx), o=-0.5/dx.  The original axis length,
 * origin and label are saved in the header as fft3_n#, fft3_o#, fft3_label#.
 *
 * Inverse (inv=y): nk and dk come from the input header, the output length
 * nx / origin / label are restored from the fft3_* keys when present, and
 * only the first nx transformed samples are written, each scaled by +/-wt.
 *
 * The transform is done with FFTW when SF_HAS_FFTW is defined and with
 * kiss_fft otherwise.
 *
 * Comments that directly follow an sf_get*() call describe that command-line
 * parameter; keep them attached to the call.
 */
int main (int argc, char **argv)
{
    int n1, nx, n3, dim, n[SF_MAX_DIM]; /* dimensions */
    int i1, ix, i3, j;     /* loop counters */
    int nk;                /* number of wavenumbers */
    int npad;              /* padding */
    float dx;              /* space sampling interval */
    float dk;              /* wavenumber sampling interval */
    float x0;              /* starting space */
    float k0;              /* starting wavenumber */
    float wt;              /* Fourier scaling */
    kiss_fft_cpx **cp;     /* frequency-wavenumber */
    bool inv;              /* forward or inverse */
    bool sym;              /* symmetric scaling */
    bool opt;              /* optimal padding */
    int sign;              /* transform sign */
    int axis;              /* transform axis */
    /* NOTE(review): 12 bytes holds "fft3_label" plus one digit and the
       terminator, so a single-digit axis is assumed; axis is not range
       checked here. */
    char varname[12];      /* variable name */
    char *label;           /* transformed axis label */
#ifdef SF_HAS_FFTW
    fftwf_plan cfg;
#else
    kiss_fft_cpx *ctrace;
    kiss_fft_cfg cfg;
#endif
    sf_file in=NULL, out=NULL;

    sf_init(argc,argv);
    in = sf_input ( "in");
    out = sf_output("out");

    if (SF_COMPLEX != sf_gettype(in)) sf_error ("Need complex input");

    if (!sf_getbool("inv",&inv)) inv = false;
    /* if y, perform inverse transform */
    if (!sf_getbool("sym",&sym)) sym=false;
    /* if y, apply symmetric scaling to make the FFT operator Hermitian */
    if (!sf_getint("sign",&sign)) sign = inv? 1: 0;
    /* transform sign (0 or 1) */
    if (!sf_getbool("opt",&opt)) opt=true;
    /* if y, determine optimal size for efficiency */
    if (!sf_getint("axis",&axis)) axis=2;
    /* Axis to transform */

    dim = sf_filedims(in,n);

    /* n1: samples per position along the transformed axis (faster axes);
       n3: number of independent slabs (slower axes) */
    n1=n3=1;
    for (j=0; j < dim; j++) {
        if (j < axis-1) n1 *= n[j];
        else if (j > axis-1) n3 *= n[j];
    }

    if (inv) {
        /* wavenumber axis geometry is mandatory in the input header */
        sprintf(varname,"n%d",axis);
        if (!sf_histint (in,varname,&nk)) sf_error("No %s= in input",varname);
        sprintf(varname,"d%d",axis);
        if (!sf_histfloat(in,varname,&dk)) sf_error("No %s= in input",varname);

        /* original space axis, as saved by the forward pass (with fallbacks) */
        sprintf(varname,"fft3_n%d",axis);
        if (!sf_histint (in,varname,&nx)) nx=nk;
        sprintf(varname,"fft3_o%d",axis);
        if (!sf_histfloat(in,varname,&x0)) x0 = 0.;
        sprintf(varname,"fft3_label%d",axis);
        label = sf_histstring(in,varname);

        dx = 1./(nk*dk);

        sprintf(varname,"n%d",axis);
        sf_putint (out,varname,nx);
        sprintf(varname,"d%d",axis);
        sf_putfloat (out,varname,dx);
        sprintf(varname,"o%d",axis);
        sf_putfloat (out,varname,x0);
        sprintf(varname,"label%d",axis);
        if (NULL != label) {
            /* restore the label saved by the forward pass */
            sf_putstring(out,varname,label);
        } else if (NULL != (label = sf_histstring(in,varname))) {
            /* no saved label: let sf_fft_label handle the current one */
            (void) sf_fft_label(axis,label,out);
        }
    } else {
        sprintf(varname,"n%d",axis);
        if (!sf_histint (in,varname,&nx)) sf_error("No %s= in input",varname);
        sprintf(varname,"d%d",axis);
        if (!sf_histfloat(in,varname,&dx)) sf_error("No %s= in input",varname);
        sprintf(varname,"o%d",axis);
        if (!sf_histfloat(in,varname,&x0)) x0 = 0.;
        sprintf(varname,"label%d",axis);
        label = sf_histstring(in,varname);

        /* remember the space axis so the inverse pass can restore it */
        sprintf(varname,"fft3_n%d",axis);
        sf_putint(out,varname,nx);
        sprintf(varname,"fft3_o%d",axis);
        sf_putfloat(out,varname,x0);
        if (NULL != label) {
            sprintf(varname,"fft3_label%d",axis);
            sf_putstring(out,varname,label);
        }

        if (!sf_getint("pad",&npad)) npad=2;
        /* padding factor */

        /* determine wavenumber sampling */
        nk = opt? kiss_fft_next_fast_size(nx*npad): nx*npad;
        if (nk != nx) sf_warning("padded to %d",nk);

        dk = 1./(nk*dx);
        k0 = -0.5/dx;

        sprintf(varname,"n%d",axis);
        sf_putint (out,varname,nk);
        sprintf(varname,"d%d",axis);
        sf_putfloat (out,varname,dk);
        sprintf(varname,"o%d",axis);
        sf_putfloat (out,varname,k0);
        /* fall back to a generic label when sf_fft_label returns false */
        if (NULL != label && !sf_fft_label(axis,label,out)) {
            sprintf(varname,"label%d",axis);
            sf_putstring(out,varname,"Wavenumber");
        }
    }
    sprintf(varname,"unit%d",axis);
    sf_fft_unit(axis,sf_histstring(in,varname),out);

    /* one slab: nk rows of n1 complex samples (row index = position on the
       transformed axis) */
    cp = (kiss_fft_cpx**) sf_complexalloc2(n1,nk);

#ifdef SF_HAS_FFTW
    /* n1 in-place transforms of length nk; samples of one transform are n1
       apart (stride) and neighbouring transforms start 1 apart (dist) */
    ix = nk;
    cfg = fftwf_plan_many_dft(1, &ix, n1,
                              (fftwf_complex*) cp[0], NULL, n1, 1,
                              (fftwf_complex*) cp[0], NULL, n1, 1,
                              sign? FFTW_BACKWARD: FFTW_FORWARD,
                              FFTW_ESTIMATE);
    if (NULL == cfg) sf_error("FFTW failure.");
#else
    ctrace = (kiss_fft_cpx*) sf_complexalloc(nk);
    cfg = kiss_fft_alloc(nk,sign,NULL,NULL);
#endif

    /* FFT scaling */
    wt = sym? 1./sqrtf((float) nk): 1./nk;

    for (i3=0; i3<n3; i3++) {
        if (inv) {
            sf_floatread((float*) cp[0],n1*nk*2,in);

#ifdef SF_HAS_FFTW
            fftwf_execute(cfg);

            /* scale and undo the centering sign flip; only the first nx
               samples are kept */
            for (ix=0; ix<nx; ix++) {
                for (i1=0; i1 < n1; i1++) {
                    cp[ix][i1] = sf_crmul(cp[ix][i1],ix%2? -wt: wt);
                }
            }
#else
            for (i1=0; i1 < n1; i1++) {
                /* Fourier transform k to x */
                kiss_fft_stride(cfg,cp[0]+i1,ctrace,n1);

                for (ix=0; ix<nx; ix++) {
                    cp[ix][i1] = sf_crmul(ctrace[ix],ix%2? -wt: wt);
                }
            }
#endif

            sf_floatwrite((float*) cp[0],n1*nx*2,out);
        } else {
            sf_floatread((float*) cp[0],n1*nx*2,in);

            /* FFT centering */
            for (ix=1; ix<nx; ix+=2) {
                for (i1=0; i1<n1; i1++) {
                    cp[ix][i1] = sf_cneg(cp[ix][i1]);
                }
            }

            /* the forward pass is scaled only for symmetric scaling; the
               inverse pass above always applies wt */
            if (sym) {
                for (ix=0; ix<nx; ix++) {
                    for (i1=0; i1 < n1; i1++) {
                        cp[ix][i1] = sf_crmul(cp[ix][i1],wt);
                    }
                }
            }

            /* pad with zeros */
            for (ix=nx; ix<nk; ix++) {
                for (i1=0; i1<n1; i1++) {
                    cp[ix][i1].r = 0.;
                    cp[ix][i1].i = 0.;
                }
            }

#ifdef SF_HAS_FFTW
            fftwf_execute(cfg);
#else
            for (i1=0; i1 < n1; i1++) {
                /* Fourier transform x to k */
                kiss_fft_stride(cfg,cp[0]+i1,ctrace,n1);

                /* Transpose */
                for (ix=0; ix<nk; ix++) {
                    cp[ix][i1] = ctrace[ix];
                }
            }
#endif

            sf_floatwrite((float*) cp[0],n1*nk*2,out);
        }
    }

    exit (0);
}
int main(int argc, char* argv[]) { bool verb, pow2; char key[7], *mode;; int n1, n2, n1padded, n2padded, num, dim, n[SF_MAX_DIM], npadded[SF_MAX_DIM], ii[SF_MAX_DIM]; int i, j, i1, i2, index, nw, iter, niter, nthr, *pad; float thr, pclip, normp; float *dobs_t, *thresh, *mask; fftwf_complex *mm, *dd, *dobs; fftwf_plan fft1, ifft1, fftrem, ifftrem;/* execute plan for FFT and IFFT */ sf_file in, out, Fmask; /* mask and I/O files*/ sf_init(argc,argv); /* Madagascar initialization */ in=sf_input("in"); /* read the data to be interpolated */ out=sf_output("out"); /* output the reconstructed data */ Fmask=sf_input("mask"); /* read the (n-1)-D mask for n-D data */ if(!sf_getbool("verb",&verb)) verb=false; /* verbosity */ if(!sf_getbool("pow2",&pow2)) pow2=false; /* round up the length of each axis to be power of 2 */ if (!sf_getint("niter",&niter)) niter=100; /* total number of iterations */ if (!sf_getfloat("pclip",&pclip)) pclip=10.; /* starting data clip percentile (default is 10)*/ if ( !(mode=sf_getstring("mode")) ) mode = "exp"; /* thresholding mode: 'hard', 'soft','pthresh','exp'; 'hard', hard thresholding; 'soft', soft thresholding; 'pthresh', generalized quasi-p; 'exp', exponential shrinkage */ if (pclip <=0. || pclip > 100.) 
sf_error("pclip=%g should be > 0 and <= 100",pclip); if (!sf_getfloat("normp",&normp)) normp=1.; /* quasi-norm: normp<2 */ for (i=0; i < SF_MAX_DIM; i++) {/* dimensions */ snprintf(key,3,"n%d",i+1); if (!sf_getint(key,n+i) && (NULL == in || !sf_histint(in,key,n+i))) break; /*( n# size of #-th axis )*/ sf_putint(out,key,n[i]); } if (0==i) sf_error("Need n1="); dim=i; pad=sf_intalloc (dim); for (i=0; i<dim; i++) pad[i]=0; sf_getints("pad",pad,dim); /* number of zeros to be padded for each axis */ n1=n[0]; n2=sf_leftsize(in,1); for (i=0; i<SF_MAX_DIM; i++) npadded[i]=1; npadded[0]=n1+pad[0]; n1padded=npadded[0]; n2padded=1; for (i=1; i<dim; i++){ npadded[i]=n[i]+pad[i]; if (pow2) {/* zero-padding to be power of 2 */ npadded[i]=nextpower2(n[i]); fprintf(stderr,"n%d=%d n%dpadded=%d\n",i,n[i],i,npadded[i]); } n2padded*=npadded[i]; } nw=npadded[0]/2+1; num=nw*n2padded;/* data: total number of elements in frequency domain */ /* allocate data and mask arrays */ thresh=(float*) malloc(nw*n2padded*sizeof(float)); dobs_t=(float*) fftwf_malloc(n1padded*n2padded*sizeof(float)); /* time domain observation */ dobs=(fftwf_complex*)fftwf_malloc(nw*n2padded*sizeof(fftwf_complex));/* freq-domain observation */ dd=(fftwf_complex*) fftwf_malloc(nw*n2padded*sizeof(fftwf_complex)); mm=(fftwf_complex*) fftwf_malloc(nw*n2padded*sizeof(fftwf_complex)); if (NULL != sf_getstring("mask")){ mask=sf_floatalloc(n2padded); } else sf_error("mask needed!"); /* initialize the input data and mask arrays */ memset(dobs_t,0,n1padded*n2padded*sizeof(float)); memset(mask,0,n2padded*sizeof(float)); for (i=0; i<n1*n2; i+=n1){ sf_line2cart(dim,n,i,ii); j=sf_cart2line(dim,npadded,ii); sf_floatread(&dobs_t[j], n1, in); sf_floatread(&mask[j/n1padded], 1, Fmask); } /* FFT for the 1st dimension and the remaining dimensions */ fft1=fftwf_plan_many_dft_r2c(1, &n1padded, n2padded, dobs_t, &n1padded, 1, n1padded, dobs, &n1padded, 1, nw, FFTW_MEASURE); ifft1=fftwf_plan_many_dft_c2r(1, &n1padded, n2padded, dobs, 
&n1padded, 1, nw, dobs_t, &n1padded, 1, n1padded, FFTW_MEASURE); fftrem=fftwf_plan_many_dft(dim-1, &npadded[1], nw, dd, &npadded[1], nw, 1, dd, &npadded[1], nw, 1, FFTW_FORWARD, FFTW_MEASURE); ifftrem=fftwf_plan_many_dft(dim-1, &npadded[1], nw, dd, &npadded[1], nw, 1, dd, &npadded[1], nw, 1, FFTW_BACKWARD, FFTW_MEASURE); /* transform the data from time domain to frequency domain: dobs_t-->dobs */ fftwf_execute(fft1); for(i=0; i<num; i++) dobs[i]/=sqrtf(n1padded); memset(mm,0,num*sizeof(fftwf_complex)); /* Iterative Shrinkage-Thresholding (IST) Algorithm: mm^{k+1}=T[mm^k+A^* M^* (dobs-M A mm^k)] (M^*=M; Mdobs=dobs) =T[mm^k+A^*(dobs-M A mm^k)]; (k=0,1,...niter-1) dd^=A mm^; */ for(iter=0; iter<niter; iter++) { /* dd<-- A mm^k */ memcpy(dd, mm, num*sizeof(fftwf_complex)); fftwf_execute(ifftrem); for(i=0; i<num; i++) dd[i]/=sqrtf(n2padded); /* apply mask: dd<--dobs-M A mm^k=dobs-M dd */ for(i2=0; i2<n2padded; i2++) for(i1=0; i1<nw; i1++) { index=i1+nw*i2; dd[index]=dobs[index]-mask[i2]*dd[index]; } /* mm^k += A^*(dobs-M A mm^k); dd=dobs-M A mm^k */ fftwf_execute(fftrem); for(i=0; i<num; i++) mm[i]+=dd[i]/sqrtf(n2padded); /* perform thresholding */ for(i=0; i<num; i++) thresh[i]=cabsf(mm[i]); nthr = 0.5+num*(1.-0.01*pclip); /* round off */ if (nthr < 0) nthr=0; if (nthr >= num) nthr=num-1; thr=sf_quantile(nthr, num, thresh); sf_cpthresh(mm, num, thr, normp, mode); if (verb) sf_warning("iteration %d;",iter+1); } /* frequency--> time domain: dobs-->dobs_t */ memcpy(dd, mm, num*sizeof(fftwf_complex)); fftwf_execute(ifftrem); for(i=0; i<num; i++) dd[i]/=sqrtf(n2padded); memcpy(dobs, dd, num*sizeof(fftwf_complex)); fftwf_execute(ifft1); for(i=0; i<n1padded*n2padded; i++) dobs_t[i]/=sqrtf(n1padded); for (i=0; i<n1*n2; i+=n1){ sf_line2cart(dim,n,i,ii); j=sf_cart2line(dim,npadded,ii); sf_floatwrite(&dobs_t[j],n1,out); } free(thresh); fftwf_free(dobs_t); fftwf_free(dobs); fftwf_free(dd); fftwf_free(mm); exit(0); }
void FFT::apply(Window im, bool transformX, bool transformY, bool transformT, bool inverse) { assert(im.channels % 2 == 0, "-fft requires an image with an even number of channels\n"); if (im.width == 1) transformX = false; if (im.height == 1) transformY = false; if (im.frames == 1) transformT = false; // rank 0 if (!transformX && !transformY && !transformT) return; if (transformX && transformY && transformT) { // rank 3 int n[] = {im.frames, im.height, im.width}; int nembed[] = {im.frames, im.tstride/im.ystride, im.ystride/im.xstride}; fftwf_plan plan = fftwf_plan_many_dft(3, n, im.channels/2, (fftwf_complex *)im(0, 0, 0), nembed, im.channels/2, 1, (fftwf_complex *)im(0, 0, 0), nembed, im.channels/2, 1, inverse ? FFTW_BACKWARD : FFTW_FORWARD, FFTW_ESTIMATE); fftwf_execute(plan); fftwf_destroy_plan(plan); } else if (transformX && transformY) { // rank 2 int n[] = {im.height, im.width}; int nembed[] = {im.tstride/im.ystride, im.ystride/im.xstride}; for (int t = 0; t < im.frames; t++) { fftwf_plan plan = fftwf_plan_many_dft(2, n, im.channels/2, (fftwf_complex *)im(0, 0, t), nembed, im.channels/2, 1, (fftwf_complex *)im(0, 0, t), nembed, im.channels/2, 1, inverse ? FFTW_BACKWARD : FFTW_FORWARD, FFTW_ESTIMATE); fftwf_execute(plan); fftwf_destroy_plan(plan); } } else if (transformT && transformY) { // rank 2 int n[] = {im.frames, im.height}; int nembed[] = {im.frames, im.tstride/im.ystride}; fftwf_plan plan = fftwf_plan_many_dft(2, n, im.width*im.channels/2, (fftwf_complex *)im(0, 0, 0), nembed, im.width*im.channels/2, 1, (fftwf_complex *)im(0, 0, 0), nembed, im.width*im.channels/2, 1, inverse ? 
FFTW_BACKWARD : FFTW_FORWARD, FFTW_ESTIMATE); fftwf_execute(plan); fftwf_destroy_plan(plan); } else if (transformT && transformX) { // rank 2 int n[] = {im.frames, im.width}; int nembed[] = {im.frames, im.tstride/im.xstride}; for (int y = 0; y < im.height; y++) { fftwf_plan plan = fftwf_plan_many_dft(2, n, im.channels/2, (fftwf_complex *)im(0, y, 0), nembed, im.channels/2, 1, (fftwf_complex *)im(0, y, 0), nembed, im.channels/2, 1, inverse ? FFTW_BACKWARD : FFTW_FORWARD, FFTW_ESTIMATE); fftwf_execute(plan); fftwf_destroy_plan(plan); } } else if (transformX) { // rank 1 int n[] = {im.width}; int nembed[] = {im.width}; for (int t = 0; t < im.frames; t++) { for (int y = 0; y < im.height; y++) { fftwf_plan plan = fftwf_plan_many_dft(1, n, im.channels/2, (fftwf_complex *)im(0, y, t), nembed, im.channels/2, 1, (fftwf_complex *)im(0, y, t), nembed, im.channels/2, 1, inverse ? FFTW_BACKWARD : FFTW_FORWARD, FFTW_ESTIMATE); fftwf_execute(plan); fftwf_destroy_plan(plan); } } } else if (transformY) { // rank 1 int n[] = {im.height}; int nembed[] = {im.height}; for (int t = 0; t < im.frames; t++) { fftwf_plan plan = fftwf_plan_many_dft(1, n, im.width*im.channels/2, (fftwf_complex *)im(0, 0, t), nembed, im.ystride/2, 1, (fftwf_complex *)im(0, 0, t), nembed, im.ystride/2, 1, inverse ? FFTW_BACKWARD : FFTW_FORWARD, FFTW_ESTIMATE); fftwf_execute(plan); fftwf_destroy_plan(plan); } } else if (transformT) { // rank 1 int n[] = {im.frames}; int nembed[] = {im.frames}; for (int y = 0; y < im.height; y++) { fftwf_plan plan = fftwf_plan_many_dft(1, n, im.width*im.channels/2, (fftwf_complex *)im(0, y, 0), nembed, im.tstride/2, 1, (fftwf_complex *)im(0, y, 0), nembed, im.tstride/2, 1, inverse ? FFTW_BACKWARD : FFTW_FORWARD, FFTW_ESTIMATE); fftwf_execute(plan); fftwf_destroy_plan(plan); } } if (inverse) { float m = 1.0; if (transformX) m *= im.width; if (transformY) m *= im.height; if (transformT) m *= im.frames; Scale::apply(im, 1.0f/m); } }
/**
 * Convolve `signal` with `filter` by multiplying their FFTs.
 *
 * Signal and filter are written into a single scratch array (`packed`) so
 * that one batched FFTW call transforms both.  The first `baseDim`
 * dimensions are padded to the next power of two of (signal + filter - 1);
 * dimension `baseDim` of the scratch array is the batch dimension.  After
 * the forward transform the spectra are multiplied, the product is
 * transformed back, and reorderOutput() copies the result into the output
 * array.
 *
 * @param signal  input signal
 * @param filter  convolution filter
 * @param expand  if true the output has the full size (signal + filter - 1
 *                in the convolved dimensions); otherwise it has the signal's
 *                size
 * @param kind    batch mode; for ONE2MANY the product is stored in (and read
 *                back from) the filter half of the scratch array, and the
 *                batch dimensions of the output follow the filter
 * @return the convolution result
 */
Array<T> fftconvolve(Array<T> const& signal, Array<T> const& filter,
                     const bool expand, ConvolveBatchKind kind)
{
    const af::dim4 sd = signal.dims();
    const af::dim4 fd = filter.dims();

    af::dim4 packed_dims;
    int fft_dims[baseDim];
    dim_t fftScale = 1;

    // Scratch layout.  Both operands live in one array so a single batched
    // FFT covers them.  FFTW wants the slowest dimension first, hence the
    // reversed index into fft_dims; dimension 0 is halved because two input
    // values are packed into each complex element.
    for (dim_t k = 0; k < 4; k++) {
        if (k < baseDim) {
            packed_dims[k] = nextpow2((unsigned)(sd[k] + fd[k] - 1));

            const dim_t slot = baseDim - k - 1;
            fft_dims[slot] = (k == 0) ? packed_dims[k] / 2 : packed_dims[k];
            fftScale *= fft_dims[slot];
        } else if (k == baseDim) {
            packed_dims[k] = sd[k] + fd[k];
        } else {
            packed_dims[k] = 1;
        }
    }

    Array<convT> packed = createEmptyArray<convT>(packed_dims);
    convT *packed_ptr = packed.get();
    const af::dim4 packed_strides = packed.strides();

    // Views of the signal part and the filter part of the scratch array:
    // padded sizes in the transformed dimensions, original sizes in the
    // batch dimensions.
    af::dim4 sig_tmp_dims, sig_tmp_strides;
    af::dim4 filter_tmp_dims, filter_tmp_strides;

    sig_tmp_dims[0]       = packed_dims[0];
    filter_tmp_dims[0]    = packed_dims[0];
    sig_tmp_strides[0]    = 1;
    filter_tmp_strides[0] = 1;

    for (dim_t k = 1; k < 4; k++) {
        const bool transformed = (k < baseDim);
        sig_tmp_dims[k]    = transformed ? packed_dims[k] : sd[k];
        filter_tmp_dims[k] = transformed ? packed_dims[k] : fd[k];

        sig_tmp_strides[k]    = sig_tmp_strides[k - 1] * sig_tmp_dims[k - 1];
        filter_tmp_strides[k] = filter_tmp_strides[k - 1] * filter_tmp_dims[k - 1];
    }

    // The filter block starts right after the signal block.
    convT *sig_tmp_ptr    = packed_ptr;
    convT *filter_tmp_ptr = packed_ptr + sig_tmp_strides[3] * sig_tmp_dims[3];

    // Number of packed complex elements in dimension 0
    dim_t sig_half_d0 = divup(sd[0], 2);

    // Signal: packed into complex elements and zero-padded.
    packData<convT, T>(sig_tmp_ptr, sig_tmp_dims, sig_tmp_strides, signal);

    // Filter: zero-padded.
    padArray<convT, T>(filter_tmp_ptr, filter_tmp_dims, filter_tmp_strides, filter);

    // Geometry shared by the forward and backward plans.
    const int batches = packed_dims[baseDim];
    const int stride  = packed_strides[0];
    const int dist    = packed_strides[baseDim] / 2;

    // Forward FFT of both operands, in place.
    if (isDouble) {
        fftw_complex *buf = (fftw_complex*)packed_ptr;
        fftw_plan plan = fftw_plan_many_dft(baseDim, fft_dims, batches,
                                            buf, NULL, stride, dist,
                                            buf, NULL, stride, dist,
                                            FFTW_FORWARD, FFTW_ESTIMATE);
        fftw_execute(plan);
        fftw_destroy_plan(plan);
    } else {
        fftwf_complex *buf = (fftwf_complex*)packed_ptr;
        fftwf_plan plan = fftwf_plan_many_dft(baseDim, fft_dims, batches,
                                              buf, NULL, stride, dist,
                                              buf, NULL, stride, dist,
                                              FFTW_FORWARD, FFTW_ESTIMATE);
        fftwf_execute(plan);
        fftwf_destroy_plan(plan);
    }

    // Spectral product; ONE2MANY writes into the filter block, every other
    // kind into the signal block.
    if (kind == ONE2MANY) {
        complexMultiply<convT>(filter_tmp_ptr, filter_tmp_dims, filter_tmp_strides,
                               sig_tmp_ptr, sig_tmp_dims, sig_tmp_strides,
                               filter_tmp_ptr, filter_tmp_dims, filter_tmp_strides,
                               kind);
    } else {
        complexMultiply<convT>(sig_tmp_ptr, sig_tmp_dims, sig_tmp_strides,
                               sig_tmp_ptr, sig_tmp_dims, sig_tmp_strides,
                               filter_tmp_ptr, filter_tmp_dims, filter_tmp_strides,
                               kind);
    }

    // Backward FFT, same geometry.
    if (isDouble) {
        fftw_complex *buf = (fftw_complex*)packed_ptr;
        fftw_plan plan = fftw_plan_many_dft(baseDim, fft_dims, batches,
                                            buf, NULL, stride, dist,
                                            buf, NULL, stride, dist,
                                            FFTW_BACKWARD, FFTW_ESTIMATE);
        fftw_execute(plan);
        fftw_destroy_plan(plan);
    } else {
        fftwf_complex *buf = (fftwf_complex*)packed_ptr;
        fftwf_plan plan = fftwf_plan_many_dft(baseDim, fft_dims, batches,
                                              buf, NULL, stride, dist,
                                              buf, NULL, stride, dist,
                                              FFTW_BACKWARD, FFTW_ESTIMATE);
        fftwf_execute(plan);
        fftwf_destroy_plan(plan);
    }

    // Output size.
    dim4 oDims(1);
    if (expand) {
        // ONE2ONE / ONE2MANY grow every dimension; the other kinds grow only
        // the convolved ones.
        const bool growEveryDim = (kind==ONE2ONE || kind==ONE2MANY);
        for (dim_t d = 0; d < 4; ++d) {
            oDims[d] = (growEveryDim || d < baseDim) ? sd[d]+fd[d]-1 : sd[d];
        }
    } else {
        oDims = sd;
        if (kind==ONE2MANY) {
            for (dim_t i = baseDim; i < 4; ++i)
                oDims[i] = fd[i];
        }
    }

    Array<T> out = createEmptyArray<T>(oDims);
    T* out_ptr = out.get();
    const af::dim4 out_dims    = out.dims();
    const af::dim4 out_strides = out.strides();

    const af::dim4 filter_dims = filter.dims();

    // Copy the result out of whichever block the product was stored in.
    if (kind == ONE2MANY) {
        reorderOutput<T, convT, roundOut>
            (out_ptr, out_dims, out_strides,
             filter_tmp_ptr, filter_tmp_dims, filter_tmp_strides,
             filter_dims, sig_half_d0, baseDim, fftScale, expand);
    } else {
        reorderOutput<T, convT, roundOut>
            (out_ptr, out_dims, out_strides,
             sig_tmp_ptr, sig_tmp_dims, sig_tmp_strides,
             filter_dims, sig_half_d0, baseDim, fftScale, expand);
    }

    return out;
}
/* Set up fft plans. Need to have npol, nphase, nchan * already filled in struct */ int cyclic_init_ffts(struct cyclic_work *w) { /* Infer lag, harmonic sizes from chan/phase */ w->nlag = w->nchan; // Total number of lags including + and - w->nharm = w->nphase/2 + 1; // Only DC and positive harmonics /* Alloc temp arrays for planning */ PS ps; CS cs; CC cc; PC pc; struct filter_time ft; struct filter_freq ff; struct profile_phase pp; struct profile_harm ph; ps.npol = cs.npol = cc.npol = pc.npol = w->npol; ps.nphase = pc.nphase = w->nphase; ps.nchan = cs.nchan = w->nchan; cs.nharm = cc.nharm = w->nharm; cc.nlag = pc.nlag = w->nlag; cyclic_alloc_ps(&ps); cyclic_alloc_cs(&cs); cyclic_alloc_cc(&cc); cyclic_alloc_pc(&pc); ft.nlag = w->nlag; ff.nchan = w->nchan; pp.nphase = w->nphase; ph.nharm = w->nharm; filter_alloc_time(&ft); filter_alloc_freq(&ff); profile_alloc_phase(&pp); profile_alloc_harm(&ph); /* FFT plans */ int rv=0; /* ps2cs - r2c fft along phase (fastest) axis */ w->ps2cs = fftwf_plan_many_dft_r2c(1, &w->nphase, w->npol*w->nchan, ps.data, NULL, 1, w->nphase, cs.data, NULL, 1, w->nharm, FFTW_MEASURE | FFTW_PRESERVE_INPUT); if (w->ps2cs == NULL) rv++; /* cs2cc - c2c ifft along channel axis */ w->cs2cc = fftwf_plan_many_dft(1, &w->nchan, w->npol*w->nharm, cs.data, NULL, w->nharm*w->npol, 1, cc.data, NULL, w->nharm*w->npol, 1, FFTW_BACKWARD, FFTW_MEASURE | FFTW_PRESERVE_INPUT); if (w->cs2cc == NULL) rv++; /* cc2cs - c2c fft along lag axis */ w->cc2cs = fftwf_plan_many_dft(1, &w->nlag, w->npol*w->nharm, cc.data, NULL, w->nharm*w->npol, 1, cs.data, NULL, w->nharm*w->npol, 1, FFTW_FORWARD, FFTW_MEASURE | FFTW_PRESERVE_INPUT); if (w->cc2cs == NULL) rv++; /* time2freq, freq2time for filters */ w->time2freq = fftwf_plan_dft_1d(w->nlag, ft.data, ff.data, FFTW_FORWARD, FFTW_MEASURE | FFTW_PRESERVE_INPUT); if (w->time2freq == NULL) rv++; w->freq2time = fftwf_plan_dft_1d(w->nchan, ff.data, ft.data, FFTW_BACKWARD, FFTW_MEASURE | FFTW_PRESERVE_INPUT); if (w->freq2time == 
NULL) rv++; /* phase2harm, harm2phase for profiles */ w->phase2harm = fftwf_plan_dft_r2c_1d(w->nphase, pp.data, ph.data, FFTW_MEASURE | FFTW_PRESERVE_INPUT); if (w->phase2harm == NULL) rv++; w->harm2phase = fftwf_plan_dft_c2r_1d(w->nphase, ph.data, pp.data, FFTW_MEASURE | FFTW_PRESERVE_INPUT); if (w->harm2phase == NULL) rv++; cyclic_free_ps(&ps); cyclic_free_cs(&cs); cyclic_free_cc(&cc); cyclic_free_pc(&pc); filter_free_time(&ft); filter_free_freq(&ff); profile_free_phase(&pp); profile_free_harm(&ph); return(rv); }