int main(int argc, char **argv) { int N = (argc >= 2) ? atoi(argv[1]) : 64; // size of image int n1 = (argc >= 3) ? atoi(argv[2]) : 1000; // times to try // input is NxN real numbers int M = N*(N/2+1); // output is this many complex numbers double *in1 = (double *)fftw_malloc(sizeof(double) * N*N); double *in2 = (double *)fftw_malloc(sizeof(double) * N*N); double *out = (double *)fftw_malloc(sizeof(double) * N*N); CD *out1 = (CD*) fftw_malloc(sizeof(CD) * M); CD *out2 = (CD*) fftw_malloc(sizeof(CD) * M); // make up some data for(int i=0; i<N*N; i++) { in1[i] = rand()/1000000000.0; in2[(i+2*N+3)%(N*N)] = in1[i]; // offsets should be 3 and 2 pixels //in2[i] = in1[i]; } fftw_plan p1, p2, p3; p1 = fftw_plan_dft_r2c_2d( N, N, in1, (double (*)[2])out1, FFTW_ESTIMATE ); p2 = fftw_plan_dft_r2c_2d( N, N, in2, (double (*)[2])out2, FFTW_ESTIMATE ); p3 = fftw_plan_dft_c2r_2d( N, N, (double (*)[2])out1, out, FFTW_ESTIMATE ); double big; int bigj; printf("Starting %d passes\n", n1); for(int i=0; i<n1; i++) { fftw_execute(p1); /* repeat as needed */ fftw_execute(p2); for(int j=0; j<M; j++) out1[j] = out1[j]*conj(out2[j]); fftw_execute(p3); big=-1.0E30; bigj = -1; for(int j=0; j<N*N; j++) { if( out[j] > big ) { big = out[j]; bigj = j; } } } int x = bigj/N; int y = bigj - N*x; if (x > N/2) x -= N; if (y > N/2) y -= N; printf("x,y %d %d\n", x, y); fftw_destroy_plan(p1); fftw_destroy_plan(p2); fftw_destroy_plan(p3); return 0; }
void init_gfft() { int n_size2D_ZXY[] = {NX,NY}; #ifdef _OPENMP fftw_plan_with_nthreads( nthreads ); #endif r2cfft_mpi_t = fftw_mpi_plan_dft_r2c_3d( NY, NX, NZ, wr1, w1, MPI_COMM_WORLD, FFT_PLANNING | FFTW_MPI_TRANSPOSED_OUT); if (r2cfft_mpi_t == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW R2C_T plan creation failed"); r2cfft_mpi = fftw_mpi_plan_dft_r2c_3d( NX, NY, NZ, wr1, w1, MPI_COMM_WORLD, FFT_PLANNING); if (r2cfft_mpi == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW R2C plan creation failed"); c2rfft_mpi_t = fftw_mpi_plan_dft_c2r_3d( NY, NX, NZ, w1, wr1, MPI_COMM_WORLD, FFT_PLANNING | FFTW_MPI_TRANSPOSED_IN); if (c2rfft_mpi_t == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW C2R_T plan creation failed"); c2rfft_mpi = fftw_mpi_plan_dft_c2r_3d( NX, NY, NZ, w1, wr1, MPI_COMM_WORLD, FFT_PLANNING); if (c2rfft_mpi == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW C2R plan creation failed"); r2cfft_2Dslice = fftw_plan_dft_r2c_2d(NX,NY,wrh3,wh3,FFT_PLANNING); if (r2cfft_2Dslice == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW r2c slice plan creation failed"); // init transpose routines (These are used by remap routines) init_transpose(); fft_timer=0.0; return; }
void FFTConvolver::ConvolveSameSize(double* image, const double* kernel, size_t imgWidth, size_t imgHeight) { const size_t imgSize = imgWidth * imgHeight; const size_t complexSize = (imgWidth/2+1) * imgHeight; double* tempData = reinterpret_cast<double*>(fftw_malloc(imgSize * sizeof(double))); fftw_complex* fftImageData = reinterpret_cast<fftw_complex*>(fftw_malloc(complexSize * sizeof(fftw_complex))); fftw_complex* fftKernelData = reinterpret_cast<fftw_complex*>(fftw_malloc(complexSize * sizeof(fftw_complex))); boost::mutex::scoped_lock lock(_mutex); fftw_plan inToFPlan = fftw_plan_dft_r2c_2d(imgHeight, imgWidth, tempData, fftImageData, FFTW_ESTIMATE); fftw_plan fToOutPlan = fftw_plan_dft_c2r_2d(imgHeight, imgWidth, fftImageData, tempData, FFTW_ESTIMATE); lock.unlock(); memcpy(tempData, image, imgSize * sizeof(double)); fftw_execute_dft_r2c(inToFPlan, tempData, fftImageData); memcpy(tempData, kernel, imgSize * sizeof(double)); fftw_execute_dft_r2c(inToFPlan, tempData, fftKernelData); double fact = 1.0/imgSize; for(size_t i=0; i!=complexSize; ++i) reinterpret_cast<std::complex<double>*>(fftImageData)[i] *= fact * reinterpret_cast<std::complex<double>*>(fftKernelData)[i]; fftw_execute_dft_c2r(fToOutPlan, reinterpret_cast<fftw_complex*>(fftImageData), tempData); memcpy(image, tempData, imgSize * sizeof(double)); fftw_free(fftImageData); fftw_free(fftKernelData); fftw_free(tempData); lock.lock(); fftw_destroy_plan(inToFPlan); fftw_destroy_plan(fToOutPlan); lock.unlock(); }
/** * \brief Perform the fourier transform * * This method uses the real-data fourier transform from the FFTW3 library * to compute the fourier transform of the real pixel data. To match the * different conventions how image data is stored, the ny or n1 dimension * of the fftw data array needs to be the width of the image. */ void FourierImage::fourier(const Image<double>& image) { // make sure image has the right dimensions (the ones we stored in // _orig) if (_orig != image.size()) { throw std::range_error("wrong dims for fourier transform"); } // get the dimensions (note the differing data storage conventions // used in fftw3 and in our image class) int n0 = image.size().height(); int n1 = image.size().width(); debug(LOG_DEBUG, DEBUG_LOG, 0, "fourier transform dimensions: %d x %d", n0, n1); // compute the fourier transform fftw_plan p = fftw_plan_dft_r2c_2d(n0, n1, image.pixels, (fftw_complex *)pixels, FFTW_ESTIMATE); fftw_execute(p); fftw_destroy_plan(p); fftw_cleanup(); debug(LOG_DEBUG, DEBUG_LOG, 0, "fourier transform completed"); #if 0 // renormalize double v = 1 / sqrt(n0 *n1); size_t m = size().getPixels(); for (size_t o = 0; o < m; o++) { pixels[o] *= v; } #endif }
/****** fft_rtf ************************************************************
PROTO	double *fft_rtf(double *data, int *size)
PURPOSE	2-dimensional real-to-complex FFT using the FFTW library. The
	transform is written to a newly allocated buffer (it is not computed
	in place, despite the historical wording).
INPUT	ptr to the image,
	ptr to image size vector (size[0] and size[1]; size[0] is passed to
	FFTW as the last, halved dimension).
OUTPUT	Pointer to the compressed, memory-allocated Fourier transform: the
	buffer holds ((size[0]/2)+1)*size[1] fftw_complex values and is
	returned cast to (double *).
NOTES	Input data may end up corrupted (the plan is created with
	FFTW_DESTROY_INPUT).
	With USE_THREADS, allocation, plan creation and plan destruction are
	done while holding fftmutex; the transform itself runs unlocked.
AUTHOR	E. Bertin (IAP)
VERSION	26/03/2007
 ***/
double	*fft_rtf(double *data, int *size)
  {
   fftw_plan	plan;
   fftw_complex	*fdata;
   int		npix2;

/* Number of complex output elements: half-plus-one along size[0] */
  npix2 = ((size[0]/2) + 1) * size[1];

/* Allocate the output buffer and create the forward plan (under the lock) */
#ifdef USE_THREADS
  QPTHREAD_MUTEX_LOCK(&fftmutex);
#endif
  QFFTWMALLOC(fdata, fftw_complex, npix2);
  plan = fftw_plan_dft_r2c_2d(size[1], size[0], data,
        fdata, FFTW_ESTIMATE|FFTW_DESTROY_INPUT);
#ifdef USE_THREADS
  QPTHREAD_MUTEX_UNLOCK(&fftmutex);
#endif

/* Run the transform outside of the lock */
  fftw_execute(plan);

/* The plan is single-use: destroy it again (under the lock) */
#ifdef USE_THREADS
  QPTHREAD_MUTEX_LOCK(&fftmutex);
#endif
  fftw_destroy_plan(plan);
#ifdef USE_THREADS
  QPTHREAD_MUTEX_UNLOCK(&fftmutex);
#endif

  return (double *)fdata;
  }
// Fourier transform from x back to (complex) k: kTable* xTable::Transform() const { kTable *kt = new kTable( N, 2*PI/(N*dx) ); fftw_plan plan = fftw_plan_dft_r2c_2d(N,N, array, reinterpret_cast<fftw_complex*> (kt->array), FFTW_ESTIMATE); if (plan==NULL) throw FFTInvalid(); fftw_execute(plan); fftw_destroy_plan(plan); // Now scale the k spectrum and flip signs for x=0 in middle. double fac = scaleby * dx * dx; size_t ind=0; for (int i=0; i<N; i++) for (int j=0; j<=N/2; j++) { if ( (i+j)%2==0) kt->array[ind] *= fac; else kt->array[ind] *= -fac; ind++; } return kt; }
/*
 * Create the forward (grid->plan) and inverse (grid->iplan) FFTW plans for a
 * 2D real grid.
 *
 * The shared work buffer temp (aliased as temp2 for complex access) is grown
 * to 2 * nx * (ny/2 + 1) doubles when it is currently smaller. Because the
 * planner may overwrite the arrays it is given, the contents of grid->value
 * are saved to a scratch buffer before planning and restored afterwards.
 *
 * grid = grid for which the plans are created (rgrid2d *).
 *
 * No return value. On allocation failure a message is printed to stderr and
 * the function returns without creating plans.
 */
EXPORT void rgrid2d_fftw_alloc(rgrid2d *grid) {

  long s;
  double *plan_temp;

  s = 2 * grid->nx * (grid->ny/2 + 1);
  if(s > temp_len) {
    /* temp comes from fftw_malloc(), so it must be released with fftw_free() */
    if(temp) fftw_free(temp);
    if(!(temp = fftw_malloc(sizeof(double) * s))) {
      /* keep the bookkeeping consistent with the (now missing) buffer */
      temp_len = 0;
      temp2 = NULL;
      fprintf(stderr, "libgrid: Out of memory in rgrid2d_fft().\n");
      return;
    }
    temp_len = s;
    temp2 = (double complex *) temp;
  }
  if(!(plan_temp = fftw_malloc(sizeof(double) * s))) {
    fprintf(stderr, "libgrid: Out of memory in rgrid2d_fft().\n");
    return;
  }

  /* save the grid data: planning may scribble over grid->value */
  memcpy(plan_temp, grid->value, sizeof(double) * s);

  fftw_plan_with_nthreads(grid_threads());
  grid->plan =
    fftw_plan_dft_r2c_2d(grid->nx, grid->ny, grid->value, temp2, GRID_FFTW_PLAN | FFTW_DESTROY_INPUT);
  grid->iplan =
    fftw_plan_dft_c2r_2d(grid->nx, grid->ny, grid->cint->value, temp, GRID_FFTW_PLAN | FFTW_DESTROY_INPUT);

  /* restore the grid data and drop the scratch copy */
  memcpy(grid->value, plan_temp, sizeof(double) * s);
  fftw_free(plan_temp);
}
/*!
 *  pixDFT()
 *
 *      Input:  pix (1 bpp or 8 bpp)
 *              shiftflag (L_NO_SHIFTING or L_WITH_SHIFTING)
 *      Return: complex array, or null on error
 *
 *  Notes:
 *      (1) The complex array returned has size (pixs->h) * (pixs->w / 2 + 1).
 *          This is to save space, given the fact the other half of the
 *          transform can be calculated by the complex conjugate.
 *      (2) By default, the DC of the DFT is in the top left corner (0, 0).
 *          Set @shiftflag to L_WITH_SHIFTING to move the DC to the center.
 *      (3) It is the responsibility of the caller to release the allocated
 *          complex array by invoking fftw_free().
 *      (4) If the output array cannot be allocated or the FFTW plan cannot
 *          be created, everything allocated so far is released and null
 *          is returned.
 */
fftw_complex *
pixDFT(PIX      *pixs,
	   l_int32   shiftflag)
{
	l_int32	       w, h, d;
	DPIX	      *dpix;
	fftw_complex  *output;
	fftw_plan      plan;

	PROCNAME("pixDFT");

    if (!pixs)
        return (fftw_complex *)ERROR_PTR("pixs not defined", procName, NULL);
	if (shiftflag != L_NO_SHIFTING && shiftflag != L_WITH_SHIFTING)
        return (fftw_complex *)ERROR_PTR("invalid shiftflag", procName, NULL);

	pixGetDimensions(pixs, &w, &h, &d);
	if (d != 1 && d != 8)
        return (fftw_complex *)ERROR_PTR("pixs not 1 bpp or 8 bpp", procName, NULL);

	/* Convert Pix to a DPix that can be fed to the FFTW library */
	if ((dpix = pixConvertToDPix(pixs, 1, shiftflag)) == NULL)
        return (fftw_complex *)ERROR_PTR("dpix not made", procName, NULL);

	/* Compute the DFT of the DPix */
	output = (fftw_complex *) fftw_malloc(sizeof(fftw_complex) * h * (w / 2 + 1));
	if (output == NULL) {
		dpixDestroy(&dpix);
        return (fftw_complex *)ERROR_PTR("output not made", procName, NULL);
	}

	plan = fftw_plan_dft_r2c_2d(h, w, (double *) dpixGetData(dpix), output, FFTW_ESTIMATE);
	if (plan == NULL) {
		fftw_free(output);
		dpixDestroy(&dpix);
        return (fftw_complex *)ERROR_PTR("plan not made", procName, NULL);
	}
	fftw_execute(plan);

	dpixDestroy(&dpix);
	fftw_destroy_plan(plan);

	return output;
}
// Fourier transform from x back to (complex) k: void XTable::transform(KTable& kt) const { check_array(); // Make a new copy of data array since measurement will overwrite: FFTW_Array<double> t_array = _array; fftw_plan plan = fftw_plan_dft_r2c_2d( _N,_N, t_array.get_fftw(), kt._array.get_fftw(), FFTW_ESTIMATE); #ifdef FFT_DEBUG if (plan==NULL) throw FFTInvalid(); #endif fftw_execute(plan); fftw_destroy_plan(plan); // Now scale the k spectrum and flip signs for x=0 in middle. double fac = _dx * _dx; size_t ind=0; for (int iy=0; iy<_N; iy++) { for (int ix=0; ix<=_N/2; ix++) { if ( (ix+iy)%2==0) kt._array[ind] *= fac; else kt._array[ind] *= -fac; ind++; } } kt._dk = 2*M_PI/(_N*_dx); }
void // x [m x n] -> r [m, n/2+1] mad_mat_rfft (const num_t x[], cnum_t r[], ssz_t m, ssz_t n) { CHKXR; fftw_plan p = fftw_plan_dft_r2c_2d(m, n, (num_t*)x, r, FFTW_ESTIMATE); fftw_execute(p); fftw_destroy_plan(p); }
/* Event handler of GfsOutputSpectra.
 *
 * When the parent class event fires, the variable v->v is sampled onto the
 * cartesian grid v->cgd, a real-to-complex FFT of dimension v->Ndim (1, 2 or
 * 3) is computed into a freshly allocated buffer, and the spectrum is written
 * to the output file by write_spectra_1D/2D/3D.
 *
 * Returns TRUE when the event was processed, FALSE when the parent class
 * event did not fire. */
static gboolean output_spectra_event (GfsEvent * event,
                                      GfsSimulation * sim)
{
  GfsDomain * domain;
  GfsOutputSpectra * v;
  fftw_plan p;
  Datawrite data;

  /* nothing to do unless the parent class says the event is due */
  if (!(* GFS_EVENT_CLASS (GTS_OBJECT_CLASS (gfs_output_spectra_class ())->parent_class)->event)
      (event, sim))
    return FALSE;

  domain = GFS_DOMAIN (sim);
  v = GFS_OUTPUT_SPECTRA (event);

  data.fp   = GFS_OUTPUT (event)->file->fp;
  data.L    = v->L;
  data.kmax = init_kmax(v->L);
  data.dir1 = v->dir[0];
  data.dir2 = v->dir[1];

  fill_cartesian_matrix( v->cgd, v->v, domain);

  switch (v->Ndim) {
  case 1: {
    /* n/2+1 complex coefficients along the single direction */
    data.n1 = ( v->cgd->n[v->dir[0]] / 2 ) + 1;
    data.out = fftw_malloc( sizeof(fftw_complex)*data.n1 );
    p = fftw_plan_dft_r2c_1d( v->cgd->n[v->dir[0]], v->cgd->v, data.out, FFTW_ESTIMATE);
    fftw_execute(p);
    write_spectra_1D ( &data );
    break;
  }
  case 2: {
    /* full size along dir[0], halved along dir[1] */
    data.n1 = v->cgd->n[v->dir[0]];
    data.n2 = ( v->cgd->n[v->dir[1]] / 2 ) + 1;
    data.out = fftw_malloc( sizeof(fftw_complex)*v->cgd->n[v->dir[0]]*data.n2 );
    p = fftw_plan_dft_r2c_2d( v->cgd->n[v->dir[0]], v->cgd->n[v->dir[1]],
                              v->cgd->v, data.out, FFTW_ESTIMATE);
    fftw_execute(p);
    write_spectra_2D ( &data );
    break;
  }
  case 3: {
    /* full size along axes 0 and 1, halved along axis 2 */
    data.n1 = v->cgd->n[0];
    data.n2 = v->cgd->n[1];
    data.n3 = ( v->cgd->n[2] / 2 ) + 1;
    data.out = fftw_malloc( sizeof(fftw_complex)*v->cgd->n[0]*v->cgd->n[1]*data.n3 );
    p = fftw_plan_dft_r2c_3d( v->cgd->n[0], v->cgd->n[1], v->cgd->n[2],
                              v->cgd->v, data.out, FFTW_ESTIMATE);
    fftw_execute(p);
    write_spectra_3D ( &data );
    break;
  }
  default:
    g_assert_not_reached ();
  }

  fftw_destroy_plan(p);
  fftw_free ( data.out );

  return TRUE;
}
void DeconvolutionTool::initFFT(const QImage *inputImage) { removeFFTObjects(); QTime time; time.start(); QString progressText = "Loading image"; setProgressInterval(1,100, progressText); // Read image size width = inputImage->width(); height = inputImage->height(); // Init FFTW structures with given size inputImageMatrix = (double*) fftw_malloc(sizeof(double) * width * height); outputImageMatrix = (double*) fftw_malloc(sizeof(double) * width * height); kernelMatrix = (double*) fftw_malloc(sizeof(double) * width * height); // kernelTempMatrix = (double*)fftw_malloc(sizeof(double)*width*height); laplacianMatrix = (double*) fftw_malloc(sizeof(double) * width * height); outLaplacianMatrix = (double*) fftw_malloc(sizeof(double) * width * height); inputImageFFT = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * (width/2+1) * height); outputImageFFT = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * (width/2+1) * height); kernelFFT = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * (width/2+1) * height); kernelTempFFT = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * (width/2+1) * height); laplacianMatrixFFT = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * (width/2+1) * height); setProgressSubValue(10); // Init FFTW plan to optimize speed - init once and use many times // Column-major Format used. When creating the plan, simply pass // the dimensions of the array to the planner in reverse order (width, height..) -> (height, width...) 
realForwardPlan = fftw_plan_dft_r2c_2d(height, width, inputImageMatrix, inputImageFFT, FFTW_MEASURE); setProgressSubValue(30); realForwardKernelPlan = fftw_plan_dft_r2c_2d(height, width, kernelMatrix, kernelFFT, FFTW_MEASURE); setProgressSubValue(50); realBackwardPlan = fftw_plan_dft_c2r_2d(height, width, inputImageFFT, outputImageMatrix, FFTW_MEASURE); setProgressSubValue(70); QApplication::processEvents(); forwardLaplacianPlan = fftw_plan_dft_r2c_2d(height, width, laplacianMatrix, laplacianMatrixFFT, FFTW_MEASURE); setProgressSubValue(90); backwardLaplacianPlan = fftw_plan_dft_c2r_2d(height, width, laplacianMatrixFFT, outLaplacianMatrix, FFTW_MEASURE); setProgressSubValue(100); qDebug("initFFT: %d ms", time.elapsed()); }
void init_gfft() { double complex *wi1; double *wir1; DEBUG_START_FUNC; wi1 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (wi1 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for wi1 allocation"); wir1 = (double *) wi1; #ifdef _OPENMP fftw_plan_with_nthreads( nthreads ); #endif #ifdef WITH_2D r2cfft = fftw_plan_dft_r2c_2d( NX, NY, wr1, w1, FFT_PLANNING); if (r2cfft == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW R2C plan creation failed"); c2rfft = fftw_plan_dft_c2r_2d( NX, NY, w1, wr1, FFT_PLANNING); if (c2rfft == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW C2R plan creation failed"); #else r2cfft = fftw_plan_dft_r2c_3d( NX, NY, NZ, wr1, w1, FFT_PLANNING); if (r2cfft == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW R2C plan creation failed"); c2rfft = fftw_plan_dft_c2r_3d( NX, NY, NZ, w1, wr1, FFT_PLANNING); if (c2rfft == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW C2R plan creation failed"); r2cfft_2Dslice = fftw_plan_dft_r2c_2d(NX,NY,wrh3,wh3,FFT_PLANNING); if (r2cfft_2Dslice == NULL) ERROR_HANDLER( ERROR_CRITICAL, "FFTW r2c slice plan creation failed"); #endif fftw_free(wi1); fft_timer=0.0; DEBUG_END_FUNC; return; }
void gravity_fft_init(void) { // dimension definition grid_NX_COMPLEX = root_nx; grid_NY_COMPLEX = (root_ny / 2 + 1); grid_NCOMPLEX = grid_NX_COMPLEX * grid_NY_COMPLEX; dx = boxsize_x / root_nx; dy = boxsize_y / root_ny; // Array allocation kx = (double *) fftw_malloc( sizeof(double) * grid_NCOMPLEX); ky = (double *) fftw_malloc( sizeof(double) * grid_NCOMPLEX); if (integrator == SEI){ kxt = (double *) fftw_malloc( sizeof(double) * grid_NCOMPLEX); w1d = (double *) fftw_malloc( sizeof(double) * root_ny * 2 ); }else{ kxt = kx; // No time dependent wave vectors. } k = (double *) fftw_malloc( sizeof(double) * grid_NCOMPLEX); density = (double *) fftw_malloc( sizeof(double) * grid_NCOMPLEX * 2); density_r = (double *) fftw_malloc( sizeof(double) * grid_NCOMPLEX * 2); fx = (double *) fftw_malloc( sizeof(double) * grid_NCOMPLEX * 2); fy = (double *) fftw_malloc( sizeof(double) * grid_NCOMPLEX * 2); // Init wavevectors for(int i = 0; i < grid_NX_COMPLEX; i++) { for(int j =0; j < grid_NY_COMPLEX; j++) { int IDX2D = i * grid_NY_COMPLEX + j; kx[IDX2D] = (2.0 * M_PI) / boxsize_x * ( fmod( (double) (i + (grid_NX_COMPLEX/2.0 )), (double) grid_NX_COMPLEX) - (double) grid_NX_COMPLEX / 2.0 ); ky[IDX2D] = (2.0 * M_PI) / boxsize_y * ((double) j); if (integrator != SEI){ k[IDX2D] = pow( kx[IDX2D]*kx[IDX2D] + ky[IDX2D] * ky[IDX2D], 0.5); // we will use 1/k, that prevents singularity // (the k=0 is set to zero by renormalization...) if ( k[IDX2D] == 0.0 ) k[IDX2D] = 1.0; } } } // Init ffts (use in place fourier transform for efficient memory usage) r2cfft = fftw_plan_dft_r2c_2d( root_nx, root_ny, density, (fftw_complex*)density, FFTW_MEASURE); c2rfft = fftw_plan_dft_c2r_2d( root_nx, root_ny, (fftw_complex*)density, density, FFTW_MEASURE); if (integrator == SEI){ for1dfft = fftw_plan_dft_1d(root_ny, (fftw_complex*)w1d, (fftw_complex*)w1d, FFTW_FORWARD, FFTW_MEASURE); bac1dfft = fftw_plan_dft_1d(root_ny, (fftw_complex*)w1d, (fftw_complex*)w1d, FFTW_BACKWARD, FFTW_MEASURE); } }
void XTable::fftwMeasure() const { // Make a new copy of data array since measurement will overwrite: // Copy data into new array to avoid NaN's, etc., but not bothering // with scaling, etc. FFTW_Array<double> t_array = _array; KTable kt( _N, 2*M_PI/(_N*_dx) ); fftw_plan plan = fftw_plan_dft_r2c_2d( _N,_N, t_array.get_fftw(), kt._array.get_fftw(), FFTW_MEASURE); #ifdef FFT_DEBUG if (plan==NULL) throw FFTInvalid(); #endif fftw_destroy_plan(plan); }
// Real-to-complex 2D FFT of 'in' (Nslow rows of Nfast values) into 'out'.
// 'out' is resized to Nslow * (Nfast/2 + 1) complex values.
static void _FFT_2D(
    vector<CD>              &out,
    const vector<double>    &in,
    int                     Nfast,
    int                     Nslow )
{
    const int nCoeffs = Nslow * (Nfast/2 + 1);

    out.resize( nCoeffs );

    // FFTW takes a non-const input pointer and its own complex type.
    double       *src = const_cast<double*>( &in[0] );
    fftw_complex *dst = reinterpret_cast<fftw_complex*>( &out[0] );

    fftw_plan plan = fftw_plan_dft_r2c_2d( Nslow, Nfast, src, dst, FFTW_ESTIMATE );

    fftw_execute( plan );
    fftw_destroy_plan( plan );
}
//-------------------------------------------------------------------- void fftw_r2c_2d(REAL *in_real, fftw_complex *in_fft, int nx, int ny) { int i,j; int nyh = ny/2+1; int index; fftw_complex *in_fft_tmp; in_fft_tmp = fftw_malloc(nx*nyh*sizeof(fftw_complex)); fftw_plan plfd; plfd = fftw_plan_dft_r2c_2d(nx,ny,in_real,in_fft_tmp,FFTW_ESTIMATE); fftw_execute(plfd); fftw_destroy_plan(plfd); for(i=0;i<nx;i++) for(j=0;j<nyh;j++) { index = i*nyh+j; in_fft[index][0] = in_fft_tmp[index][0]; in_fft[index][1] = in_fft_tmp[index][1]; } free(in_fft_tmp); }
/* Fill the FFT-related fields of a hbhankel_matrix.
 *
 * F is an Nx by Ny array stored column-major, (Lx, Ly) is the window. The
 * array is circularly shifted by (Kx - 1, Ky - 1), where Kx = Nx - Lx + 1 and
 * Ky = Ny - Ly + 1, transformed with a real-to-complex FFT, and the resulting
 * spectrum is stored in h->circ_freq. Both the forward and the backward plan
 * are kept in h for later use; the real input buffer is released. */
static void initialize_circulant(hbhankel_matrix *h,
                                 const double *F,
                                 R_len_t Nx, R_len_t Ny,
                                 R_len_t Lx, R_len_t Ly) {
  const R_len_t Kx = Nx - Lx + 1;
  const R_len_t Ky = Ny - Ly + 1;
  R_len_t row, col;
  double *shifted;
  fftw_complex *spectrum;
  fftw_plan forward, backward;

  /* Allocate needed memory */
  shifted = (double*) fftw_malloc(Nx * Ny * sizeof(double));
  spectrum = (fftw_complex*) fftw_malloc(Ny * (Nx/2 + 1) * sizeof(fftw_complex));

  /* Estimate the best plans for given input length, note, that input data is
     stored in column-major mode, that's why we're passing dimensions in
     *reverse* order */
  forward = fftw_plan_dft_r2c_2d(Ny, Nx, shifted, spectrum, FFTW_ESTIMATE);
  backward = fftw_plan_dft_c2r_2d(Ny, Nx, spectrum, shifted, FFTW_ESTIMATE);

  /* Fill input buffer */
  /* TF <- cbind(F[,Ky:Ny],F[,1:(Ky-1)]);
     TF <- rbind(TF[Kx:Nx,],TF[1:(Kx-1),]);
  */
  for (col = 0; col < Ny; ++col) {
    /* source column after the circular shift */
    const R_len_t src_col = (col + Ky - 1) % Ny;

    for (row = 0; row < Nx; ++row) {
      /* This is pretty ad-hoc solution and needs to be fixed in the future */
      shifted[row + Nx*col] = F[(row + Kx - 1) % Nx + Nx*src_col];
    }
  }

  /* Run the plan on input data */
  fftw_execute(forward);

  /* Cleanup and return */
  fftw_free(shifted);

  h->circ_freq = spectrum;
  h->r2c_plan  = forward;
  h->c2r_plan  = backward;
  h->window.x  = Lx;
  h->window.y  = Ly;
  h->length.x  = Nx;
  h->length.y  = Ny;
}
void xTable::fftwMeasure() const { // Make a new copy of data array since measurement will overwrite: double* t_array = (double*) fftw_malloc(sizeof(double)*N*N); // Copy data into new array to avoid NaN's, etc., but not bothering // with scaling, etc. for (int i=0; i<N*N; i++) t_array[i] = array[i]; kTable *kt = new kTable( N, 2*PI/(N*dx) ); fftw_plan plan = fftw_plan_dft_r2c_2d(N,N, t_array, reinterpret_cast<fftw_complex*> (kt->array), FFTW_MEASURE); if (plan==NULL) throw FFTInvalid(); delete kt; fftw_free(t_array); fftw_destroy_plan(plan); }
/*
 * Create the per-plugin processing context and store it in ctx->data.
 *
 * Two in-place FFTW plans (forward r2c and backward c2r) for a height-by-width
 * image are created with FFTW_ESTIMATE on a temporary buffer that is released
 * again right after planning. The context also gets an array of
 * ctx->num_threads buffer pointers (all NULL) and copies of width, height,
 * sgm and ns.
 *
 * Returns 0 on success. Returns -1 if width or height is not positive, if an
 * allocation fails or if a plan cannot be created; in that case everything
 * allocated by this call is released and ctx->data is left untouched.
 */
int init_global_bufs (plugin_context* ctx, int width, int height, double sgm, int ns) {
	artistic_proc_context* c = NULL;
	fft_plans_t* f = NULL;
	fftw_complex* in = NULL;
	int nx = width;
	int ny = height;

	if (nx <= 0 || ny <= 0) {
		return -1;
	}

	if (NULL == (f = malloc (sizeof(fft_plans_t)))) {
		return -1;
	}
	/* so that the cleanup code can tell which plans exist */
	f->forward = NULL;
	f->backward = NULL;

	if (NULL == (in = fftw_malloc (sizeof(fftw_complex)*ny*(nx/2+1)))) {
		goto fail;
	}
	f->forward = fftw_plan_dft_r2c_2d (ny, nx, (double*)in, in, FFTW_ESTIMATE);
	f->backward = fftw_plan_dft_c2r_2d (ny, nx, in, (double*)in, FFTW_ESTIMATE);
	/* the buffer was only needed for planning */
	fftw_free (in);
	in = NULL;
	if (NULL == f->forward || NULL == f->backward) {
		goto fail;
	}

	if (NULL == (c = malloc (sizeof(artistic_proc_context)))) {
		goto fail;
	}
	if (NULL == (c->b = calloc (ctx->num_threads, sizeof(artistic_buf_t*)))) {
		goto fail;
	}

	c->width = width;
	c->height = height;
	c->sgm = sgm;
	c->ns = ns;
	c->p = f;

	ctx->data = c;

	return 0;

fail:
	/* release whatever had been set up before the failure */
	if (NULL != c) {
		free (c);
	}
	if (NULL != f->forward) {
		fftw_destroy_plan (f->forward);
	}
	if (NULL != f->backward) {
		fftw_destroy_plan (f->backward);
	}
	free (f);
	return -1;
}
/*
  ForwardFourierTransform() computes the forward Fourier transform of one
  channel of an image and stores the result in the caller-supplied magnitude
  and phase arrays.

  The selected channel (fourier_info->channel) is read row by row, scaled by
  QuantumScale, transformed with an FFTW real-to-complex plan, and divided by
  width*width.  If fourier_info->modulus is not MagickFalse the two output
  arrays receive cabs()/carg() of each coefficient; otherwise they receive
  creal()/cimag().  Both arrays are written for height*center elements.

  Returns MagickTrue on success, or MagickFalse (after recording a
  ResourceLimitError in exception) when a buffer cannot be allocated.
*/
static MagickBooleanType ForwardFourierTransform(FourierInfo *fourier_info,
  const Image *image,double *magnitude,double *phase,ExceptionInfo *exception)
{
  CacheView
    *image_view;

  double
    n,
    *source;

  fftw_complex
    *fourier;

  fftw_plan
    fftw_r2c_plan;

  register const IndexPacket
    *indexes;

  register const PixelPacket
    *p;

  register ssize_t
    i,
    x;

  ssize_t
    y;

  /*
    Generate the forward Fourier transform: allocate and zero the real
    input buffer (height rows of width samples).
  */
  source=(double *) AcquireQuantumMemory((size_t) fourier_info->height,
    fourier_info->width*sizeof(*source));
  if (source == (double *) NULL)
    {
      (void) ThrowMagickException(exception,GetMagickModule(),
        ResourceLimitError,"MemoryAllocationFailed","`%s'",image->filename);
      return(MagickFalse);
    }
  ResetMagickMemory(source,0,fourier_info->height*fourier_info->width*
    sizeof(*source));
  /*
    Copy the requested channel into the input buffer.  If a row cannot be
    read the loop stops early and the remaining samples stay zero.
  */
  i=0L;
  image_view=AcquireVirtualCacheView(image,exception);
  for (y=0L; y < (ssize_t) fourier_info->height; y++)
  {
    p=GetCacheViewVirtualPixels(image_view,0L,y,fourier_info->width,1UL,
      exception);
    if (p == (const PixelPacket *) NULL)
      break;
    indexes=GetCacheViewVirtualIndexQueue(image_view);
    for (x=0L; x < (ssize_t) fourier_info->width; x++)
    {
      switch (fourier_info->channel)
      {
        case RedChannel:
        default:
        {
          source[i]=QuantumScale*GetPixelRed(p);
          break;
        }
        case GreenChannel:
        {
          source[i]=QuantumScale*GetPixelGreen(p);
          break;
        }
        case BlueChannel:
        {
          source[i]=QuantumScale*GetPixelBlue(p);
          break;
        }
        case OpacityChannel:
        {
          source[i]=QuantumScale*GetPixelOpacity(p);
          break;
        }
        case IndexChannel:
        {
          source[i]=QuantumScale*GetPixelIndex(indexes+x);
          break;
        }
        case GrayChannels:
        {
          source[i]=QuantumScale*GetPixelGray(p);
          break;
        }
      }
      i++;
      p++;
    }
  }
  image_view=DestroyCacheView(image_view);
  /*
    Allocate the complex output buffer (height rows of center coefficients).
  */
  fourier=(fftw_complex *) AcquireQuantumMemory((size_t) fourier_info->height,
    fourier_info->center*sizeof(*fourier));
  if (fourier == (fftw_complex *) NULL)
    {
      (void) ThrowMagickException(exception,GetMagickModule(),
        ResourceLimitError,"MemoryAllocationFailed","`%s'",image->filename);
      source=(double *) RelinquishMagickMemory(source);
      return(MagickFalse);
    }
  /*
    Create, run and destroy the plan.  The critical pragma applies only to
    the statement that follows it, i.e. to plan creation.
    NOTE(review): both plan dimensions are fourier_info->width while the
    buffers are sized with fourier_info->height; presumably the caller
    guarantees width == height -- confirm.
  */
#if defined(MAGICKCORE_OPENMP_SUPPORT)
  #pragma omp critical (MagickCore_ForwardFourierTransform)
#endif
  fftw_r2c_plan=fftw_plan_dft_r2c_2d(fourier_info->width,fourier_info->width,
    source,fourier,FFTW_ESTIMATE);
  fftw_execute(fftw_r2c_plan);
  fftw_destroy_plan(fftw_r2c_plan);
  source=(double *) RelinquishMagickMemory(source);
  /*
    Normalize Fourier transform (divide every coefficient by width*width).
  */
  n=(double) fourier_info->width*(double) fourier_info->width;
  i=0L;
  for (y=0L; y < (ssize_t) fourier_info->height; y++)
    for (x=0L; x < (ssize_t) fourier_info->center; x++)
    {
#if defined(MAGICKCORE_HAVE_COMPLEX_H)
      fourier[i]/=n;
#else
      fourier[i][0]/=n;
      fourier[i][1]/=n;
#endif
      i++;
    }
  /*
    Generate magnitude and phase (or real and imaginary).
  */
  i=0L;
  if (fourier_info->modulus != MagickFalse)
    for (y=0L; y < (ssize_t) fourier_info->height; y++)
      for (x=0L; x < (ssize_t) fourier_info->center; x++)
      {
        magnitude[i]=cabs(fourier[i]);
        phase[i]=carg(fourier[i]);
        i++;
      }
  else
    for (y=0L; y < (ssize_t) fourier_info->height; y++)
      for (x=0L; x < (ssize_t) fourier_info->center; x++)
      {
        magnitude[i]=creal(fourier[i]);
        phase[i]=cimag(fourier[i]);
        i++;
      }
  fourier=(fftw_complex *) RelinquishMagickMemory(fourier);
  return(MagickTrue);
}
//constructor of the class Solver_FFTW::Solver_FFTW(){ /*===============================================*/ Input* initInput = new Input(); numOfXGrid = initInput->getXGridNum(); numOfYGrid = initInput->getYGridNum(); cout << "Input Finished" << endl; cout << "Grids along x axis: " << numOfXGrid << endl; cout << "Grids along y axis: " << numOfYGrid << endl; /*====================================================== Initializing arrays in real space ======================================================*/ v = new double*[numOfXGrid]; w = new double*[numOfXGrid]; initE = 0; temp_Velocity = new double[numOfXGrid*numOfYGrid]; firstD_u = new double[numOfXGrid*numOfYGrid]; secondD_u = new double[numOfXGrid*numOfYGrid]; for(int i = 0; i < numOfXGrid; i++){ v[i] = new double[numOfYGrid]; w[i] = new double[numOfYGrid]; for(int j = 0; j < numOfYGrid; j++){ v[i][j] = initInput->getXVelocity(i,j); w[i][j] = initInput->getYVelocity(i,j); initE += v[i][j]*v[i][j] + w[i][j]*w[i][j]; //calculating the initial energy } } for(int i = 0; i < numOfXGrid*numOfYGrid; i++){ temp_Velocity[i] = 0; firstD_u[i] = 0; secondD_u[i] = 0; } /*===================================================== Initializing first order derivatives =====================================================*/ v_x = new double*[numOfXGrid]; v_y = new double*[numOfXGrid]; w_x = new double*[numOfXGrid]; w_y = new double*[numOfXGrid]; for(int i = 0; i < numOfXGrid; i++){ v_x[i] = new double[numOfYGrid]; v_y[i] = new double[numOfYGrid]; w_x[i] = new double[numOfYGrid]; w_y[i] = new double[numOfYGrid]; for(int j = 0; j < numOfYGrid; j++){ v_x[i][j] = 0; v_y[i][j] = 0; w_x[i][j] = 0; w_y[i][j] = 0; } } /*===================================================== Initializing second order derivatives =====================================================*/ v_x_x = new double*[numOfXGrid]; v_y_y = new double*[numOfXGrid]; w_x_x = new double*[numOfXGrid]; w_y_y = new double*[numOfXGrid]; for(int i = 0; i < numOfXGrid; i++){ v_x_x[i] = 
new double[numOfYGrid]; v_y_y[i] = new double[numOfYGrid]; w_x_x[i] = new double[numOfYGrid]; w_y_y[i] = new double[numOfYGrid]; for(int j = 0; j < numOfYGrid; j++){ v_x_x[i][j] = 0; v_y_y[i][j] = 0; w_x_x[i][j] = 0; w_y_y[i][j] = 0; } } /*======================================================== Initializing the forces ========================================================*/ externalFx = new double*[numOfXGrid]; externalFy = new double*[numOfXGrid]; for(int i = 0;i < numOfXGrid; i++){ externalFx[i] = new double[numOfYGrid]; externalFy[i] = new double[numOfYGrid]; for(int j = 0;j < numOfYGrid; j++){ externalFx[i][j] = 0; externalFy[i][j] = 0; } } /*======================================================== initializing multiple threads ==========================================================*/ if(fftw_init_threads()){ fftw_plan_with_nthreads(THREADS); cout << "Using "<< THREADS << " threads" << endl << endl; } else { cout << "Using multiple threads failed" << endl; exit(0); } /*===================================================== Initializing arrays in fourier space =====================================================*/ V = (fftw_complex**)fftw_malloc(sizeof(fftw_complex*)*numOfXGrid); W = (fftw_complex**)fftw_malloc(sizeof(fftw_complex*)*numOfXGrid); temp_U = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*numOfXGrid*(numOfYGrid/2+1)); firstD_U = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*numOfXGrid*(numOfYGrid/2+1)); secondD_U = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*numOfXGrid*(numOfYGrid/2+1)); for(int i = 0; i < numOfXGrid; i++){ V[i] = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*(numOfYGrid/2+1)); W[i] = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*(numOfYGrid/2+1)); for(int j = 0; j < numOfYGrid/2 + 1; j++){ V[i][j][0] = 0; V[i][j][1] = 0; W[i][j][0] = 0; W[i][j][1] = 0; } } for(int i = 0; i < numOfXGrid*(numOfYGrid/2+1); i++){ temp_U[i][0] = 0; temp_U[i][1] = 0; firstD_U[i][0] = 0; firstD_U[i][1] = 0; secondD_U[i][0] = 0; 
secondD_U[i][1] = 0; } temp = (fftw_complex**)fftw_malloc(sizeof(fftw_complex*)*numOfXGrid); for(int i = 0; i < numOfXGrid; i++){ temp[i] = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*(numOfYGrid/2+1)); } /**===================================================== Initializing Plans ======================================================*/ plan_r2c = fftw_plan_dft_r2c_2d(numOfXGrid,numOfYGrid,temp_Velocity,temp_U,FFTW_ESTIMATE); plan_c2r = fftw_plan_dft_c2r_2d(numOfXGrid,numOfYGrid,temp_U,temp_Velocity,FFTW_ESTIMATE); plan_firstD = fftw_plan_dft_c2r_2d(numOfXGrid,numOfYGrid,firstD_U,firstD_u,FFTW_ESTIMATE); plan_secondD = fftw_plan_dft_c2r_2d(numOfXGrid,numOfYGrid,secondD_U,secondD_u,FFTW_ESTIMATE); /*====================================================== initializing output energy file ======================================================*/ stringstream fileNameOfE; fileNameOfE << OUTPUT_PATH << "E" << ".txt"; energy.open(fileNameOfE.str().c_str()); /*====================================================== generating the readMe.txt file ======================================================*/ stringstream fileNameOfReadMe; fileNameOfReadMe << OUTPUT_PATH << "readMe.txt"; readMe.open(fileNameOfReadMe.str().c_str()); readMe << "Nx = " << numOfXGrid << endl; readMe << "Ny = " << numOfYGrid << endl; readMe << "dt = " << TIME_STEP << endl; readMe << "Initial Energy = " << initE << endl; readMe << "Viscosity = "<< VISCOSITY << endl; readMe << "Rescaled Viscosity =" << VISCOSITY/sqrt(initE*(numOfXGrid-1)*(numOfYGrid-1)); readMe.close(); /*====================================================== Initializing Adams array. 
======================================================*/ Adams_v = new double**[3]; Adams_w = new double**[3]; for(int i = 0;i < 3; i++){ Adams_v[i] = new double*[numOfXGrid]; Adams_w[i] = new double*[numOfXGrid]; for(int j = 0; j < numOfXGrid; j++){ Adams_v[i][j] = new double[numOfYGrid]; Adams_w[i][j] = new double[numOfYGrid]; for(int k = 0; k < numOfYGrid; k++){ Adams_v[i][j][k] = 0; Adams_w[i][j][k] = 0; } } } /*==================================================*/ return; }
void test04() /******************************************************************************/ /* Purpose: TEST04: apply FFT to real 2D data. Discussion: In this example, we generate NX=8 by NY=10 random real values stored as an NX by NY array of type DOUBLE named "IN". We have FFTW3 compute the Fourier transform of this data named "OUT". We have FFTW3 compute the inverse Fourier transform of "OUT" to get "IN2", which should be the original input data, scaled by NX * NY. The Fourier coefficients are stored in an NX by NYH array where NYH = (NY/2) + 1. We only compute about half the data because of real data implies symmetric FFT coefficients. a[i*nyh+j][0] is the real part of A(I,J). a[i*nyh+j][1] is the imaginary part of A(I,J).. Licensing: This code is distributed under the GNU LGPL license. Modified: 05 November 2007 Author: John Burkardt */ { int i; double *in; double *in2; int j; int nx = 8; int ny = 10; int nyh; fftw_complex *out; fftw_plan plan_backward; fftw_plan plan_forward; unsigned int seed = 123456789; printf("\n"); printf("TEST04\n"); printf(" Demonstrate FFTW3 on a %d by %d array of real data.\n", nx, ny); printf("\n"); printf(" Transform data to FFT coefficients.\n"); printf(" Backtransform FFT coefficients to recover data.\n"); printf(" Compare recovered data to original data.\n"); /* Create the input array, an NX by NY array of doubles. */ in = (double *)malloc(sizeof(double) * nx * ny); srand(seed); for (i = 0; i < nx; i++) { for (j = 0; j < ny; j++) { in[i*ny + j] = rand(); } } printf("\n"); printf(" Input Data:\n"); printf("\n"); for (i = 0; i < nx; i++) { for (j = 0; j < ny; j++) { printf(" %4d %4d %12f\n", i, j, in[i*ny + j]); } } /* Create the output array OUT, which is of type FFTW_COMPLEX, and of a size NX * NYH that is roughly half the dimension of the input data (ignoring the fact that the input data is real, and the FFT coefficients are complex). 
*/ nyh = (ny / 2) + 1; out = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * nx * nyh); plan_forward = fftw_plan_dft_r2c_2d(nx, ny, in, out, FFTW_ESTIMATE); fftw_execute(plan_forward); printf("\n"); printf(" Output FFT Coefficients:\n"); printf("\n"); for (i = 0; i < nx; i++) { for (j = 0; j < nyh; j++) { printf(" %4d %4d %12f %12f\n", i, j, out[i*nyh + j][0], out[i*nyh + j][1]); } } /* Recreate the input array. */ in2 = (double *)malloc(sizeof(double) * nx * ny); plan_backward = fftw_plan_dft_c2r_2d(nx, ny, out, in2, FFTW_ESTIMATE); fftw_execute(plan_backward); printf("\n"); printf(" Recovered input data divided by NX * NY:\n"); printf("\n"); for (i = 0; i < nx; i++) { for (j = 0; j < ny; j++) { printf(" %4d %4d %12f\n", i, j, in2[i*ny + j] / (double)(nx * ny)); } } /* Free up the allocated memory. */ fftw_destroy_plan(plan_forward); fftw_destroy_plan(plan_backward); free(in); free(in2); fftw_free(out); return; }
/*****************
 * Version DOUBLE
 *****************/
/**
 * Convolves img with kernel_img through FFTW real transforms.
 *
 * Both inputs are copied into zero-padded buffers of size
 * (img.sX + kernel.sX - 1) x (img.sY + kernel.sY - 1), transformed,
 * multiplied point-wise in the Fourier domain (with the 1/(W*H) normalisation
 * folded into the product) and transformed back.
 *
 * @param img         input image
 * @param kernel_img  convolution kernel
 * @param output      receives the result; it is written only when its size is
 *                    exactly the padded size, otherwise it is left untouched
 *
 * The padded buffers are row-major with `nheight` elements per row, matching
 * the (nwidth, nheight) dimensions passed to the FFTW planner.
 */
void Chi2LibFFTW::conv2d_fft(MyMatrix<double> *img, MyMatrix<double> *kernel_img, MyMatrix<double> *output){
	MyLogger::log()->debug("[Chi2LibFFTW][conv2d_fft] Generating Convolution using FFTW");

	fftw_complex *fft_image, *fft_kernel;
	fftw_plan plan_forward_image, plan_forward_kernel, plan_backward;
	// Separate work buffers: the inputs are copied rather than transformed in place.
	double *ifft_result, *data, *kernel;

	int nwidth  = (int)(img->sX()+kernel_img->sX()-1);
	int nheight = (int)(img->sY()+kernel_img->sY()-1);

	// Real buffers hold nwidth*nheight values; the r2c output keeps only
	// nheight/2+1 complex values per row.
	const size_t real_size    = (size_t)nwidth * (size_t)nheight;
	const size_t complex_size = (size_t)nwidth * (size_t)(nheight/2 + 1);

	// Allocation and planning are serialised with mutex1.
	pthread_mutex_lock( &mutex1 );
	data        = fftw_alloc_real(real_size);
	kernel      = fftw_alloc_real(real_size);
	ifft_result = fftw_alloc_real(real_size);
	fft_image   = fftw_alloc_complex(complex_size);
	fft_kernel  = fftw_alloc_complex(complex_size);

	plan_forward_image  = fftw_plan_dft_r2c_2d( nwidth, nheight, data, fft_image, FFTW_ESTIMATE );
	plan_forward_kernel = fftw_plan_dft_r2c_2d( nwidth, nheight, kernel, fft_kernel, FFTW_ESTIMATE );
	plan_backward       = fftw_plan_dft_c2r_2d( nwidth, nheight, fft_image, ifft_result, FFTW_ESTIMATE );
	pthread_mutex_unlock( &mutex1 );

	// Copy kernel and image into the zero-padded buffers.
	// Row stride is nheight (the second plan dimension); using nwidth here
	// overruns the buffer whenever nwidth > nheight.
	for(unsigned int x = 0 ; x < (unsigned int)nwidth ; ++x ){
		const size_t row = (size_t)x * (size_t)nheight;
		for(unsigned int y=0; y < (unsigned int)nheight; ++y){
			if(x < kernel_img->sX() && y < kernel_img->sY())
				kernel[row + y] = kernel_img->getValue(x,y);
			else
				kernel[row + y] = 0;

			if(x < img->sX() && y < img->sY())
				data[row + y] = img->getValue(x,y);
			else
				data[row + y] = 0;
		}
	}

	MyLogger::log()->debug("[Chi2LibFFTW][conv2d_fft] Starting FFTW");
	/** FFT Execute */
	fftw_execute( plan_forward_image );
	fftw_execute( plan_forward_kernel );

	// Complex point-wise product; FFTW is unnormalised, so divide by W*H here.
	const double nwnh = (double)nwidth * (double)nheight;
	for(size_t i = 0; i < complex_size; ++i){
		const double f1 = fft_image[i][0]*fft_kernel[i][0] - fft_image[i][1]*fft_kernel[i][1];
		const double f2 = fft_image[i][0]*fft_kernel[i][1] + fft_image[i][1]*fft_kernel[i][0];
		fft_image[i][0] = f1/nwnh;
		fft_image[i][1] = f2/nwnh;
	}

	// Inverse transform of the product
	fftw_execute( plan_backward );
	/** FFT Execute */
	MyLogger::log()->debug("[Chi2LibFFTW][conv2d_fft] FFTW Finished");

	if(output->sX() == (unsigned int)nwidth && output->sY() == (unsigned int)nheight)
		for(unsigned int x = 0 ; x < output->sX() ; ++x ){
			const size_t row = (size_t)x * (size_t)nheight;
			for(unsigned int y = 0 ; y < output->sY() ; ++y ){
				output->at(x,y) = ifft_result[row + y];
			}
		}

	/* free memory -- plan destruction is not thread-safe in FFTW, so it takes
	 * the same lock as plan creation */
	pthread_mutex_lock( &mutex1 );
	fftw_destroy_plan( plan_forward_image );
	fftw_destroy_plan( plan_forward_kernel );
	fftw_destroy_plan( plan_backward );

	fftw_free( data );
	fftw_free( kernel );
	fftw_free( ifft_result );

	fftw_free( fft_image );
	fftw_free( fft_kernel );
	pthread_mutex_unlock( &mutex1 );
}
/* Real to complex forward transform.
 *
 * Converts the single-band, uncoded image `in` to double with im_clip2d(),
 * runs an FFTW real-to-complex 2D transform on it, and writes a full-width
 * double-complex image to `out` line by line. Every value is divided by
 * Xsize * Ysize. The right half of each output line, which FFTW does not
 * compute, is rebuilt from the conjugate-mirrored left half.
 *
 * `dummy` is the owner passed to im_open_local() / IM_ARRAY() for all
 * temporaries. Returns 0 on success, -1 on error.
 */
static int
rfwfft1( IMAGE *dummy, IMAGE *in, IMAGE *out )
{
	const int size = in->Xsize * in->Ysize;
	const int half_width = in->Xsize / 2 + 1;

	/* Pack to double real here.
	 */
	IMAGE *real = im_open_local( dummy, "fwfft1:1", "t" );

	/* Transform to halfcomplex here.
	 */
	double *half_complex = IM_ARRAY( dummy,
		in->Ysize * half_width * 2, double );

	/* We have to have a separate real buffer for the planner to work on.
	 */
	double *planner_scratch = IM_ARRAY( dummy,
		in->Xsize * in->Ysize, double );

	fftw_plan plan;
	double *buf, *q, *p;
	int x, y;

	/* planner_scratch is checked too: it is handed to the planner below and
	 * must not be NULL.
	 */
	if( !real || !half_complex || !planner_scratch ||
		im_pincheck( in ) || im_outcheck( out ) )
		return( -1 );
	if( in->Coding != IM_CODING_NONE || in->Bands != 1 ) {
		im_error( "im_fwfft", _( "one band uncoded only" ) );
		return( -1 );
	}
	if( im_clip2d( in, real ) )
		return( -1 );

	/* Make the plan for the transform. Yes, they really do use nx for
	 * height and ny for width. Use a separate scratch buffer for the
	 * planner, we can't overwrite real->data
	 */
	if( !(plan = fftw_plan_dft_r2c_2d( in->Ysize, in->Xsize,
		planner_scratch, (fftw_complex *) half_complex,
		0 )) ) {
		im_error( "im_fwfft", _( "unable to create transform plan" ) );
		return( -1 );
	}

	/* Run the plan on the real data rather than on the scratch buffer.
	 */
	fftw_execute_dft_r2c( plan,
		(double *) real->data, (fftw_complex *) half_complex );

	fftw_destroy_plan( plan );

	/* WIO to out.
	 */
	if( im_cp_desc( out, in ) )
		return( -1 );
	out->Bbits = IM_BBITS_DPCOMPLEX;
	out->BandFmt = IM_BANDFMT_DPCOMPLEX;
	if( im_setupout( out ) )
		return( -1 );
	if( !(buf = (double *) IM_ARRAY( dummy,
		IM_IMAGE_SIZEOF_LINE( out ), PEL )) )
		return( -1 );

	/* Copy to out and normalise. The right half is the up/down and
	 * left/right flip of the left, but conjugated. Do the first
	 * row separately, then mirror around the centre row.
	 */
	p = half_complex;
	q = buf;

	for( x = 0; x < half_width; x++ ) {
		q[0] = p[0] / size;
		q[1] = p[1] / size;
		p += 2;
		q += 2;
	}

	/* Row 0 mirrors onto itself: walk the left half backwards, conjugating.
	 */
	p = half_complex + ((in->Xsize + 1) / 2 - 1) * 2;

	for( x = half_width; x < out->Xsize; x++ ) {
		q[0] = p[0] / size;
		q[1] = -1.0 * p[1] / size;
		p -= 2;
		q += 2;
	}

	if( im_writeline( 0, out, (PEL *) buf ) )
		return( -1 );

	for( y = 1; y < out->Ysize; y++ ) {
		p = half_complex + y * half_width * 2;
		q = buf;

		for( x = 0; x < half_width; x++ ) {
			q[0] = p[0] / size;
			q[1] = p[1] / size;
			p += 2;
			q += 2;
		}

		/* Good grief. The right half of row y comes from row
		 * (Ysize - y) of the half-complex data, read backwards.
		 */
		p = half_complex + 2 *
			((out->Ysize - y + 1) * half_width - 2 +
				(in->Xsize & 1));

		for( x = half_width; x < out->Xsize; x++ ) {
			q[0] = p[0] / size;
			q[1] = -1.0 * p[1] / size;
			p -= 2;
			q += 2;
		}

		if( im_writeline( y, out, (PEL *) buf ) )
			return( -1 );
	}

	return( 0 );
}
/*
   Round-trip test of FFTW real transforms on PETSc sequential vectors.

   For DIM = 1..4 the program builds a real vector x of n^DIM entries, a
   padded frequency-space vector y and a reconstruction vector z, creates
   r2c / c2r FFTW plans directly on the vector arrays, fills x according to
   the -function option (random, constant or tanh), runs forward+backward
   three times, scales z by 1/N and reports the 1-norm of z - x when it
   exceeds 1.e-11.

   The example only runs on one process and only with real PetscScalar.
   No FFTW-MPI routine is called here: the earlier call to
   fftw_mpi_local_size_2d() had an unused result and was made without
   fftw_mpi_init(), so it has been removed together with its locals.
*/
PetscInt main(PetscInt argc,char **args)
{
  typedef enum {RANDOM, CONSTANT, TANH, NUM_FUNCS} FuncType;
  const char    *funcNames[NUM_FUNCS] = {"random", "constant", "tanh"};
  PetscMPIInt    size;
  PetscInt       n = 10,N,Ny,ndim=4,dim[4],DIM,i;
  Vec            x,y,z;
  PetscScalar    s;
  PetscRandom    rdm;
  PetscReal      enorm;
  PetscInt       func=RANDOM;
  FuncType       function = RANDOM;
  PetscBool      view = PETSC_FALSE;
  PetscErrorCode ierr;
  PetscScalar    *x_array,*y_array,*z_array;
  fftw_plan      fplan,bplan;

  ierr = PetscInitialize(&argc,&args,(char *)0,help);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP, "This example requires real numbers");
#endif

  ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
  if (size != 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP, "This is a uniprocessor example only!");

  ierr = PetscOptionsBegin(PETSC_COMM_WORLD, PETSC_NULL, "FFTW Options", "ex142");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-function", "Function type", "ex142", funcNames, NUM_FUNCS, funcNames[function], &func, PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsBool("-vec_view draw", "View the functions", "ex112", view, &view, PETSC_NULL);CHKERRQ(ierr);
    function = (FuncType) func;
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  for (DIM = 0; DIM < ndim; DIM++){
    dim[DIM] = n; /* size of real space vector in DIM-dimension */
  }
  ierr = PetscRandomCreate(PETSC_COMM_SELF, &rdm);CHKERRQ(ierr);
  ierr = PetscRandomSetFromOptions(rdm);CHKERRQ(ierr);

  for (DIM = 1; DIM < 5; DIM++){
    /* create vectors of length N=dim[0]*dim[1]* ...*dim[DIM-1] */
    /*----------------------------------------------------------*/
    N = Ny = 1;
    for (i = 0; i < DIM-1; i++) {
      N *= dim[i];
    }
    Ny = N;
    Ny *= 2*(dim[DIM-1]/2 + 1); /* add padding elements to output vector y */
    N *= dim[DIM-1];

    ierr = PetscPrintf(PETSC_COMM_SELF, "\n %d-D: FFTW on vector of size %d \n",DIM,N);CHKERRQ(ierr);
    ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject) x, "Real space vector");CHKERRQ(ierr);

    ierr = VecCreateSeq(PETSC_COMM_SELF,Ny,&y);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject) y, "Frequency space vector");CHKERRQ(ierr);

    ierr = VecDuplicate(x,&z);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject) z, "Reconstructed vector");CHKERRQ(ierr);

    /* Set fftw plan */
    /*----------------------------------*/
    ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
    ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
    ierr = VecGetArray(z,&z_array);CHKERRQ(ierr);

    unsigned int flags = FFTW_ESTIMATE; //or FFTW_MEASURE
    /* The data in the in/out arrays is overwritten during FFTW_MEASURE planning, so such planning
       should be done before the input is initialized by the user. */
    printf("DIM: %d, N %d, Ny %d\n",DIM,N,Ny);

    switch (DIM){
    case 1:
      fplan = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex*)y_array, flags);
      bplan = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex*)y_array, (double *)z_array, flags);
      break;
    case 2:
      fplan = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array, (fftw_complex*)y_array,flags);
      bplan = fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)y_array,(double *)z_array,flags);
      break;
    case 3:
      fplan = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array, (fftw_complex*)y_array,flags);
      bplan = fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)y_array,(double *)z_array,flags);
      break;
    default:
      fplan = fftw_plan_dft_r2c(DIM,dim,(double *)x_array, (fftw_complex*)y_array,flags);
      bplan = fftw_plan_dft_c2r(DIM,dim,(fftw_complex*)y_array,(double *)z_array,flags);
      break;
    }

    ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
    ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
    ierr = VecRestoreArray(z,&z_array);CHKERRQ(ierr);

    /* Initialize Real space vector x:
       The data in the in/out arrays is overwritten during FFTW_MEASURE planning, so planning
       should be done before the input is initialized by the user.
    --------------------------------------------------------*/
    if (function == RANDOM) {
      ierr = VecSetRandom(x, rdm);CHKERRQ(ierr);
    } else if (function == CONSTANT) {
      ierr = VecSet(x, 1.0);CHKERRQ(ierr);
    } else if (function == TANH) {
      ierr = VecGetArray(x, &x_array);CHKERRQ(ierr);
      for (i = 0; i < N; ++i) {
        x_array[i] = tanh((i - N/2.0)*(10.0/N));
      }
      ierr = VecRestoreArray(x, &x_array);CHKERRQ(ierr);
    }
    if (view) {
      ierr = VecView(x, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
    }

    /* FFT - also test repeated transformation */
    /*-------------------------------------------*/
    ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
    ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
    ierr = VecGetArray(z,&z_array);CHKERRQ(ierr);
    for (i=0; i<3; i++){
      /* FFTW_FORWARD */
      fftw_execute(fplan);
      //printf("\n fout:\n");
      //fftw_complex* fout = (fftw_complex*)y_array;
      //for (i=0; i<N/2+1; i++) printf("%d (%g %g)\n",i,fout[i][0],fout[i][1]);

      /* FFTW_BACKWARD: destroys its input array 'y_array' even for out-of-place transforms! */
      fftw_execute(bplan);
    }
    ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
    ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
    ierr = VecRestoreArray(z,&z_array);CHKERRQ(ierr);

    /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */
    /*------------------------------------------------------------------*/
    s = 1.0/(PetscReal)N;
    ierr = VecScale(z,s);CHKERRQ(ierr);
    if (view) {ierr = VecView(x, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);}
    if (view) {ierr = VecView(z, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);}
    ierr = VecAXPY(z,-1.0,x);CHKERRQ(ierr);
    ierr = VecNorm(z,NORM_1,&enorm);CHKERRQ(ierr);
    if (enorm > 1.e-11){
      ierr = PetscPrintf(PETSC_COMM_SELF," Error norm of |x - z| %G\n",enorm);CHKERRQ(ierr);
    }

    /* free spaces */
    fftw_destroy_plan(fplan);
    fftw_destroy_plan(bplan);
    ierr = VecDestroy(&x);CHKERRQ(ierr);
    ierr = VecDestroy(&y);CHKERRQ(ierr);
    ierr = VecDestroy(&z);CHKERRQ(ierr);
  }
  ierr = PetscRandomDestroy(&rdm);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return 0;
}
/**
   This routine performs most of the work involved in the analyze modes2d
   command. A breakdown of what the routine does is as follows

   \li fftw plans and in / out arrays are (re)initialized whenever
       mode_grid_changed is set
   \li calc_fluctuations is called to fill the input grid
   \li the grid is fourier transformed using the fftw library and the
       result is copied into \a modes

   \param modes        destination for mode_grid_3d[xdir] * (mode_grid_3d[ydir]/2 + 1)
                       complex coefficients
   \param switch_fluc  1 for height grid, 0 for thickness
   \return 1 on success, -1 on failure

   The work buffers are allocated with fftw_malloc() so that they are
   correctly paired with the fftw_free() calls used to release them.
*/
int modes2d(fftw_complex* modes, int switch_fluc) {
  /* All these variables need to be static so that the fftw3 plan can
     be initialised and reused */
  static  fftw_plan mode_analysis_plan; // height grid
  /** Input values for the fft */
  static  double* height_grid;
  /** Output values for the fft */
  static  fftw_complex* result;

  /** Every time a change is made to the grid calculate the fftw plan
      for the subsequent fft and destroy any existing plans */
  if ( mode_grid_changed ) {
    STAT_TRACE(fprintf(stderr,"%d,initializing fftw for mode analysis \n",this_node));
    if ( xdir + ydir + zdir == -3 ) {
      char *errtxt = runtime_error(128);
      ERROR_SPRINTF(errtxt,"{092 attempt to perform mode analysis with uninitialized grid} ");
      return -1;
    }

    STAT_TRACE(fprintf(stderr,"%d,destroying old fftw plan \n",this_node));

    /* Release the previous buffers and plan. The statics start out as NULL,
       so guard each call and reset the pointers so that a failure below
       cannot lead to a double free on the next call. */
    if ( result ) fftw_free(result);
    if ( height_grid ) fftw_free(height_grid);
    if ( mode_analysis_plan ) fftw_destroy_plan(mode_analysis_plan);
    result = NULL;
    height_grid = NULL;
    mode_analysis_plan = NULL;
    /* NOTE(review): per the FFTW manual, fftw_cleanup() makes every existing
       plan in the process undefined -- confirm no other FFTW plans are live
       when the mode grid changes. */
    fftw_cleanup();

    /* Allocate memory for input and output arrays */
    height_grid = (double*) fftw_malloc((mode_grid_3d[xdir])*sizeof(double)*mode_grid_3d[ydir]);
    result      = (fftw_complex*) fftw_malloc((mode_grid_3d[ydir]/2+1)*(mode_grid_3d[xdir])*sizeof(fftw_complex));
    if ( height_grid && result ) {
      mode_analysis_plan = fftw_plan_dft_r2c_2d(mode_grid_3d[xdir],mode_grid_3d[ydir],height_grid, result,FFTW_ESTIMATE);
    }

    if ( !height_grid || !result || !mode_analysis_plan ) {
      /* mode_grid_changed stays set, so the next call retries the setup */
      if ( result ) fftw_free(result);
      if ( height_grid ) fftw_free(height_grid);
      result = NULL;
      height_grid = NULL;
      fprintf(stderr,"%d,could not set up fftw for mode analysis \n",this_node);
      return -1;
    }

    STAT_TRACE(fprintf(stderr,"%d,created new fftw plan \n",this_node));
    mode_grid_changed = 0;
  }

  /* Update particles */
  updatePartCfg(WITHOUT_BONDS);
  //Make sure particles are sorted
  if (!sortPartCfg()) {
    fprintf(stderr,"%d,could not sort partCfg \n",this_node);
    return -1;
  }

  if ( !calc_fluctuations(height_grid, switch_fluc)) {
    char *errtxt = runtime_error(128);
    ERROR_SPRINTF(errtxt,"{034 calculation of height grid failed } ");
    return -1;
  }

  STAT_TRACE(fprintf(stderr,"%d,calling fftw \n",this_node));

  fftw_execute(mode_analysis_plan);

  /* Copy result to modes */
  memcpy(modes, result, mode_grid_3d[xdir]*(mode_grid_3d[ydir]/2 + 1)*sizeof(fftw_complex));

  STAT_TRACE(fprintf(stderr,"%d,called fftw \n",this_node));

  return 1;
}
// calculate a degree 3 spline of 2 dim periodic data: // in each cell, data is approximated by degree 3 polynomial // f(x,y) = p00*N0(x)N0(y) + p01*N0(x)N1(y) + p10*N1(x)N0(y) + ... // where Ni(x) = binom(3,i)*x^i(1-x)^(3-i) spline2d_t spline2d_init(double *data, int m, int n) { int i, j, l1, l2, idx; double *d; double complex *kcx, *kcy, *kd, *kp; double complex wx[4], wy[4]; double complex kcxx, kcyy; fftw_plan cxplan, cyplan, dplan, iplan; spline2d_t myspline; myspline.nx = m; myspline.ny = n; myspline.cells = calloc(m*n,sizeof(spline2d_cell_t)); d = calloc(m*(n/2+1)*2,sizeof(double)); kcx = calloc(m,sizeof(double complex)); kcy = calloc(n,sizeof(double complex)); kd = calloc(m*(n/2+1),sizeof(double complex)); kp = calloc(m*(n/2+1),sizeof(double complex)); cxplan = fftw_plan_dft_1d(m, kcx, kcx, FFTW_BACKWARD, FFTW_MEASURE); cyplan = fftw_plan_dft_1d(n, kcy, kcy, FFTW_BACKWARD, FFTW_MEASURE); dplan = fftw_plan_dft_r2c_2d(m, n, d, kd, FFTW_MEASURE); iplan = fftw_plan_dft_c2r_2d(m, n, kp, d, FFTW_MEASURE); // initialize kcx,kcy kcx[0] = 1.0 + 0.0*I; kcx[1] = 4.0 + 0.0*I; kcx[2] = 1.0 + 0.0*I; kcy[0] = 1.0 + 0.0*I; kcy[1] = 4.0 + 0.0*I; kcy[2] = 1.0 + 0.0*I; fftw_execute(cxplan); fftw_execute(cyplan); // calculate kd for(i = 0; i < m; i++) { for(j = 0; j < n; j++) { idx = i*(n/2 + 0)*2 + j; d[idx] = data[i*n + j]; } } fftw_execute(dplan); // kd now holds fft of data // calculate p's for(l1 = 0; l1 < 4; l1++) { for(l2 = 0; l2 < 4; l2++) { for(i = 0; i < m; i++) { for(j = 0; j < n/2 + 1; j++) { idx = i*(n/2 + 1) + j; // weights calcualted here!! 
wx[0] = 1.0; wx[1] = cexp(2.0*M_PI*I*i/(double)m)*4.0; wx[1]+= cexp(4.0*M_PI*I*i/(double)m)*2.0; wx[2] = cexp(2.0*M_PI*I*i/(double)m)*2.0; wx[2]+= cexp(4.0*M_PI*I*i/(double)m)*4.0; wx[3] = cexp(2.0*M_PI*I*i/(double)m); wy[0] = 1.0; wy[1] = cexp(2.0*M_PI*I*j/(double)n)*4.0; wy[1]+= cexp(4.0*M_PI*I*j/(double)n)*2.0; wy[2] = cexp(2.0*M_PI*I*j/(double)n)*2.0; wy[2]+= cexp(4.0*M_PI*I*j/(double)n)*4.0; wy[3] = cexp(2.0*M_PI*I*j/(double)n); if(l1 == 0 || l1 == 3) { kcxx = 1.0; } else { kcxx = kcx[i]; } if(l2 == 0 || l2 == 3) { kcyy = 1.0; } else { kcyy = kcy[j]; } if(cabs(kcxx*kcyy) > 0) { kp[idx] = kd[idx]*wx[l1]*wy[l2]/(kcxx*kcyy); } } } fftw_execute(iplan); // d holds p[i*n + j][l1*4 + l2] unnormalized for(i = 0; i < m; i++) { for(j = 0; j < n; j++) { myspline.cells[i*n + j].p[l1][l2] = d[i*(n/2+0)*2 + j]/(double)(m*n); } } } } fftw_destroy_plan(cxplan); fftw_destroy_plan(cyplan); fftw_destroy_plan(dplan); fftw_destroy_plan(iplan); free(kcx); free(kcy); free(kd); free(kp); return myspline; }
int main() { // apply FFT to real 2D data. double *in; double *apex; int W = 640; int H = 480; int half_H = (H / 2) + 1; int x; int y; fftw_complex *out; fftw_complex *apex_f; fftw_plan plan_backward; fftw_plan plan_apex; fftw_plan plan_forward; unsigned int seed = 123456789; srand(seed); in = (double *) malloc_check(sizeof(double) * W * H); for(x = 0; x < W; x++) { for(y = 0; y < H; y++) { #if 1 in[x*H+y] = ( double ) rand ( ) / ( RAND_MAX ); #else in[x*H+y] = 0; #endif } } in[(H/2) + (W/2)*H] = 1; in[(H/2)+3 + (W/2 + 3)*H] = 1; in[10 + (20)*H] = 1; in[H-3 + (W-3)*H] = 1; y = W * H; for (x = 0; x < y; x++) { in[x] *= PALETTE_LEN -10; } apex = (double*)malloc_check(sizeof(double) * W * H); double apex_sum = 0; for(x = 0; x < W; x++) { for(y = 0; y < H; y++) { double dist = 0; int xx = x; int yy = y; if (xx >= W/2) xx = W - x; if (yy >= H/2) yy = H - y; dist = sqrt(xx*xx + yy*yy); double v = 8.01 - dist; if (v < 0) v = 0; #if 0 if (x == 2 && y == 1) v = 302.1; #endif #if 0 if (x == W / 2 && y == H / 2) v = 850; #endif #if 0 if (x < W/2 || y > H / 2) v = -v * 1.85; #endif #if 0 if (x == W/3-1 && y == H/3-1) v = 200; if (x == W/3 && y == H/3) v = -200; #endif apex_sum += v; apex[x*H+y] = v; } } double burn = 1.005; double apex_mul = (burn / (W*H)) / apex_sum; printf("%f %f\n", apex_sum, apex_mul); y = W * H; for (x = 0; x < y; x++) { apex[x] *= apex_mul; } apex_f = fftw_malloc(sizeof(fftw_complex) * W * half_H); plan_apex = fftw_plan_dft_r2c_2d(W, H, apex, apex_f, FFTW_ESTIMATE); fftw_execute(plan_apex); out = fftw_malloc(sizeof(fftw_complex) * W * half_H); plan_forward = fftw_plan_dft_r2c_2d(W, H, in, out, FFTW_ESTIMATE); plan_backward = fftw_plan_dft_c2r_2d(W, H, out, in, FFTW_ESTIMATE); int winW = W; int winH = H; SDL_Window *window; window = SDL_CreateWindow("fftw3_test", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, winW, winH, 0); if (!window) { fprintf(stderr, "Unable to set %dx%d video: %s\n", winW, winH, SDL_GetError()); exit(1); } SDL_Renderer *renderer 
= SDL_CreateRenderer(window, -1, 0); if (!renderer) { fprintf(stderr, "Unable to set %dx%d video: %s\n", winW, winH, SDL_GetError()); exit(1); } SDL_ShowCursor(SDL_DISABLE); SDL_PixelFormat *pixelformat = SDL_AllocFormat(SDL_PIXELFORMAT_RGBA8888); SDL_Texture *texture = SDL_CreateTexture(renderer, pixelformat->format, SDL_TEXTUREACCESS_STREAMING, winW, winH); if (!texture) { fprintf(stderr, "Cannot create texture\n"); exit(1); } #if 0 #define n_palette_points 2 palette_point_t palette_points[n_palette_points] = { { 0., 0, 0, 0 }, { 1., 1, 1, 1 }, }; #else #define n_palette_points 11 palette_point_t palette_points[n_palette_points] = { { 0./6, 1, 1, 1 }, { 0.5/6, 1, .9, 0 }, { 1./6, 1, .1, 1 }, { 1.5/6, 0, 0, 1 }, { 3./6, .5, 0, .7 }, { 3.5/6, 0, 1, .7 }, { 4.5/6, .2, .8, .2 }, { 4.8/6, 0, 0, 1 }, { 5.25/6, .8, .8, 0 }, { 5.55/6, .8, .2, 0.4 }, { 5.85/6, .0,.60,.50 }, }; #endif palette_t palette; make_palette(&palette, PALETTE_LEN, palette_points, n_palette_points, pixelformat); bool running = true; int frame_period = 50; int last_ticks = SDL_GetTicks() - frame_period; Uint32 *winbuf = (Uint32*)malloc_check(winW * winH * sizeof(Uint32)); while (running) { bool do_render = false; int elapsed = SDL_GetTicks() - last_ticks; if (elapsed > frame_period) { last_ticks += frame_period * (elapsed / frame_period); do_render = true; } if (do_render) { render(winbuf, winW, winH, &palette, in, W, H); SDL_UpdateTexture(texture, NULL, winbuf, winW * sizeof(Uint32)); SDL_RenderClear(renderer); SDL_RenderCopy(renderer, texture, NULL, NULL); SDL_RenderPresent(renderer); fftw_execute(plan_forward); #if 1 for (x = 0; x < W; x++) { for (y = 0; y < half_H; y++) { double *o = out[x*half_H + y]; double *af = apex_f[x*half_H + y]; double a, b, c, d; a = o[0]; b = o[1]; c = af[0]; d = af[1]; #if 1 o[0] = (a*c - b*d); o[1] = (b*c + a*d); #else double l = sqrt(c*c + d*d); o[0] *= l; o[1] *= l; #endif } } #endif fftw_execute(plan_backward); } else SDL_Delay(5); SDL_Event event; while 
(SDL_PollEvent(&event)) { switch (event.type) { case SDL_KEYDOWN: // If escape is pressed, return (and thus, quit) switch(event.key.keysym.sym) { case SDLK_ESCAPE: running = false; break; default: break; } break; case SDL_QUIT: running = false; break; } } } SDL_Quit(); fftw_destroy_plan(plan_apex); fftw_destroy_plan(plan_forward); fftw_destroy_plan(plan_backward); free(in); free(apex); fftw_free(out); fftw_free(apex_f); return 0; }
int main(int argc, char * argv[]) { // main function to set up the components. All of this work would be done in Python apart from // the GenerateModel function // Check we have enough cline-args if(argc!=8) { printf("This program must be run with file command-line arguments:"); printf("eg. ./Inclined_Exponential_Profile <n> <i> <R> <h> <t> <p> <output_name>"); fflush(stdout); return 1; } double sersic_n = strtod(argv[1], NULL); double inc_angle = strtod(argv[2], NULL); double scale_radius = strtod(argv[3], NULL); double scale_height = strtod(argv[4], NULL); double trunc_factor = strtod(argv[5], NULL); double pos_angle = strtod(argv[6], NULL); char * output_name = argv[7]; printf("Sersic Index: %1.1f\n",sersic_n); printf("Inclination angle: %1.4f\n",inc_angle); printf("Scale radius: %3.2f\n",scale_radius); printf("Scale height: %3.2f\n",scale_height); printf("Truncation factor: %3.2f\n",trunc_factor); printf("Position angle: %1.4f\n",pos_angle); fflush(stdout); fftw_complex *pixelaverageft, *convmodelft, *PSFft, *dsmodelft, *dsmodel, *xshiftft, *yshiftft; double *pixelaverage, *image; double rfiducial, sum; float **rmodelft, *resampledmodelft, *thickdiskft; time_t t1, t2; int i, num, x, y, ip; int p; int hos; fftw_plan pixavplan, invplan; /* ******************************* */ // step one: // make a circular galaxy surface brighness distribution and r2c FT it // this step is done once only at the start of the code, for each Sersic index // that we wish to simulate int mdim, hmdim, nsersic; double *model; fftw_complex *modelft; fftw_plan bigmodel; // set the dimension of the large input circular galaxy. Note that this must be a large // value e.g. 
16384 in order to avoid aliasing mdim = 16384; hmdim = 1 + mdim/2; model = (double*)calloc(mdim*mdim, sizeof(double)); modelft = (fftw_complex*)fftw_malloc( mdim*hmdim*sizeof(fftw_complex) ); bigmodel = fftw_plan_dft_r2c_2d(mdim, mdim, model, modelft, FFTW_ESTIMATE); // make a model with a nominal (fiducial) scalelength rfiducial = 30.; // make a set of models with different Sersic index values nsersic = 1; // store the real part of their fourier transform rmodelft = (float**)calloc(nsersic, sizeof(float*)); for (i=0; i<nsersic; i++) { // allocate memory for this model FT rmodelft[i] = (float*)calloc(hmdim, sizeof(float)); // make a large circular galaxy profile makecirculargalaxy(sersic_n, rfiducial, trunc_factor, mdim, model); // FFT fftw_execute(bigmodel); // convert FT complex to float Hankel and store with separate index for each model component makehankel(modelft, hmdim, rmodelft[i]); } // free memory not needed free(model); free(modelft); /* *********************** */ // set the galaxy parameters - these would be set by the MCMC routine double overgalsize = scale_radius*oversampling; // major axis scalelength in pixels in oversampled arrays if (overgalsize >= rfiducial/sqrt(2.)) { fflush(stdout); fprintf(stderr," error in specifying galaxy size, must be smaller than fiducial size/sqrt(2)\n"); exit(0); } double overscaleheight = scale_height*oversampling; double xpos = 0.*oversampling; // x position offset in oversampled pixels double ypos = 0.*oversampling; // y position offset in oversampled pixels // allocate oversampled arrays // in future, make a selection of sizes to be chosen appropriately // to optimise the speed for small galaxy model generation int idim, odim, hdim; odim = 2*(int)(32.*scale_radius/2.0*pow(sersic_n,2)) ; idim = odim*oversampling; // size of oversampled galaxy image hdim = 1 + idim/2; // x-axis dimension of FFTW hermitian array // odim = idim; // allocate memory // pixel average function and its FT pixelaverage = (double*)calloc( 
idim*idim,sizeof(double) ); pixelaverageft = (fftw_complex*)fftw_malloc( idim*hdim*sizeof(fftw_complex) ); // x,y shift FTs yshiftft = (fftw_complex*)fftw_malloc( idim*sizeof(fftw_complex) ); xshiftft = (fftw_complex*)fftw_malloc( hdim*sizeof(fftw_complex) ); // r2c FFT of convolved model, stored as float resampledmodelft = (float*)calloc( idim*hdim,sizeof(float) ); // r2c FFT of PSF, stored as complex PSFft = (fftw_complex*)calloc( idim*hdim,sizeof(fftw_complex) ); // r2c FFT of thick disk convolving function, stored as complex thickdiskft = (float*)calloc( idim*hdim,sizeof(float) ); // r2c FFT of oversampled, convolved model, stored as complex convmodelft = (fftw_complex*)fftw_malloc( idim*hdim*sizeof(fftw_complex) ); // full FFT of downsampled model dsmodelft = (fftw_complex*)fftw_malloc( odim*odim*sizeof(fftw_complex) ); // complex downsampled image domain model dsmodel = (fftw_complex*)calloc( odim*odim,sizeof(fftw_complex) ); //dsmodel = (double*)calloc( odim*odim,sizeof(double) ); // real part of downsampeld image domain model image = (double*)calloc( odim*odim,sizeof(double) ); // complex downsampled image domain model //fftw_complex* rsmodel = (fftw_complex*)calloc( idim*idim,sizeof(fftw_complex) ); //double* rsimage = (double*)calloc( idim*idim,sizeof(double) ); // calculate fftw plans // pixelaverage plan pixavplan = fftw_plan_dft_r2c_2d(idim, idim, pixelaverage, pixelaverageft, FFTW_ESTIMATE); // inverse downsampled image plan invplan = fftw_plan_dft_2d(odim, odim, dsmodelft, dsmodel, FFTW_BACKWARD, FFTW_MEASURE); //invplan = fftw_plan_dft_c2r_2d(odim, odim, convmodelft, dsmodel, FFTW_ESTIMATE); //fftw_plan invplan2 = fftw_plan_dft_2d(odim, odim, convmodelft, rsmodel, FFTW_BACKWARD, FFTW_ESTIMATE); // fill up pixelaverage function // this function would also only be calculated once at the start of the galaxy measurement // set all values to zero for (ip=0; ip<idim*idim; ip++) { pixelaverage[ip]=0.; } // set a central box to a tophat function which sums 
to unity // set it to be centred in the array so we don't need to swap quadrants at the end hos = oversampling/2; int cen = idim/2; for (y=cen-hos; y<=cen+hos; y++) { for (x=cen-hos; x<=cen+hos; x++) { ip = y*idim + x; pixelaverage[ip] = 1./(double)(oversampling*oversampling); } } // create FT of pixelaverage fftw_execute(pixavplan); // create the FT of the PSF. For now, just set the PSF to be a delta function in image domain // i.e. a uniform function in the Fourier domain // this function would be filled once for each galaxy component for (i=0; i<idim*hdim; i++) PSFft[i] = pixelaverageft[i]; // choose which sersic index to make int sersicindex = 0; // this will make value of sersic index = 1 double sini = sin(inc_angle); double sini_squared = sini*sini; double emod; if(sini_squared==0) { emod = 0.; } else { emod = (2. - sini_squared + 2*sqrt(1-sini_squared))/sini_squared; } double e1 = emod * cos(2*pos_angle); double e2 = emod * sin(2*pos_angle); // optional: run a timing test using a loop by setting a high value for num num = 1; t1 = time(NULL); int odimsq = odim*odim; for (i=0; i<num; i++) { /* ********************************************** */ // this is the call to the C function that // generates a downsampled FT of galaxy model GenerateModel(e1, e2, overgalsize, overscaleheight, xpos, ypos, rfiducial, odim, idim, mdim, rmodelft[sersicindex], resampledmodelft, PSFft, thickdiskft, xshiftft, yshiftft, convmodelft, dsmodelft); /* ********************************************** */ /* the following sections are all back in the Python function. 
in principle we could do the iFFT step inside the C function but this probably does not improve the speed and would mean also passing an fftw_plan into the C function */ // make downsampled image inverse FFT fftw_execute(invplan); // take the real part (discard the imaginary part) and scale for (p=0; p<odimsq; p++) { image[p] = creal(dsmodel[p])/(odim*odim); } // end of timing loop } t2 = time(NULL); if (num>1) { printf(" time %g \n",difftime(t2,t1)/num); } sum = 0.; // take the real part (discard the imaginary part) and test the normalisation for (p=0; p<odimsq; p++) { sum += image[p]; } printf(" sum %g \n",sum); // write out final output image to fits file remove(output_name); write2Dfits(output_name,image,odim,odim); exit(0); }