/*
 * Create a 2D complex-to-complex FFT handle for the ACML backend.
 *
 * pfft    Receives the new handle on success. It is reset to NULL on entry,
 *         so a failed call never leaves a stale handle behind.
 * nx, ny  Transform dimensions.
 * flags   Accepted for interface compatibility; not used by this routine.
 *
 * Returns 0 on success, EINVAL when pfft is NULL, ENOMEM when an allocation
 * fails, or the non-zero ACML status when work-array initialization fails.
 */
int gmx_fft_init_2d(gmx_fft_t * pfft, int nx, int ny, enum gmx_fft_flag flags)
{
    gmx_fft_t    fft;
    int          info     = 0;
    acmlComplex *comm     = NULL;
    int          commSize = 0;

    if (pfft == NULL)
    {
        gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
        return EINVAL;
    }
    *pfft = NULL;

    /* Zero-fill the handle so that every pointer member is NULL until it is
     * assigned; the error paths below hand the struct to gmx_fft_destroy().
     */
    if ( (fft = calloc(1, sizeof(struct gmx_fft))) == NULL)
    {
        return ENOMEM;
    }

    /* Work-array size, with 200 extra elements added on top of:
     *   single precision: nx*ny+5*(nx+ny)
     *   double precision: nx*ny+3*(nx+ny)
     * A 16-byte acmlComplex selects the double-precision formula.
     */
    if (sizeof( acmlComplex ) == 16)
    {
        commSize = (nx*ny+3*(nx+ny)+200)*sizeof( acmlComplex );
    }
    else
    {
        commSize = (nx*ny+5*(nx+ny)+200)*sizeof( acmlComplex );
    }

    /* Allocate communication work array */
    if ( (comm = (acmlComplex*)malloc( commSize ) ) == NULL)
    {
        /* Do not leak the handle allocated above. */
        gmx_fft_destroy( fft );
        return ENOMEM;
    }

    /* Initialize communication work array (no data arrays are passed) */
    ACML_FFT2DX( 100, 1.0f, TRUE, TRUE, nx, ny, NULL, 1, nx, NULL, 1, nx, (acmlComplex*)comm, &info );

    if (info != 0)
    {
        gmx_fatal(FARGS, "Error initializing ACML FFT; status=%d", info);
        /* Only reached if gmx_fatal() returns: comm is not yet owned by the
         * handle, so release it here before destroying the handle.
         */
        free( comm );
        gmx_fft_destroy( fft );
        return info;
    }

    fft->ndim     = 2;
    fft->nx       = nx;
    fft->ny       = ny;
    fft->real_fft = 0;
    fft->comm[0]  = comm;

    *pfft = fft;
    return 0;
}
/*
 * Create a 1D complex-to-complex FFT handle using FFTW2 plans.
 *
 * Four plans are created and stored in fft->single[p][d]:
 *   p = 0 out-of-place, p = 1 in-place;
 *   d = 0 FFTW_BACKWARD, d = 1 FFTW_FORWARD.
 *
 * pfft   Receives the new handle on success; reset to NULL on entry.
 * nx     Transform length.
 * flags  Accepted for interface compatibility; not used by this routine.
 *
 * Returns 0 on success, EINVAL when pfft is NULL, ENOMEM when the handle
 * cannot be allocated, or -1 when any FFTW2 plan could not be created.
 */
int gmx_fft_init_1d(gmx_fft_t * pfft, int nx, enum gmx_fft_flag flags)
{
    int       i,j;
    gmx_fft_t fft;
    int       fftw_flags;

    /* FFTW2 is slow to measure, so we do not use it */
    /* If you change this, add an #ifndef for GMX_DISABLE_FFTW_MEASURE around it! */
    fftw_flags = FFTW_ESTIMATE;

    if(pfft==NULL)
    {
        gmx_fatal(FARGS,"Invalid opaque FFT datatype pointer.");
        return EINVAL;
    }
    *pfft = NULL;

    if( (fft = (gmx_fft_t)malloc(sizeof(struct gmx_fft))) == NULL)
    {
        return ENOMEM;
    }

    /* Fill in the plain fields before anything can fail, so the error path
     * below never hands gmx_fft_destroy() a struct with indeterminate
     * members. No workspace is needed for complex-to-complex FFTs.
     */
    fft->work = NULL;
    fft->ndim = 1;
    fft->nx   = nx;

    fft->single[0][0] = fftw_create_plan(nx,FFTW_BACKWARD,FFTW_OUT_OF_PLACE|fftw_flags);
    fft->single[0][1] = fftw_create_plan(nx,FFTW_FORWARD,FFTW_OUT_OF_PLACE|fftw_flags);
    fft->single[1][0] = fftw_create_plan(nx,FFTW_BACKWARD,FFTW_IN_PLACE|fftw_flags);
    fft->single[1][1] = fftw_create_plan(nx,FFTW_FORWARD,FFTW_IN_PLACE|fftw_flags);

    /* The multi-dimensional plan slots are unused for a 1D transform. */
    fft->multi[0][0] = NULL;
    fft->multi[0][1] = NULL;
    fft->multi[1][0] = NULL;
    fft->multi[1][1] = NULL;

    for(i=0;i<2;i++)
    {
        for(j=0;j<2;j++)
        {
            if(fft->single[i][j] == NULL)
            {
                gmx_fatal(FARGS,"Error initializing FFTW2 plan.");
                gmx_fft_destroy(fft);
                return -1;
            }
        }
    }

    *pfft = fft;
    return 0;
}
/*
 * Cross-correlate the n-point series f and g, writing the result to corr.
 *
 * A temporary 1D FFT handle is created for a transform of length
 * zeroPaddingSize(n), passed to cross_corr_low() together with the data,
 * and destroyed again. gmx_fft_cleanup() is called before returning.
 * The status returned by gmx_fft_init_1d() is not examined.
 */
void cross_corr(int n, real f[], real g[], real corr[])
{
    gmx_fft_t plan;

    gmx_fft_init_1d(&plan,
                    zeroPaddingSize(n),
                    GMX_FFT_FLAG_CONSERVATIVE);

    cross_corr_low(n, f, g, corr, plan);

    gmx_fft_destroy(plan);
    gmx_fft_cleanup();
}
/*
 * Release the resources held by a parallel 3D FFT setup.
 *
 * Destroys the two FFT handles (fft_x, fft_yz) and frees the slab-to-grid
 * index arrays, the optional `aav` bookkeeping struct with its four arrays,
 * and both raw work buffers.
 *
 * A NULL pfft_setup is accepted and treated as a no-op, matching the
 * NULL-tolerant behaviour of gmx_fft_destroy().
 *
 * NOTE(review): the setup struct itself is not freed here; presumably it is
 * released elsewhere - verify against the matching init routine.
 *
 * Always returns 0.
 */
int gmx_parallel_3dfft_destroy(gmx_parallel_3dfft_t pfft_setup)
{
    if (pfft_setup == NULL)
    {
        return 0;
    }

    gmx_fft_destroy(pfft_setup->fft_x);
    gmx_fft_destroy(pfft_setup->fft_yz);

    free(pfft_setup->slab2grid_x);
    free(pfft_setup->slab2grid_y);

    if (pfft_setup->aav)
    {
        free(pfft_setup->aav->sdisps);
        free(pfft_setup->aav->scounts);
        free(pfft_setup->aav->rdisps);
        free(pfft_setup->aav->rcounts);
        free(pfft_setup->aav);
    }

    free(pfft_setup->work_rawptr);
    free(pfft_setup->work2_rawptr);

    return 0;
}
static void calc_spectrum(int n, real c[], real dt, const char *fn, gmx_output_env_t *oenv, gmx_bool bRecip) { FILE *fp; gmx_fft_t fft; int i, status; real *data; real nu, omega, recip_fac; snew(data, n*2); for (i = 0; (i < n); i++) { data[i] = c[i]; } if ((status = gmx_fft_init_1d_real(&fft, n, GMX_FFT_FLAG_NONE)) != 0) { gmx_fatal(FARGS, "Invalid fft return status %d", status); } if ((status = gmx_fft_1d_real(fft, GMX_FFT_REAL_TO_COMPLEX, data, data)) != 0) { gmx_fatal(FARGS, "Invalid fft return status %d", status); } fp = xvgropen(fn, "Vibrational Power Spectrum", bRecip ? "\\f{12}w\\f{4} (cm\\S-1\\N)" : "\\f{12}n\\f{4} (ps\\S-1\\N)", "a.u.", oenv); /* This is difficult. * The length of the ACF is dt (as passed to this routine). * We pass the vacf with N time steps from 0 to dt. * That means that after FFT we have lowest frequency = 1/dt * then 1/(2 dt) etc. (this is the X-axis of the data after FFT). * To convert to 1/cm we need to have to realize that * E = hbar w = h nu = h c/lambda. We want to have reciprokal cm * on the x-axis, that is 1/lambda, so we then have * 1/lambda = nu/c. Since nu has units of 1/ps and c has gromacs units * of nm/ps, we need to multiply by 1e7. * The timestep between saving the trajectory is * 1e7 is to convert nanometer to cm */ recip_fac = bRecip ? (1e7/SPEED_OF_LIGHT) : 1.0; for (i = 0; (i < n); i += 2) { nu = i/(2*dt); omega = nu*recip_fac; /* Computing the square magnitude of a complex number, since this is a power * spectrum. */ fprintf(fp, "%10g %10g\n", omega, gmx::square(data[i])+gmx::square(data[i+1])); } xvgrclose(fp); gmx_fft_destroy(fft); sfree(data); }
/*
 * Destroy an FFT handle together with every handle chained through ->next.
 *
 * For each handle in the chain the work buffer is freed, then the handle
 * itself. A null handle is accepted and nothing is done.
 */
void gmx_fft_destroy(gmx_fft_t fft)
{
    while (fft != nullptr)
    {
        /* Remember the successor before this node is released. */
        gmx_fft_t successor = fft->next;

        free(fft->work);
        free(fft);

        fft = successor;
    }
}
/*
 * Cross-correlate nFunc pairs of series.
 *
 * For each index k, f[k] and g[k] (nData[k] points each) are correlated into
 * corr[k] by cross_corr_low(). The loop over k is distributed with OpenMP.
 * A separate FFT handle is created and destroyed for every k, because - as
 * the original comment notes - gmx_fft_t is not thread safe.
 * gmx_fft_cleanup() is called once after the parallel loop. The status
 * returned by gmx_fft_init_1d() is not examined.
 */
void many_cross_corr(int nFunc, int * nData, real ** f, real ** g, real ** corr)
{
    #pragma omp parallel for
    for (int k = 0; k < nFunc; k++)
    {
        gmx_fft_t plan;

        gmx_fft_init_1d(&plan, zeroPaddingSize(nData[k]), GMX_FFT_FLAG_CONSERVATIVE);
        cross_corr_low(nData[k], f[k], g[k], corr[k], plan);
        gmx_fft_destroy(plan);
    }

    gmx_fft_cleanup();
}
/*
 * Transform the nx-point series dy and write a frequency-dependent epsilon
 * curve to fn and a Cole-Cole plot to cn.
 *
 * fn     output file for (frequency, Re, Im) rows
 * cn     output file for (Re, Im) rows
 * nx     number of data points; 0 is a fatal error
 * x      abscissa values; only x[1]-x[0] is used, as the spacing
 * dy     input series
 * eps0   static dielectric constant used in the scaling factor
 * epsRF  reaction-field epsilon; 0 selects the simple scaling branch
 * oenv   output environment handed to xvgropen()
 *
 * The FFT length is the smallest power of two that is >= 2*nx. The largest
 * imaginary value and its frequency are printed to stdout at the end.
 */
void do_four(const char *fn, const char *cn, int nx, real x[], real dy[],
             real eps0, real epsRF, const output_env_t oenv)
{
    FILE      *epsFile, *coleFile;
    t_complex *spec, gw, hw, kw;
    int        k, nfft, rc;
    real       scale, freq, spacing, peakEps, peakFreq;
    gmx_fft_t  plan;

    /* Trimming of trailing zeros is disabled in the original:
     * while ((dy[nx-1] == 0.0) && (nx > 0)) nx--;
     */
    if (nx == 0)
    {
        gmx_fatal(FARGS, "Empty dataset in %s, line %d!", __FILE__, __LINE__);
    }

    /* Smallest power of two that is at least 2*nx. */
    for (nfft = 1; nfft < 2*nx; nfft *= 2)
    {
    }

    snew(spec, 2*nfft);
    printf("Doing FFT of %d points\n", nfft);

    /* Only the real parts of the first nx entries are filled in. */
    for (k = 0; k < nx; k++)
    {
        spec[k].re = dy[k];
    }

    rc = gmx_fft_init_1d_real(&plan, nfft, GMX_FFT_FLAG_NONE);
    if (rc != 0)
    {
        gmx_fatal(FARGS, "gmx_fft_init_1d_real returned %d", rc);
    }
    rc = gmx_fft_1d_real(plan, GMX_FFT_COMPLEX_TO_REAL, (void *)spec, (void *)spec);
    if (rc != 0)
    {
        gmx_fatal(FARGS, "gmx_fft_1d_real returned %d", rc);
    }
    gmx_fft_destroy(plan);

    spacing = x[1]-x[0];

    /* Scaling factor, normalized by the first transformed value. */
    if (epsRF != 0)
    {
        scale = ((eps0-1)/(2*epsRF+eps0))/spec[0].re;
    }
    else
    {
        scale = (eps0-1)/spec[0].re;
    }

    epsFile  = xvgropen(fn, "Epsilon(\\8w\\4)", "Freq. (GHz)", "eps", oenv);
    coleFile = xvgropen(cn, "Cole-Cole plot", "Eps'", "Eps''", oenv);

    peakEps  = 0;
    peakFreq = 0;
    for (k = 0; k < nx; k++)
    {
        if (epsRF != 0)
        {
            /* Reaction-field branch: kw = (1 + 2*epsRF*g) / (1 - g), g = scale*spec[k]. */
            gw     = rcmul(scale, spec[k]);
            hw     = rcmul(2*epsRF, gw);
            hw.re += 1.0;
            gw.re  = 1.0 - gw.re;
            gw.im  = -gw.im;
            kw     = cdiv(hw, gw);
        }
        else
        {
            kw.re = 1+scale*spec[k].re;
            kw.im = 1+scale*spec[k].im;
        }
        kw.im *= -1;

        freq = (k+1)*1000.0/(nfft*spacing);
        if (kw.im > peakEps)
        {
            peakEps  = kw.im;
            peakFreq = freq;
        }

        fprintf(epsFile, "%10.5e  %10.5e  %10.5e\n", freq, kw.re, kw.im);
        fprintf(coleFile, "%10.5e  %10.5e\n", kw.re, kw.im);
    }

    printf("MAXEPS = %10.5e at frequency %10.5e GHz (tauD = %8.1f ps)\n",
           peakEps, peakFreq, 1000/(2*M_PI*peakFreq));

    gmx_ffclose(epsFile);
    gmx_ffclose(coleFile);
    sfree(spec);
}
/*
 * Create a 3D real-to-complex / complex-to-real FFT handle using FFTW2.
 *
 * Four rfftw3d plans are created and stored in fft->multi[p][d]:
 *   p = 0 out-of-place, p = 1 in-place;
 *   d = 0 FFTW_COMPLEX_TO_REAL, d = 1 FFTW_REAL_TO_COMPLEX.
 * A work array of nx*ny*(nz/2+1)*2 reals is also allocated.
 *
 * pfft        Receives the new handle on success; reset to NULL on entry.
 * nx, ny, nz  Transform dimensions.
 * flags       Accepted for interface compatibility; not used by this routine.
 *
 * Returns 0 on success, EINVAL when pfft is NULL, ENOMEM when an allocation
 * fails, or -1 when any FFTW2 plan could not be created.
 */
int gmx_fft_init_3d_real(gmx_fft_t * pfft, int nx, int ny, int nz, enum gmx_fft_flag flags)
{
    int       i,j;
    gmx_fft_t fft;
    int       fftw_flags;

    /* FFTW2 is slow to measure, so we do not use it */
    /* If you change this, add an #ifndef for GMX_DISABLE_FFTW_MEASURE around it! */
    fftw_flags = FFTW_ESTIMATE;

    if(pfft==NULL)
    {
        gmx_fatal(FARGS,"Invalid opaque FFT datatype pointer.");
        return EINVAL;
    }
    *pfft = NULL;

    if( (fft = (gmx_fft_t)malloc(sizeof(struct gmx_fft))) == NULL)
    {
        return ENOMEM;
    }

    /* Fill in the plain fields before anything can fail, so that the error
     * paths never pass gmx_fft_destroy() a struct with an indeterminate
     * work pointer or dimensions.
     */
    fft->work = NULL;
    fft->ndim = 3;
    fft->nx   = nx;
    fft->ny   = ny;
    fft->nz   = nz;

    /* The 1D plan slots are unused for a 3D transform. */
    fft->single[0][0] = NULL;
    fft->single[0][1] = NULL;
    fft->single[1][0] = NULL;
    fft->single[1][1] = NULL;

    fft->multi[0][0] = rfftw3d_create_plan(nx,ny,nz,FFTW_COMPLEX_TO_REAL,FFTW_OUT_OF_PLACE|fftw_flags);
    fft->multi[0][1] = rfftw3d_create_plan(nx,ny,nz,FFTW_REAL_TO_COMPLEX,FFTW_OUT_OF_PLACE|fftw_flags);
    fft->multi[1][0] = rfftw3d_create_plan(nx,ny,nz,FFTW_COMPLEX_TO_REAL,FFTW_IN_PLACE|fftw_flags);
    fft->multi[1][1] = rfftw3d_create_plan(nx,ny,nz,FFTW_REAL_TO_COMPLEX,FFTW_IN_PLACE|fftw_flags);

    for(i=0;i<2;i++)
    {
        for(j=0;j<2;j++)
        {
            if(fft->multi[i][j] == NULL)
            {
                gmx_fatal(FARGS,"Error initializing FFTW2 plan.");
                gmx_fft_destroy(fft);
                return -1;
            }
        }
    }

    /* FFTW2 overwrites the input when doing out-of-place complex-to-real FFTs.
     * This is not acceptable for the Gromacs interface, so we define a
     * work array and copy the data there before doing complex-to-real FFTs.
     * The element count is computed in size_t so that large grids do not
     * overflow int arithmetic.
     */
    fft->work = (real *)malloc(sizeof(real)*( (size_t)nx*(size_t)ny*(size_t)(nz/2 + 1)*2) );
    if(fft->work == NULL)
    {
        gmx_fatal(FARGS,"Cannot allocate complex-to-real FFT workspace.");
        gmx_fft_destroy(fft);
        return ENOMEM;
    }

    *pfft = fft;
    return 0;
}
/*
 * Create a 1D complex-to-complex FFT handle for the Intel MKL backend.
 *
 * Two DFTI descriptors of length nx are created and committed:
 * fft->inplace[0] (DFTI_INPLACE) and fft->ooplace[0] (DFTI_NOT_INPLACE).
 *
 * pfft   Receives the new handle on success; reset to NULL on entry.
 * nx     Transform length.
 * flags  Accepted for interface compatibility; not used by this routine.
 *
 * Returns 0 on success, EINVAL when pfft is NULL, ENOMEM when the handle
 * cannot be allocated, or the non-zero DFTI status of the first failing call.
 */
int gmx_fft_init_1d(gmx_fft_t * pfft, int nx, enum gmx_fft_flag flags)
{
    gmx_fft_t fft;
    int       d;
    int       status;

    if(pfft==NULL)
    {
        gmx_fatal(FARGS,"Invalid opaque FFT datatype pointer.");
        return EINVAL;
    }
    *pfft = NULL;

    if( (fft = malloc(sizeof(struct gmx_fft))) == NULL)
    {
        return ENOMEM;
    }

    /* Mark all handles invalid */
    for(d=0;d<3;d++)
    {
        fft->inplace[d] = fft->ooplace[d] = NULL;
    }
    fft->ooplace[3] = NULL;

    /* No work buffer is used here; clear the pointer up front so the error
     * path never passes gmx_fft_destroy() an indeterminate value.
     */
    fft->work = NULL;

    status = DftiCreateDescriptor(&fft->inplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)nx);

    if( status == 0 )
    {
        status = DftiSetValue(fft->inplace[0],DFTI_PLACEMENT,DFTI_INPLACE);
    }
    if( status == 0 )
    {
        status = DftiCommitDescriptor(fft->inplace[0]);
    }

    if( status == 0 )
    {
        status = DftiCreateDescriptor(&fft->ooplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)nx);
    }
    /* The status of the next two calls was previously discarded, so a failure
     * to configure or commit the out-of-place descriptor went unnoticed.
     */
    if( status == 0 )
    {
        status = DftiSetValue(fft->ooplace[0],DFTI_PLACEMENT,DFTI_NOT_INPLACE);
    }
    if( status == 0 )
    {
        status = DftiCommitDescriptor(fft->ooplace[0]);
    }

    if( status != 0 )
    {
        gmx_fatal(FARGS,"Error initializing Intel MKL FFT; status=%d",status);
        gmx_fft_destroy(fft);
        return status;
    }

    fft->ndim     = 1;
    fft->nx       = nx;
    fft->real_fft = 0;

    *pfft = fft;
    return 0;
}
/* Run one MKL DFTI call unless an earlier one has already failed, so that
 * `status` keeps the code of the first failing call. Helper for
 * gmx_fft_init_3d_real() only; it is undefined again right after it.
 */
#define GMX_MKL_TRY(call) do { if (status == 0) { status = (call); } } while (0)

/*
 * Create a 3D real-to-complex / complex-to-real FFT handle for Intel MKL.
 *
 * The 3D real transform is assembled from batched 1D transforms, since (per
 * the original comment) the MKL versions targeted do not support this
 * storage format or 3D real transforms. With nzc = nz/2+1 the descriptors are:
 *   inplace[0], ooplace[0]  X: ny*nzc complex transforms, length nx, stride ny*nzc
 *   inplace[1], ooplace[1]  Y: nzc complex transforms, length ny, stride nzc
 *                              (intended to be executed nx times)
 *   inplace[2]              Z: nx*ny real transforms, length nz, distance 2*nzc
 *   ooplace[2]              Z real-to-complex: distance nz -> 2*nzc
 *   ooplace[3]              Z complex-to-real: distance 2*nzc -> nz
 * A work buffer of nx*ny*nzc t_complex elements is also allocated.
 *
 * pfft        Receives the new handle on success; reset to NULL on entry.
 * nx, ny, nz  Transform dimensions.
 * flags       Accepted for interface compatibility; not used by this routine.
 *
 * Returns 0 on success, EINVAL when pfft is NULL, ENOMEM when an allocation
 * fails, or the non-zero DFTI status of the first failing call.
 */
int gmx_fft_init_3d_real(gmx_fft_t * pfft, int nx, int ny, int nz, enum gmx_fft_flag flags)
{
    gmx_fft_t fft;
    int       d;
    int       status = 0;
    MKL_LONG  stride[2];
    int       nzc;

    if(pfft==NULL)
    {
        gmx_fatal(FARGS,"Invalid opaque FFT datatype pointer.");
        return EINVAL;
    }
    *pfft = NULL;

    nzc = (nz/2 + 1);

    if( (fft = malloc(sizeof(struct gmx_fft))) == NULL)
    {
        return ENOMEM;
    }

    /* Mark all handles invalid */
    for(d=0;d<3;d++)
    {
        fft->inplace[d] = fft->ooplace[d] = NULL;
    }
    fft->ooplace[3] = NULL;

    /* Clear the work pointer as well, so the error path never passes
     * gmx_fft_destroy() an indeterminate value.
     */
    fft->work = NULL;

    /* In-place X FFT.
     * ny*nzc complex-to-complex transforms, length nx
     * transform distance: 1
     * element strides: ny*nzc
     */
    stride[0] = 0;
    stride[1] = (MKL_LONG)ny*nzc;
    GMX_MKL_TRY(DftiCreateDescriptor(&fft->inplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)nx));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[0],DFTI_PLACEMENT,DFTI_INPLACE));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[0],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)ny*nzc));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[0],DFTI_INPUT_DISTANCE,(MKL_LONG)1));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[0],DFTI_INPUT_STRIDES,stride));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[0],DFTI_OUTPUT_DISTANCE,(MKL_LONG)1));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[0],DFTI_OUTPUT_STRIDES,stride));
    GMX_MKL_TRY(DftiCommitDescriptor(fft->inplace[0]));

    /* Out-of-place X FFT: same geometry as the in-place X FFT. */
    GMX_MKL_TRY(DftiCreateDescriptor(&fft->ooplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)nx));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[0],DFTI_PLACEMENT,DFTI_NOT_INPLACE));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[0],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)ny*nzc));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[0],DFTI_INPUT_DISTANCE,(MKL_LONG)1));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[0],DFTI_INPUT_STRIDES,stride));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[0],DFTI_OUTPUT_DISTANCE,(MKL_LONG)1));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[0],DFTI_OUTPUT_STRIDES,stride));
    GMX_MKL_TRY(DftiCommitDescriptor(fft->ooplace[0]));

    /* In-place Y FFT.
     * We cannot do all NX*NZC transforms at once, so define a handle to do
     * NZC transforms, and then execute it NX times.
     * nzc complex-to-complex transforms, length ny
     * transform distance: 1
     * element strides: nzc
     */
    stride[0] = 0;
    stride[1] = (MKL_LONG)nzc;
    GMX_MKL_TRY(DftiCreateDescriptor(&fft->inplace[1],GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)ny));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[1],DFTI_PLACEMENT,DFTI_INPLACE));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[1],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nzc));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[1],DFTI_INPUT_DISTANCE,(MKL_LONG)1));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[1],DFTI_INPUT_STRIDES,stride));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[1],DFTI_OUTPUT_DISTANCE,(MKL_LONG)1));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[1],DFTI_OUTPUT_STRIDES,stride));
    GMX_MKL_TRY(DftiCommitDescriptor(fft->inplace[1]));

    /* Out-of-place Y FFT: same geometry as the in-place Y FFT. */
    GMX_MKL_TRY(DftiCreateDescriptor(&fft->ooplace[1],GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)ny));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[1],DFTI_PLACEMENT,DFTI_NOT_INPLACE));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[1],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nzc));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[1],DFTI_INPUT_DISTANCE,(MKL_LONG)1));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[1],DFTI_INPUT_STRIDES,stride));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[1],DFTI_OUTPUT_DISTANCE,(MKL_LONG)1));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[1],DFTI_OUTPUT_STRIDES,stride));
    GMX_MKL_TRY(DftiCommitDescriptor(fft->ooplace[1]));

    /* In-place Z FFT:
     * nx*ny real-to-complex transforms, length nz
     * transform distance: nzc*2 -> nzc*2
     * element strides: 1
     */
    stride[0] = 0;
    stride[1] = 1;
    GMX_MKL_TRY(DftiCreateDescriptor(&fft->inplace[2],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)nz));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[2],DFTI_PLACEMENT,DFTI_INPLACE));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[2],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nx*ny));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[2],DFTI_INPUT_DISTANCE,(MKL_LONG)nzc*2));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[2],DFTI_INPUT_STRIDES,stride));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[2],DFTI_OUTPUT_DISTANCE,(MKL_LONG)nzc*2));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[2],DFTI_OUTPUT_STRIDES,stride));
    GMX_MKL_TRY(DftiCommitDescriptor(fft->inplace[2]));

    /* Out-of-place real-to-complex (affects distance) Z FFT:
     * nx*ny real-to-complex transforms, length nz
     * transform distance: nz -> nzc*2
     * element strides: 1
     */
    GMX_MKL_TRY(DftiCreateDescriptor(&fft->ooplace[2],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)nz));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[2],DFTI_PLACEMENT,DFTI_NOT_INPLACE));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[2],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nx*ny));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[2],DFTI_INPUT_DISTANCE,(MKL_LONG)nz));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[2],DFTI_INPUT_STRIDES,stride));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[2],DFTI_OUTPUT_DISTANCE,(MKL_LONG)nzc*2));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[2],DFTI_OUTPUT_STRIDES,stride));
    GMX_MKL_TRY(DftiCommitDescriptor(fft->ooplace[2]));

    /* Out-of-place complex-to-real (affects distance) Z FFT:
     * nx*ny complex-to-real transforms, length nz
     * transform distance: nzc*2 -> nz
     * element strides: 1
     */
    GMX_MKL_TRY(DftiCreateDescriptor(&fft->ooplace[3],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)nz));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[3],DFTI_PLACEMENT,DFTI_NOT_INPLACE));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[3],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nx*ny));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[3],DFTI_INPUT_DISTANCE,(MKL_LONG)nzc*2));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[3],DFTI_INPUT_STRIDES,stride));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[3],DFTI_OUTPUT_DISTANCE,(MKL_LONG)nz));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[3],DFTI_OUTPUT_STRIDES,stride));
    GMX_MKL_TRY(DftiCommitDescriptor(fft->ooplace[3]));

    if ( status == 0 )
    {
        /* Element count computed in size_t so large grids do not overflow int. */
        fft->work = malloc(sizeof(t_complex)*(size_t)nx*(size_t)ny*(size_t)nzc);
        if (fft->work == NULL)
        {
            status = ENOMEM;
        }
    }

    if( status != 0 )
    {
        gmx_fatal(FARGS,"Error initializing Intel MKL FFT; status=%d",status);
        gmx_fft_destroy(fft);
        return status;
    }

    fft->ndim     = 3;
    fft->nx       = nx;
    fft->ny       = ny;
    fft->nz       = nz;
    fft->real_fft = 1;

    *pfft = fft;
    return 0;
}

#undef GMX_MKL_TRY
/*
 * Create a 1D real-to-complex / complex-to-real FFT handle for ACML.
 *
 * Two communication work arrays of 3*nx+100 reals are allocated and
 * initialized (one for real-to-complex, one for complex-to-real), plus a
 * scratch array of nx complex elements that, per the original comment, ACML
 * uses to expand hermitian complex format to full complex format.
 *
 * pfft   Receives the new handle on success; reset to NULL on entry.
 * nx     Transform length.
 * flags  Accepted for interface compatibility; not used by this routine.
 *
 * Returns 0 on success, EINVAL when pfft is NULL, ENOMEM when an allocation
 * fails, or the non-zero ACML status when initialization fails. On every
 * failure all memory allocated by this call is released.
 */
int gmx_fft_init_1d_real(gmx_fft_t * pfft, int nx, enum gmx_fft_flag flags)
{
    gmx_fft_t fft;
    int       info     = 0;
    int       status   = 0;
    real     *commRC   = NULL;
    real     *commCR   = NULL;
    int       commSize = 0;

    if (pfft == NULL)
    {
        gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
        return EINVAL;
    }
    *pfft = NULL;

    /* Zero-fill the handle so every pointer member is NULL until assigned;
     * the cleanup path hands the struct to gmx_fft_destroy().
     */
    if ( (fft = calloc(1, sizeof(struct gmx_fft))) == NULL)
    {
        return ENOMEM;
    }

    /* The work arrays hold `real` elements, so size them with sizeof(real);
     * sizeof(float) under-allocates whenever real is wider than float.
     */
    commSize = (3*nx+100)*sizeof( real );

    /* Allocate communication work array, r2c */
    if ( (commRC = (real*)malloc( commSize ) ) == NULL)
    {
        status = ENOMEM;
        goto fail;
    }

    /* Allocate communication work array, c2r */
    if ( (commCR = (real*)malloc( commSize ) ) == NULL)
    {
        status = ENOMEM;
        goto fail;
    }

    /* Initialize communication work array, r2c */
    ACML_RCFFT1D( 100, nx, NULL, commRC, &info );
    if (info != 0)
    {
        gmx_fatal(FARGS, "Error initializing ACML FFT; status=%d", info);
        status = info;
        goto fail;
    }

    /* Initialize communication work array, c2r */
    ACML_CRFFT1D( 100, nx, NULL, commCR, &info );
    if (info != 0)
    {
        gmx_fatal(FARGS, "Error initializing ACML FFT; status=%d", info);
        status = info;
        goto fail;
    }

    /* Allocate scratch work array that ACML uses to splat from hermitian
     * complex format to full complex format.
     */
    if ( (fft->realScratch = (acmlComplex*)malloc( nx*sizeof( acmlComplex ) ) ) == NULL)
    {
        status = ENOMEM;
        goto fail;
    }

    fft->ndim     = 1;
    fft->nx       = nx;
    fft->real_fft = 1;
    fft->comm[0]  = commRC;
    fft->comm[1]  = commCR;

    *pfft = fft;
    return 0;

fail:
    /* The work arrays are not yet owned by the handle, so free them here;
     * free(NULL) is a no-op for any that were never allocated.
     */
    free( commRC );
    free( commCR );
    gmx_fft_destroy( fft );
    return status;
}
int many_auto_correl(int nfunc, int ndata, int nfft, real **c) { #pragma omp parallel { try { typedef real complex[2]; int i, j; gmx_fft_t fft1; complex *in, *out; int i0, i1; int nthreads, thread_id; nthreads = gmx_omp_get_max_threads(); thread_id = gmx_omp_get_thread_num(); if ((0 == thread_id)) { // fprintf(stderr, "There are %d threads for correlation functions\n", nthreads); } i0 = thread_id*nfunc/nthreads; i1 = std::min(nfunc, (thread_id+1)*nfunc/nthreads); gmx_fft_init_1d(&fft1, nfft, GMX_FFT_FLAG_CONSERVATIVE); /* Allocate temporary arrays */ snew(in, nfft); snew(out, nfft); for (i = i0; (i < i1); i++) { for (j = 0; j < ndata; j++) { in[j][0] = c[i][j]; in[j][1] = 0; } for (; (j < nfft); j++) { in[j][0] = in[j][1] = 0; } gmx_fft_1d(fft1, GMX_FFT_BACKWARD, (void *)in, (void *)out); for (j = 0; j < nfft; j++) { in[j][0] = (out[j][0]*out[j][0] + out[j][1]*out[j][1])/nfft; in[j][1] = 0; } for (; (j < nfft); j++) { in[j][0] = in[j][1] = 0; } gmx_fft_1d(fft1, GMX_FFT_FORWARD, (void *)in, (void *)out); for (j = 0; (j < nfft); j++) { c[i][j] = out[j][0]/ndata; } } /* Free the memory */ gmx_fft_destroy(fft1); sfree(in); sfree(out); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } // gmx_fft_cleanup(); return 0; }
/*
 * Create a 3D real-to-complex / complex-to-real FFT handle for ACML.
 *
 * The 3D real transform is assembled from 1D transforms, since (per the
 * original comment) the ACML versions targeted do not support 2D or 3D real
 * transforms. Four communication work arrays are allocated and initialized:
 *   comm[0]  X: ny*nz complex transforms of length nx
 *   comm[1]  Y: nz complex transforms of length ny (one x-slice at a time)
 *   comm[2]  Z: real-to-complex, length nz
 *   comm[3]  Z: complex-to-real, length nz
 * plus a scratch array of nx*ny*nz complex elements.
 *
 * pfft        Receives the new handle on success; reset to NULL on entry.
 * nx, ny, nz  Transform dimensions.
 * flags       Accepted for interface compatibility; not used by this routine.
 *
 * Returns 0 on success, EINVAL when pfft is NULL, ENOMEM when an allocation
 * fails, or the non-zero ACML status when initialization fails. On every
 * failure all memory allocated by this call is released.
 */
int gmx_fft_init_3d_real(gmx_fft_t * pfft, int nx, int ny, int nz, enum gmx_fft_flag flags)
{
    gmx_fft_t    fft;
    int          info     = 0;
    int          status   = 0;
    acmlComplex *commX    = NULL;
    acmlComplex *commY    = NULL;
    real        *commRC   = NULL;
    real        *commCR   = NULL;
    int          commSize = 0;

    if (pfft == NULL)
    {
        gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
        return EINVAL;
    }
    *pfft = NULL;

    /* Zero-fill the handle so every pointer member is NULL until assigned;
     * the cleanup path hands the struct to gmx_fft_destroy().
     */
    if ( (fft = calloc(1, sizeof(struct gmx_fft))) == NULL)
    {
        return ENOMEM;
    }

    /* X FFT.
     * ny*nz complex-to-complex transforms, length nx
     * Work array: single precision requires 5*nx+100 elements,
     *             double precision requires 3*nx+100 elements.
     * A 16-byte acmlComplex selects the double-precision formula.
     */
    if (sizeof( acmlComplex ) == 16)
    {
        commSize = (3*nx+100)*sizeof( acmlComplex );
    }
    else
    {
        commSize = (5*nx+100)*sizeof( acmlComplex );
    }

    if ( (commX = (acmlComplex*)malloc( commSize ) ) == NULL)
    {
        status = ENOMEM;
        goto fail;
    }

    /* Initialize communication work array */
    ACML_FFT1DMX( 100, 1.0f, TRUE, ny*nz, nx, NULL, ny*nz, 1, NULL, ny*nz, 1, commX, &info );
    if (info != 0)
    {
        gmx_fatal(FARGS, "Error initializing ACML FFT; status=%d", info);
        status = info;
        goto fail;
    }

    /* Y FFT.
     * We cannot do all NX*NZ transforms at once, so define a handle to do
     * NZ transforms, and then execute it NX times.
     * nz complex-to-complex transforms, length ny
     * Work array: single precision requires 5*ny+100 elements,
     *             double precision requires 3*ny+100 elements.
     */
    if (sizeof( acmlComplex ) == 16)
    {
        commSize = (3*ny+100)*sizeof( acmlComplex );
    }
    else
    {
        commSize = (5*ny+100)*sizeof( acmlComplex );
    }

    if ( (commY = (acmlComplex*)malloc( commSize ) ) == NULL)
    {
        status = ENOMEM;
        goto fail;
    }

    /* Initialize communication work array.
     * We want to do multiple 1D FFT's in z-y plane, so we have to loop over x
     * dimension recalculating z-y plane for each slice.
     */
    ACML_FFT1DMX( 100, 1.0f, TRUE, nz, ny, NULL, nz, 1, NULL, nz, 1, commY, &info );
    if (info != 0)
    {
        gmx_fatal(FARGS, "Error initializing ACML FFT; status=%d", info);
        status = info;
        goto fail;
    }

    /* Z FFT, real-to-complex, length nz. */
    commSize = (3*nz+100)*sizeof( real );

    if ( (commRC = (real*)malloc( commSize ) ) == NULL)
    {
        status = ENOMEM;
        goto fail;
    }

    /* TODO: Is there no MODE or PLAN for multiple hermitian sequences? */
    ACML_RCFFT1D( 100, nz, NULL, commRC, &info );
    if (info != 0)
    {
        gmx_fatal(FARGS, "Error initializing ACML FFT; status=%d", info);
        status = info;
        goto fail;
    }

    /* Z FFT, complex-to-real, length nz. */
    commSize = (3*nz+100)*sizeof( real );

    if ( (commCR = (real*)malloc( commSize ) ) == NULL)
    {
        status = ENOMEM;
        goto fail;
    }

    ACML_CRFFT1D( 100, nz, NULL, commCR, &info );
    if (info != 0)
    {
        gmx_fatal(FARGS, "Error initializing ACML FFT; status=%d", info);
        status = info;
        goto fail;
    }

    /* Allocate scratch work array that ACML uses to splat from hermitian
     * complex format to full complex format.
     */
    if ( (fft->realScratch = (acmlComplex*)malloc( (nx*ny*nz)*sizeof( acmlComplex ) ) ) == NULL)
    {
        status = ENOMEM;
        goto fail;
    }

    fft->ndim     = 3;
    fft->nx       = nx;
    fft->ny       = ny;
    fft->nz       = nz;
    fft->real_fft = 1;
    fft->comm[0]  = commX;
    fft->comm[1]  = commY;
    fft->comm[2]  = commRC;
    fft->comm[3]  = commCR;

    *pfft = fft;
    return 0;

fail:
    /* The work arrays are not yet owned by the handle, so free them here;
     * free(NULL) is a no-op for any that were never allocated.
     */
    free( commX );
    free( commY );
    free( commRC );
    free( commCR );
    gmx_fft_destroy( fft );
    return status;
}
/*
 * Create a 2D real-to-complex / complex-to-real FFT handle for ACML.
 *
 * The 2D real transform is assembled from 1D transforms, since (per the
 * original comment) the ACML versions targeted do not support this storage
 * format and most lack 2D real FFTs. With nyc = ny/2+1, three communication
 * work arrays are allocated and initialized:
 *   comm[0]  X: nyc complex transforms of length nx
 *   comm[1]  Y: real-to-complex, length ny
 *   comm[2]  Y: complex-to-real, length ny
 * plus a scratch array of nx*ny complex elements.
 *
 * pfft    Receives the new handle on success; reset to NULL on entry.
 * nx, ny  Transform dimensions.
 * flags   Accepted for interface compatibility; not used by this routine.
 *
 * Returns 0 on success, EINVAL when pfft is NULL, ENOMEM when an allocation
 * fails, or the non-zero ACML status when initialization fails. On every
 * failure all memory allocated by this call is released.
 */
int gmx_fft_init_2d_real(gmx_fft_t * pfft, int nx, int ny, enum gmx_fft_flag flags)
{
    gmx_fft_t    fft;
    int          info     = 0;
    int          status   = 0;
    acmlComplex *comm     = NULL;
    real        *commRC   = NULL;
    real        *commCR   = NULL;
    int          commSize = 0;
    int          nyc      = 0;

    if (pfft == NULL)
    {
        gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
        return EINVAL;
    }
    *pfft = NULL;

    /* Zero-fill the handle so every pointer member is NULL until assigned;
     * the cleanup path hands the struct to gmx_fft_destroy().
     */
    if ( (fft = calloc(1, sizeof(struct gmx_fft))) == NULL)
    {
        return ENOMEM;
    }

    nyc = (ny/2 + 1);

    /* X FFT work array: single precision requires 5*nx+100 elements,
     *                   double precision requires 3*nx+100 elements.
     * A 16-byte acmlComplex selects the double-precision formula.
     */
    if (sizeof( acmlComplex ) == 16)
    {
        commSize = (3*nx+100)*sizeof( acmlComplex );
    }
    else
    {
        commSize = (5*nx+100)*sizeof( acmlComplex );
    }

    if ( (comm = (acmlComplex*)malloc( commSize ) ) == NULL)
    {
        status = ENOMEM;
        goto fail;
    }

    /* Initialize communication work array */
    ACML_FFT1DMX( 100, 1.0f, FALSE, nyc, nx, NULL, nyc, 1, NULL, nyc, 1, comm, &info );
    if (info != 0)
    {
        gmx_fatal(FARGS, "Error initializing ACML FFT; status=%d", info);
        status = info;
        goto fail;
    }

    /* Y FFT, real-to-complex, length ny. */
    commSize = (3*ny+100)*sizeof( real );

    if ( (commRC = (real*)malloc( commSize ) ) == NULL)
    {
        status = ENOMEM;
        goto fail;
    }

    /* TODO: Is there no MODE or PLAN for multiple hermitian sequences? */
    ACML_RCFFT1D( 100, ny, NULL, commRC, &info );
    if (info != 0)
    {
        gmx_fatal(FARGS, "Error initializing ACML FFT; status=%d", info);
        status = info;
        goto fail;
    }

    /* Y FFT, complex-to-real, length ny. */
    commSize = (3*ny+100)*sizeof( real );

    if ( (commCR = (real*)malloc( commSize ) ) == NULL)
    {
        status = ENOMEM;
        goto fail;
    }

    /* TODO: Is there no MODE or PLAN for multiple hermitian sequences? */
    ACML_CRFFT1D( 100, ny, NULL, commCR, &info );
    if (info != 0)
    {
        gmx_fatal(FARGS, "Error initializing ACML FFT; status=%d", info);
        status = info;
        goto fail;
    }

    /* Allocate scratch work array that ACML uses to splat from hermitian
     * complex format to full complex format.
     */
    if ( (fft->realScratch = (acmlComplex*)malloc( (nx*ny)*sizeof( acmlComplex ) ) ) == NULL)
    {
        status = ENOMEM;
        goto fail;
    }

    fft->ndim     = 2;
    fft->nx       = nx;
    fft->ny       = ny;
    fft->real_fft = 1;
    fft->comm[0]  = comm;
    fft->comm[1]  = commRC;
    fft->comm[2]  = commCR;

    *pfft = fft;
    return 0;

fail:
    /* The work arrays are not yet owned by the handle, so free them here;
     * free(NULL) is a no-op for any that were never allocated.
     */
    free( comm );
    free( commRC );
    free( commCR );
    gmx_fft_destroy( fft );
    return status;
}
/* Run one MKL DFTI call unless an earlier one has already failed, so that
 * `status` keeps the code of the first failing call. Helper for
 * gmx_fft_init_2d_real() only; it is undefined again right after it.
 */
#define GMX_MKL_TRY(call) do { if (status == 0) { status = (call); } } while (0)

/*
 * Create a 2D real-to-complex / complex-to-real FFT handle for Intel MKL.
 *
 * The 2D real transform is assembled from batched 1D transforms, since (per
 * the original comment) the MKL versions targeted do not support this
 * storage format and most lack 2D real FFTs. With nyc = ny/2+1 the
 * descriptors are:
 *   inplace[0], ooplace[0]  X: nyc complex transforms, length nx, stride nyc
 *   inplace[1]              Y: nx real transforms, length ny, distance 2*nyc
 *   ooplace[1]              Y real-to-complex: distance ny -> 2*nyc
 *   ooplace[2]              Y complex-to-real: distance 2*nyc -> ny
 * A work buffer of nx*nyc t_complex elements is also allocated.
 *
 * pfft    Receives the new handle on success; reset to NULL on entry.
 * nx, ny  Transform dimensions.
 * flags   Accepted for interface compatibility; not used by this routine.
 *
 * Returns 0 on success, EINVAL when pfft is NULL, ENOMEM when an allocation
 * fails, or the non-zero DFTI status of the first failing call.
 */
int gmx_fft_init_2d_real(gmx_fft_t * pfft, int nx, int ny, enum gmx_fft_flag flags)
{
    gmx_fft_t fft;
    int       d;
    int       status = 0;
    MKL_LONG  stride[2];
    MKL_LONG  nyc;

    if(pfft==NULL)
    {
        gmx_fatal(FARGS,"Invalid opaque FFT datatype pointer.");
        return EINVAL;
    }
    *pfft = NULL;

    if( (fft = malloc(sizeof(struct gmx_fft))) == NULL)
    {
        return ENOMEM;
    }

    nyc = (ny/2 + 1);

    /* Mark all handles invalid */
    for(d=0;d<3;d++)
    {
        fft->inplace[d] = fft->ooplace[d] = NULL;
    }
    fft->ooplace[3] = NULL;

    /* Clear the work pointer as well, so the error path never passes
     * gmx_fft_destroy() an indeterminate value.
     */
    fft->work = NULL;

    /* In-place X FFT */
    stride[0] = 0;
    stride[1] = nyc;
    GMX_MKL_TRY(DftiCreateDescriptor(&fft->inplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)nx));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[0],DFTI_PLACEMENT,DFTI_INPLACE));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[0],DFTI_NUMBER_OF_TRANSFORMS,nyc));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[0],DFTI_INPUT_DISTANCE,(MKL_LONG)1));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[0],DFTI_INPUT_STRIDES,stride));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[0],DFTI_OUTPUT_DISTANCE,(MKL_LONG)1));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[0],DFTI_OUTPUT_STRIDES,stride));
    GMX_MKL_TRY(DftiCommitDescriptor(fft->inplace[0]));

    /* Out-of-place X FFT */
    GMX_MKL_TRY(DftiCreateDescriptor(&(fft->ooplace[0]),GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)nx));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[0],DFTI_PLACEMENT,DFTI_NOT_INPLACE));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[0],DFTI_NUMBER_OF_TRANSFORMS,nyc));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[0],DFTI_INPUT_DISTANCE,(MKL_LONG)1));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[0],DFTI_INPUT_STRIDES,stride));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[0],DFTI_OUTPUT_DISTANCE,(MKL_LONG)1));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[0],DFTI_OUTPUT_STRIDES,stride));
    GMX_MKL_TRY(DftiCommitDescriptor(fft->ooplace[0]));

    /* In-place Y FFT */
    stride[0] = 0;
    stride[1] = 1;
    GMX_MKL_TRY(DftiCreateDescriptor(&fft->inplace[1],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)ny));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[1],DFTI_PLACEMENT,DFTI_INPLACE));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[1],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nx));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[1],DFTI_INPUT_DISTANCE,2*nyc));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[1],DFTI_INPUT_STRIDES,stride));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[1],DFTI_OUTPUT_DISTANCE,2*nyc));
    GMX_MKL_TRY(DftiSetValue(fft->inplace[1],DFTI_OUTPUT_STRIDES,stride));
    GMX_MKL_TRY(DftiCommitDescriptor(fft->inplace[1]));

    /* Out-of-place real-to-complex (affects output distance) Y FFT */
    GMX_MKL_TRY(DftiCreateDescriptor(&fft->ooplace[1],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)ny));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[1],DFTI_PLACEMENT,DFTI_NOT_INPLACE));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[1],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nx));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[1],DFTI_INPUT_DISTANCE,(MKL_LONG)ny));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[1],DFTI_INPUT_STRIDES,stride));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[1],DFTI_OUTPUT_DISTANCE,2*nyc));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[1],DFTI_OUTPUT_STRIDES,stride));
    GMX_MKL_TRY(DftiCommitDescriptor(fft->ooplace[1]));

    /* Out-of-place complex-to-real (affects output distance) Y FFT */
    GMX_MKL_TRY(DftiCreateDescriptor(&fft->ooplace[2],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)ny));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[2],DFTI_PLACEMENT,DFTI_NOT_INPLACE));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[2],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nx));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[2],DFTI_INPUT_DISTANCE,2*nyc));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[2],DFTI_INPUT_STRIDES,stride));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[2],DFTI_OUTPUT_DISTANCE,(MKL_LONG)ny));
    GMX_MKL_TRY(DftiSetValue(fft->ooplace[2],DFTI_OUTPUT_STRIDES,stride));
    GMX_MKL_TRY(DftiCommitDescriptor(fft->ooplace[2]));

    if ( status == 0 )
    {
        /* Element count computed in size_t so large grids do not overflow int. */
        fft->work = malloc(sizeof(t_complex)*(size_t)nx*(size_t)nyc);
        if (fft->work == NULL)
        {
            status = ENOMEM;
        }
    }

    if( status != 0 )
    {
        gmx_fatal(FARGS,"Error initializing Intel MKL FFT; status=%d",status);
        gmx_fft_destroy(fft);
        return status;
    }

    fft->ndim     = 2;
    fft->nx       = nx;
    fft->ny       = ny;
    fft->real_fft = 1;

    *pfft = fft;
    return 0;
}

#undef GMX_MKL_TRY
/*
 * Create a 1D real-to-complex / complex-to-real FFT handle for Intel MKL.
 *
 * Two DFTI_REAL descriptors of length nx are created and committed:
 * fft->inplace[0] (DFTI_INPLACE) and fft->ooplace[0] (DFTI_NOT_INPLACE).
 *
 * pfft   Receives the new handle on success; reset to NULL on entry.
 * nx     Transform length.
 * flags  Unused.
 *
 * Returns 0 on success, EINVAL when pfft is NULL, ENOMEM when the handle
 * cannot be allocated, or the non-zero DFTI status of the first failing call
 * (DFTI_UNIMPLEMENTED is reported with its own message).
 */
int gmx_fft_init_1d_real(gmx_fft_t * pfft, int nx, gmx_fft_flag gmx_unused flags)
{
    gmx_fft_t fft;
    int       d;
    int       status;

    if (pfft == NULL)
    {
        gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
        return EINVAL;
    }
    *pfft = NULL;

    if ( (fft = (gmx_fft_t)malloc(sizeof(struct gmx_fft))) == NULL)
    {
        return ENOMEM;
    }

    /* Mark all handles invalid */
    for (d = 0; d < 3; d++)
    {
        fft->inplace[d] = fft->ooplace[d] = NULL;
    }
    fft->ooplace[3] = NULL;

    /* No work buffer is used here; clear the pointer up front so the error
     * paths never pass gmx_fft_destroy() an indeterminate value.
     */
    fft->work = NULL;

    status = DftiCreateDescriptor(&fft->inplace[0], GMX_DFTI_PREC, DFTI_REAL, 1, (MKL_LONG)nx);

    if (status == 0)
    {
        status = DftiSetValue(fft->inplace[0], DFTI_PLACEMENT, DFTI_INPLACE);
    }

    if (status == 0)
    {
        status = DftiCommitDescriptor(fft->inplace[0]);
    }

    if (status == 0)
    {
        status = DftiCreateDescriptor(&fft->ooplace[0], GMX_DFTI_PREC, DFTI_REAL, 1, (MKL_LONG)nx);
    }

    if (status == 0)
    {
        status = DftiSetValue(fft->ooplace[0], DFTI_PLACEMENT, DFTI_NOT_INPLACE);
    }

    if (status == 0)
    {
        status = DftiCommitDescriptor(fft->ooplace[0]);
    }

    if (status == DFTI_UNIMPLEMENTED)
    {
        gmx_fatal(FARGS,
                  "The linked Intel MKL version (<6.0?) cannot do real FFTs.");
        gmx_fft_destroy(fft);
        return status;
    }

    if (status != 0)
    {
        gmx_fatal(FARGS, "Error initializing Intel MKL FFT; status=%d", status);
        gmx_fft_destroy(fft);
        return status;
    }

    fft->ndim     = 1;
    fft->nx       = nx;
    fft->real_fft = 1;

    *pfft = fft;
    return 0;
}