/*! \brief * Compute one cross correlation corr = f x g using FFT. * * \param[in] n number of data point * \param[in] f first function * \param[in] g second function * \param[out] corr output correlation * \param[in] fft FFT data structure */ static void cross_corr_low(int n, real f[], real g[], real corr[], gmx_fft_t fft) { int i; const int size = zeroPaddingSize(n); double ** in1, ** in2; snew(in1, size); snew(in2, size); for (i = 0; i < size; i++) { snew(in1[i], 2); snew(in2[i], 2); } for (i = 0; i < n; i++) { in1[i][0] = (double)f[i]; in1[i][1] = 0; in2[i][0] = (double)g[i]; in2[i][1] = 0; } for (; i < size; i++) { in1[i][0] = 0; in1[i][1] = 0; in2[i][0] = 0; in2[i][1] = 0; } gmx_fft_1d(fft, GMX_FFT_FORWARD, in1, in1); gmx_fft_1d(fft, GMX_FFT_FORWARD, in2, in2); for (i = 0; i < size; i++) { complexConjugatMult(in1[i], in2[i]); in1[i][0] /= size; } gmx_fft_1d(fft, GMX_FFT_BACKWARD, in1, in1); for (i = 0; i < n; i++) { corr[i] = (real)(in1[i][0]); } for (i = 0; i < size; i++) { sfree(in1[i]); sfree(in2[i]); } sfree(in1); sfree(in2); }
/*! \brief * Compute one cross correlation corr = f x g using FFT. * * \param[in] n number of data point * \param[in] f first function * \param[in] g second function * \param[out] corr output correlation * \param[in] fft FFT data structure */ static void cross_corr_low(int n, const real f[], const real g[], real corr[], gmx_fft_t fft) { int i; const int size = zeroPaddingSize(n); t_complex * in1, * in2; snew(in1, size); snew(in2, size); for (i = 0; i < n; i++) { in1[i].re = f[i]; in1[i].im = 0; in2[i].re = g[i]; in2[i].im = 0; } for (; i < size; i++) { in1[i].re = 0; in1[i].im = 0; in2[i].re = 0; in2[i].im = 0; } gmx_fft_1d(fft, GMX_FFT_FORWARD, in1, in1); gmx_fft_1d(fft, GMX_FFT_FORWARD, in2, in2); for (i = 0; i < size; i++) { complexConjugatMult(&in1[i], &in2[i]); in1[i].re /= size; } gmx_fft_1d(fft, GMX_FFT_BACKWARD, in1, in1); for (i = 0; i < n; i++) { corr[i] = in1[i].re; } sfree(in1); sfree(in2); }
int gmx_parallel_3dfft(gmx_parallel_3dfft_t pfft_setup, enum gmx_fft_direction dir, void * in_data, void * out_data) { int i,j,k; int nx,ny,nz,nzc,nzr; int local_x_start,local_nx; int local_y_start,local_ny; t_complex * work; real * rdata; t_complex * cdata; t_complex * ctmp; work = pfft_setup->work; /* When we do in-place FFTs the data need to be embedded in the z-dimension, * so there is room for the complex data. This means the direct space * _grid_ (not data) dimensions will be nx*ny*(nzc*2), where nzc=nz/2+1. * If we do out-of-place transforms the direct space dimensions are simply * nx*ny*nz, and no embedding is used. * The complex dimensions are always ny*nx*nzc (note the transpose). * * The direct space _grid_ dimension is nzr. */ nx = pfft_setup->nx; ny = pfft_setup->ny; nz = pfft_setup->nz; nzc = pfft_setup->nzc; if(in_data == out_data) { nzr = 2*nzc; } else { nzr = nz; } gmx_parallel_3dfft_limits(pfft_setup, &local_x_start, &local_nx, &local_y_start, &local_ny); if(dir == GMX_FFT_REAL_TO_COMPLEX) { rdata = (real *)in_data + local_x_start*ny*nzr; cdata = (t_complex *)out_data + local_x_start*ny*nzc; /* Perform nx local 2D real-to-complex FFTs in the yz slices. * When the input data is "embedded" for 3D-in-place transforms, this * must also be done in-place to get the data embedding right. * * Note that rdata==cdata when we work in-place. */ for(i=0;i<local_nx;i++) { gmx_fft_2d_real(pfft_setup->fft_yz, GMX_FFT_REAL_TO_COMPLEX, rdata + i*ny*nzr, cdata + i*ny*nzc); } /* Transpose to temporary work array */ gmx_parallel_transpose_xy(cdata, work, nx, ny, pfft_setup->local_slab, pfft_setup->slab2grid_x, pfft_setup->slab2grid_y, nzc, pfft_setup->nnodes, pfft_setup->node2slab, pfft_setup->aav, pfft_setup->comm); /* Transpose from temporary work array in order YXZ to * the output array in order YZX. */ /* output cdata changes when nx or ny not divisible by nnodes */ cdata = (t_complex *)out_data + local_y_start*nx*nzc; for(j=0;j<local_ny;j++) { gmx_fft_transpose_2d(work + j*nzc*nx, cdata + j*nzc*nx, nx, nzc); } /* Perform local_ny*nzc complex FFTs along the x dimension */ for(i=0;i<local_ny*nzc;i++) { gmx_fft_1d(pfft_setup->fft_x, GMX_FFT_FORWARD, cdata + i*nx, work + i*nx); } /* Transpose back from YZX to YXZ. */ for(j=0;j<local_ny;j++) { gmx_fft_transpose_2d(work + j*nzc*nx, cdata + j*nzc*nx, nzc, nx); } } else if(dir == GMX_FFT_COMPLEX_TO_REAL) { cdata = (t_complex *)in_data + local_y_start*nx*nzc; rdata = (real *)out_data + local_x_start*ny*nzr; /* If we are working in-place it doesn't matter that we destroy * input data. Otherwise we use an extra temporary workspace array. */ if(in_data == out_data) { ctmp = cdata; } else { ctmp = pfft_setup->work2; } /* Transpose from YXZ to YZX. */ for(j=0;j<local_ny;j++) { gmx_fft_transpose_2d(cdata + j*nzc*nx, work + j*nzc*nx, nx, nzc); } /* Perform local_ny*nzc complex FFTs along the x dimension */ for(i=0;i<local_ny*nzc;i++) { gmx_fft_1d(pfft_setup->fft_x, GMX_FFT_BACKWARD, work + i*nx, ctmp + i*nx); } /* Transpose from YZX to YXZ. */ for(j=0;j<local_ny;j++) { gmx_fft_transpose_2d(ctmp + j*nzc*nx, work + j*nzc*nx, nzc, nx); } if(in_data == out_data) { /* output cdata changes when nx or ny not divisible by nnodes */ ctmp = (t_complex *)in_data + local_x_start*ny*nzc; } gmx_parallel_transpose_xy(work, ctmp, ny, nx, pfft_setup->local_slab, pfft_setup->slab2grid_y, pfft_setup->slab2grid_x, nzc, pfft_setup->nnodes, pfft_setup->node2slab, pfft_setup->aav, pfft_setup->comm); /* Perform nx local 2D complex-to-real FFTs in the yz slices. * The 3D FFT is done in-place, so we need to do this in-place too in order * to get the data organization right. */ for(i=0;i<local_nx;i++) { gmx_fft_2d_real(pfft_setup->fft_yz, GMX_FFT_COMPLEX_TO_REAL, ctmp + i*ny*nzc, rdata + i*ny*nzr); } } else { gmx_fatal(FARGS,"Incorrect FFT direction."); } /* Skip the YX backtranspose to save communication! Grid is now YXZ */ return 0; }
int gmx_fft_2d_real (gmx_fft_t fft, enum gmx_fft_direction dir, void * in_data, void * out_data) { int i, j, nx, ny, nyc; t_complex * data; real * work; real * p1; real * p2; nx = fft->n; ny = fft->next->n; /* Number of complex elements in y direction */ nyc = (ny/2+1); work = fft->work+4*nx; if (dir == GMX_FFT_REAL_TO_COMPLEX) { /* If we are doing an in-place transform the 2D array is already * properly padded by the user, and we are all set. * * For out-of-place there is no array padding, but FFTPACK only * does in-place FFTs internally, so we need to start by copying * data from the input to the padded (larger) output array. */ if (in_data != out_data) { p1 = (real *)in_data; p2 = (real *)out_data; for (i = 0; i < nx; i++) { for (j = 0; j < ny; j++) { p2[i*nyc*2+j] = p1[i*ny+j]; } } } data = static_cast<t_complex *>(out_data); /* y real-to-complex FFTs */ for (i = 0; i < nx; i++) { gmx_fft_1d_real(fft->next, GMX_FFT_REAL_TO_COMPLEX, data+i*nyc, data+i*nyc); } /* Transform to get X data in place */ gmx_fft_transpose_2d(data, data, nx, nyc); /* Complex-to-complex X FFTs */ for (i = 0; i < nyc; i++) { gmx_fft_1d(fft, GMX_FFT_FORWARD, data+i*nx, data+i*nx); } /* Transpose back */ gmx_fft_transpose_2d(data, data, nyc, nx); } else if (dir == GMX_FFT_COMPLEX_TO_REAL) { /* An in-place complex-to-real transform is straightforward, * since the output array must be large enough for the padding to fit. * * For out-of-place complex-to-real transforms we cannot just copy * data to the output array, since it is smaller than the input. * In this case there's nothing to do but employing temporary work data, * starting at work+4*nx and using nx*nyc*2 elements. */ if (in_data != out_data) { memcpy(work, in_data, sizeof(t_complex)*nx*nyc); data = reinterpret_cast<t_complex *>(work); } else { /* in-place */ data = reinterpret_cast<t_complex *>(out_data); } /* Transpose to get X arrays */ gmx_fft_transpose_2d(data, data, nx, nyc); /* Do X iFFTs */ for (i = 0; i < nyc; i++) { gmx_fft_1d(fft, GMX_FFT_BACKWARD, data+i*nx, data+i*nx); } /* Transpose to get Y arrays */ gmx_fft_transpose_2d(data, data, nyc, nx); /* Do Y iFFTs */ for (i = 0; i < nx; i++) { gmx_fft_1d_real(fft->next, GMX_FFT_COMPLEX_TO_REAL, data+i*nyc, data+i*nyc); } if (in_data != out_data) { /* Output (pointed to by data) is now in padded format. * Pack it into out_data if we were doing an out-of-place transform. */ p1 = (real *)data; p2 = (real *)out_data; for (i = 0; i < nx; i++) { for (j = 0; j < ny; j++) { p2[i*ny+j] = p1[i*nyc*2+j]; } } } } else { gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction."); return EINVAL; } return 0; }
int many_auto_correl(int nfunc, int ndata, int nfft, real **c) { #pragma omp parallel { try { typedef real complex[2]; int i, j; gmx_fft_t fft1; complex *in, *out; int i0, i1; int nthreads, thread_id; nthreads = gmx_omp_get_max_threads(); thread_id = gmx_omp_get_thread_num(); if ((0 == thread_id)) { // fprintf(stderr, "There are %d threads for correlation functions\n", nthreads); } i0 = thread_id*nfunc/nthreads; i1 = std::min(nfunc, (thread_id+1)*nfunc/nthreads); gmx_fft_init_1d(&fft1, nfft, GMX_FFT_FLAG_CONSERVATIVE); /* Allocate temporary arrays */ snew(in, nfft); snew(out, nfft); for (i = i0; (i < i1); i++) { for (j = 0; j < ndata; j++) { in[j][0] = c[i][j]; in[j][1] = 0; } for (; (j < nfft); j++) { in[j][0] = in[j][1] = 0; } gmx_fft_1d(fft1, GMX_FFT_BACKWARD, (void *)in, (void *)out); for (j = 0; j < nfft; j++) { in[j][0] = (out[j][0]*out[j][0] + out[j][1]*out[j][1])/nfft; in[j][1] = 0; } for (; (j < nfft); j++) { in[j][0] = in[j][1] = 0; } gmx_fft_1d(fft1, GMX_FFT_FORWARD, (void *)in, (void *)out); for (j = 0; (j < nfft); j++) { c[i][j] = out[j][0]/ndata; } } /* Free the memory */ gmx_fft_destroy(fft1); sfree(in); sfree(out); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } // gmx_fft_cleanup(); return 0; }