void remap_output( double wri[], const double t) { int i,j,k; double tvelocity; double tremap; complex double wexp; complex double phase; double complex *w2d; DEBUG_START_FUNC; w2d = (double complex *) fftw_malloc( sizeof(double complex) * (NY/2+1) * NZ ); if (w2d == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w2d allocation"); #ifdef TIME_DEPENDANT_SHEAR tremap = time_shift(t); tvelocity = 0.0; #else tremap = time_shift(t); tvelocity = fmod(t, 2.0 * param.ly / (param.shear * param.lx)); #endif for( i = 0 ; i < NX/NPROC ; i++) { #ifdef WITH_2D fftw_execute_dft_r2c(fft_1d_forward, wri + i*(NY+2), w2d); #else fftw_execute_dft_r2c(fft_1d_forward, wri + i*(NZ+2)*NY, w2d); #endif for( j = 0 ; j < NY/2+1 ; j++) { phase = (double complex) ((2.0 * M_PI) / param.ly * ((double) j ) * ( ((double) (i + rank * (NX/NPROC)) / (double) NX ) * tremap - tvelocity / 2.0 ) * param.lx * param.shear); //printf("phase=%g + I %g\n",creal(phase), cimag(phase)); wexp = cexp( I * phase)/NY; //printf("wexp=%g + I %g\n",creal(wexp), cimag(wexp)); for( k = 0 ; k < NZ; k++) { w2d[ k + j * NZ ] = wexp * w2d[ k + j * NZ ]; } } #ifdef WITH_2D fftw_execute_dft_c2r(fft_1d_backward, w2d, wri + i*(NY+2)); #else fftw_execute_dft_c2r(fft_1d_backward, w2d, wri + i*(NZ+2)*NY); #endif } fftw_free(w2d); DEBUG_END_FUNC; return; }
static void hankel_matmul(double* out, const double* v, const void* matrix) { const hankel_matrix *h = matrix; R_len_t N = h->length, L = h->window; R_len_t K = N - L + 1, i; double *circ; fftw_complex *ocirc; /* Allocate needed memory */ circ = (double*) fftw_malloc(N * sizeof(double)); ocirc = (fftw_complex*) fftw_malloc((N/2 + 1) * sizeof(fftw_complex)); /* Fill the arrays */ for (i = 0; i < K; ++i) circ[i] = v[K - i - 1]; memset(circ + K, 0, (L - 1)*sizeof(double)); /* Compute the FFT of the reversed vector v */ fftw_execute_dft_r2c(h->r2c_plan, circ, ocirc); /* Dot-multiply with pre-computed FFT of toeplitz circulant */ for (i = 0; i < (N/2 + 1); ++i) ocirc[i] = ocirc[i] * h->circ_freq[i]; /* Compute the reverse transform to obtain result */ fftw_execute_dft_c2r(h->c2r_plan, ocirc, circ); /* Cleanup and return */ for (i = 0; i < L; ++i) out[i] = circ[i] / N; fftw_free(circ); fftw_free(ocirc); }
/*
 * Convolve `y` in place with a signal whose spectrum `ox` is already known.
 *
 * ox        : pre-computed spectrum, hprod(rank, N) complex values are read
 * y         : real array, transformed by r2c_plan and overwritten with the
 *             result of c2r_plan
 * rank, N   : number of dimensions and their extents
 * conjugate : when non-zero the spectrum of y is conjugated before the product
 * r2c_plan, c2r_plan : FFTW plans used for the forward / inverse transforms
 */
static void convolveNd_half(const fftw_complex *ox,
                            double *y,
                            R_len_t rank,
                            const R_len_t *N,
                            int conjugate,
                            fftw_plan r2c_plan,
                            fftw_plan c2r_plan) {
  R_len_t idx;
  fftw_complex *spec;
  R_len_t total = prod(rank, N), half_total = hprod(rank, N);

  /* Scratch spectrum for y */
  spec = (fftw_complex*) fftw_malloc(half_total * sizeof(fftw_complex));

  /* N-dimensional forward transform of y */
  fftw_execute_dft_r2c(r2c_plan, y, spec);

  /* Multiply by ox scaled by 1/prod(N), conjugating first when requested */
  if (conjugate) {
    for (idx = 0; idx < half_total; ++idx)
      spec[idx] = conj(spec[idx]) * (ox[idx] / total);
  } else {
    for (idx = 0; idx < half_total; ++idx)
      spec[idx] = spec[idx] * (ox[idx] / total);
  }

  /* Inverse transform writes the result back into y */
  fftw_execute_dft_c2r(c2r_plan, spec, y);

  fftw_free(spec);
}
/*
 * In-place complex-to-real transform of `win` using the c2rfft plan.
 * The real output is written over the same memory.  The time spent in
 * the call is accumulated into fft_timer.
 */
void gfft_c2r(double complex *win){
	/* Subtract the start time now and add the end time afterwards, so
	 * fft_timer grows by the elapsed interval. */
	fft_timer -= get_c_time();
	fftw_execute_dft_c2r(c2rfft, win, (double *) win);
	fft_timer += get_c_time();
}
void FFTConvolver::ConvolveSameSize(double* image, const double* kernel, size_t imgWidth, size_t imgHeight) { const size_t imgSize = imgWidth * imgHeight; const size_t complexSize = (imgWidth/2+1) * imgHeight; double* tempData = reinterpret_cast<double*>(fftw_malloc(imgSize * sizeof(double))); fftw_complex* fftImageData = reinterpret_cast<fftw_complex*>(fftw_malloc(complexSize * sizeof(fftw_complex))); fftw_complex* fftKernelData = reinterpret_cast<fftw_complex*>(fftw_malloc(complexSize * sizeof(fftw_complex))); boost::mutex::scoped_lock lock(_mutex); fftw_plan inToFPlan = fftw_plan_dft_r2c_2d(imgHeight, imgWidth, tempData, fftImageData, FFTW_ESTIMATE); fftw_plan fToOutPlan = fftw_plan_dft_c2r_2d(imgHeight, imgWidth, fftImageData, tempData, FFTW_ESTIMATE); lock.unlock(); memcpy(tempData, image, imgSize * sizeof(double)); fftw_execute_dft_r2c(inToFPlan, tempData, fftImageData); memcpy(tempData, kernel, imgSize * sizeof(double)); fftw_execute_dft_r2c(inToFPlan, tempData, fftKernelData); double fact = 1.0/imgSize; for(size_t i=0; i!=complexSize; ++i) reinterpret_cast<std::complex<double>*>(fftImageData)[i] *= fact * reinterpret_cast<std::complex<double>*>(fftKernelData)[i]; fftw_execute_dft_c2r(fToOutPlan, reinterpret_cast<fftw_complex*>(fftImageData), tempData); memcpy(image, tempData, imgSize * sizeof(double)); fftw_free(fftImageData); fftw_free(fftKernelData); fftw_free(tempData); lock.lock(); fftw_destroy_plan(inToFPlan); fftw_destroy_plan(fToOutPlan); lock.unlock(); }
/*
 * Build the spectrum of one angular sector mask.
 *
 * A 0/1 mask is generated on an nx-by-ny grid: a pixel is 1 when it lies
 * on the positive side of the line at angle a-wd+pi/2 and on the
 * non-positive side of the line at angle a+wd+pi/2.  The mask is multiplied
 * by g2 in the frequency domain, transformed back, shifted by
 * (nx/2, ny/2), scaled by 1/(nx*ny), weighted by g1, normalised so that its
 * elements sum to one, and finally transformed in place with f->forward.
 *
 * nx, ny : grid size; rows are stored with a stride of 2*(nx/2+1) doubles
 * f      : plans; f->forward (r2c) and f->backward (c2r) are used
 * a, wd  : sector angle and half-width
 * g1     : real weights, nx*ny values with row stride nx
 * g2     : complex multiplier, ny*(nx/2+1) values
 *
 * Returns a buffer obtained from fftw_malloc (release it with fftw_free),
 * or NULL when an allocation fails.
 *
 * NOTE(review): if the weighted mask sums to zero the normalisation
 * divides by zero; nx == 1 or ny == 1 also divides by zero when the
 * coordinates are generated.  Callers presumably never pass such inputs.
 */
fftw_complex* gen_sec (const int nx, const int ny, fft_plans_t* f, const double a, const double wd, const double* g1, const fftw_complex* g2)
{
	int i, j, x, y;
	double total = 0;
	double* out;
	const int stride = 2*(nx/2+1);
	const size_t bytes = sizeof(double)*ny*2*(nx/2+1);

	/* Direction cosines of the two sector edges do not depend on the pixel. */
	const double cos_lo = cos(a-wd+M_PI/2.0);
	const double sin_lo = sin(a-wd+M_PI/2.0);
	const double cos_hi = cos(a+wd+M_PI/2.0);
	const double sin_hi = sin(a+wd+M_PI/2.0);

	double* sec_d = fftw_malloc (bytes);
	if (NULL == sec_d)
	{
		return NULL;
	}
	fftw_complex* sec = (fftw_complex*)sec_d;
	memset(sec_d, 0, bytes);

	/* Rasterise the sector mask. */
	for (j = 0; j < ny; j++)
	{
		for (i = 0; i < nx; i++)
		{
			const double x_val = -nx/2.0 + (i) * (double)nx/(double)(nx-1);
			const double y_val = ny/2.0 - (j) * (double)ny/(double)(ny-1);

			sec_d[i+j*stride] = (x_val*cos_lo + y_val*sin_lo > 0 ? 1 : 0) *
				(x_val*cos_hi + y_val*sin_hi <= 0 ? 1 : 0);
		}
	}

	/* Filter the mask with g2 in the frequency domain. */
	fftw_execute_dft_r2c (f->forward, sec_d, sec);
	for (i = 0; i < ny*(nx/2+1); i++)
	{
		sec[i] *= g2[i];
	}
	fftw_execute_dft_c2r (f->backward, sec, sec_d);

	if (NULL == (out = fftw_malloc (bytes)))
	{
		/* Do not leak the mask buffer on the failure path. */
		fftw_free (sec_d);
		return NULL;
	}
	/* The padding columns are never written below but are touched by the
	 * normalisation loop, so start from a defined state. */
	memset(out, 0, bytes);

	/* Shift by half the grid, undo the FFT scaling and apply g1. */
	x = nx/2;
	y = ny/2;
	for (j = 0; j < ny; j++)
	{
		int k = (j - y) % ny;
		k = k < 0 ? ny + k : k;

		for (i = 0; i < nx; i++)
		{
			int h = (i - x) % nx;
			h = h < 0 ? nx + h : h;

			out[i+j*stride] = sec_d[h+k*stride] / (nx*ny) * g1[i+j*nx];
			total += out[i+j*stride];
		}
	}

	/* Normalise to unit sum. */
	for (i = 0; i < ny*stride; i++)
	{
		out[i] /= total;
	}

	/* In-place forward transform of the finished sector. */
	fftw_execute_dft_r2c (f->forward, out, (fftw_complex*)out);

	fftw_free (sec);
	return (fftw_complex*)out;
}
static R_INLINE void hankelize_fft(double *F, const double *U, const double *V, const hankel_matrix *h) { R_len_t N = h->length, L = h->window; R_len_t K = N - L + 1; R_len_t i; double *iU, *iV; fftw_complex *cU, *cV; /* Allocate needed memory */ iU = (double*) fftw_malloc(N * sizeof(double)); iV = (double*) fftw_malloc(N * sizeof(double)); cU = (fftw_complex*) fftw_malloc((N/2 + 1) * sizeof(fftw_complex)); cV = (fftw_complex*) fftw_malloc((N/2 + 1) * sizeof(fftw_complex)); /* Fill in buffers */ memcpy(iU, U, L*sizeof(double)); memset(iU+L, 0, (K - 1)*sizeof(double)); memcpy(iV, V, K*sizeof(double)); memset(iV+K, 0, (L - 1)*sizeof(double)); /* Compute the FFTs */ fftw_execute_dft_r2c(h->r2c_plan, iU, cU); fftw_execute_dft_r2c(h->r2c_plan, iV, cV); /* Dot-multiply */ for (i = 0; i < N/2 + 1; ++i) cU[i] = cU[i] * cV[i]; /* Compute the inverse FFT */ fftw_execute_dft_c2r(h->c2r_plan, cU, iU); /* Form the result */ for (i = 0; i < N; ++i) { R_len_t leftu, rightu, l; if (i < L) leftu = i; else leftu = L - 1; if (i < K) rightu = 0; else rightu = i - K + 1; l = (leftu - rightu + 1); F[i] = iU[i] / l / N; } fftw_free(iU); fftw_free(iV); fftw_free(cU); fftw_free(cV); }
static void hbhankel_tmatmul(double* out, const double* v, const void* matrix) { const hbhankel_matrix *h = matrix; R_len_t Nx = h->length.x, Ny = h->length.y; R_len_t Lx = h->window.x, Ly = h->window.y; R_len_t Kx = Nx - Lx + 1, Ky = Ny - Ly + 1, i, j; double *circ; fftw_complex *ocirc; /* Allocate needed memory */ circ = (double*) fftw_malloc(Nx * Ny * sizeof(double)); ocirc = (fftw_complex*) fftw_malloc(Ny*(Nx / 2 + 1) * sizeof(fftw_complex)); /* revv <- matrix(c(rep(0, C$Lx*(C$Ky-1)), rev(v)), C$Lx, ncol(C$Cblock)); revv <- rbind(matrix(0, (C$Kx-1), ncol(revv)), revv); mult <- fft(C$Cblock * fft(revv), inverse = TRUE); Re((mult/(prod(dim(C$Cblock))))[C$Lx:(C$Lx+C$Kx-1),C$Ly:(C$Ly+C$Ky-1)]); */ /* Fill the arrays */ memset(circ, 0, Nx * Ny * sizeof(double)); for (j = 0; j < Ly; ++j) for (i = 0; i < Lx; ++i) circ[(i + Kx - 1) + (j + Ky - 1)*Nx] = v[Lx*Ly - i - j*Lx - 1]; /* Compute the FFT of the reversed vector v */ fftw_execute_dft_r2c(h->r2c_plan, circ, ocirc); /* Dot-multiply with pre-computed FFT of toeplitz circulant */ for (i = 0; i < Ny * (Nx/2 + 1); ++i) ocirc[i] = ocirc[i] * h->circ_freq[i]; /* Compute the reverse transform to obtain result */ fftw_execute_dft_c2r(h->c2r_plan, ocirc, circ); /* Cleanup and return */ for (j = 0; j < Ky; ++j) for (i = 0; i < Kx; ++i) out[i + j*Kx] = circ[(i + Lx - 1) + (j + Ly - 1)*Nx] / (Nx*Ny); fftw_free(circ); fftw_free(ocirc); }
/*
 * Run one FFT-based scoring pass: transform the signal, multiply its
 * spectrum by the stored pattern spectrum, and transform the product back
 * into udp->padded_score.  Intermediate results are written out through
 * dump_fft / dump_txt.
 */
void
s_ncc_fft_run (FATM_Options* fopt)
{
    S_Ncc_Fft_Data* udp = (S_Ncc_Fft_Data*) fopt->alg_data;
    int fft_nx = fopt->sig_rect_scan.dims[1];	/* In fftw3, nx is rows */
    int fft_ny = fopt->sig_rect_scan.dims[0];	/* In fftw3, ny is cols */
    int n_coeff = fft_nx * (fft_ny/2+1);
    int idx;

    /* Make integral images, etc. */
    s_ncc_fft_scorewin_initialize (fopt);

    /* Forward transform of the signal */
    fftw_execute_dft_r2c (udp->sig_fftw3_plan,
	(double*) fopt->sig.data, udp->sig_fft);

    /* Debugging info */
    dump_fft (udp->sig_fft, fft_nx, fft_ny, "sig_fft.txt");

    /* Complex product sig_fft * pat_fft, stored back into sig_fft */
    for (idx = 0; idx < n_coeff; idx++) {
	const double sig_re = udp->sig_fft[idx][0];
	const double sig_im = udp->sig_fft[idx][1];
	const double pat_re = udp->pat_fft[idx][0];
	const double pat_im = udp->pat_fft[idx][1];

	udp->sig_fft[idx][0] = sig_re * pat_re - sig_im * pat_im;
	udp->sig_fft[idx][1] = sig_re * pat_im + sig_im * pat_re;
    }

    /* Debugging info */
    dump_fft (udp->sig_fft, fft_nx, fft_ny, "sco_fft.txt");

    /* Inverse transform of the product into the padded score buffer */
    fftw_execute_dft_c2r (udp->sco_fftw3_plan, udp->sig_fft, udp->padded_score);

    dump_txt (udp->padded_score, fft_nx, fft_ny, "sco_ifftd.txt");
}
void gfft_c2r_t(double complex *win) { int i; double *wrin = (double *) win; fft_timer = fft_timer - get_c_time(); // We now have an array with logical dimensions[NX_COMPLEX/NPROC, NY_COMPLEX, NZ_COMPLEX] #ifdef _OPENMP #pragma omp parallel for private(i) schedule(static) #endif for(i=0 ; i < NX_COMPLEX/NPROC ; i++) fftw_execute_dft(c2r_1d, &win[i*NY_COMPLEX*NZ_COMPLEX],&win[i*NY_COMPLEX*NZ_COMPLEX]); // The logical dimensions of win are [NX_COMPLEX/NPROC, NY_COMPLEX, NZ_COMPLEX] // transpose it transpose_complex_XY(win, win); // The final 2D transform fftw_execute_dft_c2r(c2r_2d, win, wrin); // and we're done ! fft_timer = fft_timer + get_c_time(); return; }
void FFTRealInverse::transform(int size, Complex *in, rsFloat *out) { if (plan == 0) throw FFTException("[BUG] Can not transform on NULL plan."); fftw_execute_dft_c2r((fftw_plan_s *)plan, reinterpret_cast<fftw_complex *>(in), out); }
/*
 * Thin wrapper around fftw_execute_dft_c2r that takes the plan by pointer.
 *
 * plan     : pointer to the plan to execute
 * cplx_in  : complex input array
 * dble_out : real output array
 */
void kemo_fftw_execute_dft_c2r(fftw_plan *plan,
			fftw_complex *cplx_in, double *dble_out){
	fftw_plan handle = *plan;

	fftw_execute_dft_c2r(handle, cplx_in, dble_out);
}
/*
 * Smooth a 3-channel interleaved 8-bit image using the pre-computed
 * spectra in ab->g.
 *
 * src, dst : input / output images; rows are `pitch` bytes apart and hold
 *            3 bytes per pixel
 * ns       : number of entries of ab->g that are applied
 * q        : unused
 * nx, ny   : image size; work planes use a row stride of 2*(nx/2+1) doubles
 * f        : FFT plans (f->forward r2c, f->backward c2r)
 * ab       : work buffers (src1, src2, s, m, num, den) and the spectra g
 *
 * Each channel and its square are transformed once.  For every entry of
 * ab->g, multiply_6_c, the inverse transforms and mesh_d accumulate into
 * num/den.  divide_mul_const_d then finalises num, which is written to dst
 * with a cyclic shift of (nx/2, ny/2).
 */
void artistic_smooth (uint8_t* src, uint8_t* dst, double ns, double q, int nx, int ny, int pitch, fft_plans_t* f, artistic_buf_t* ab)
{
	int col, row, ch, pass;
	const int width = 2*(nx/2+1);
	const double nxny = nx*ny;
	const double nxny2 = nxny*nxny;
	const int n = ny*(nx/2+1);
	const int n2 = n*2;

	(void) q;

	fftw_complex** src1_c = ab->src1;
	fftw_complex** src2_c = ab->src2;
	fftw_complex** s_c = ab->s;
	fftw_complex** m_c = ab->m;
	double** num = ab->num;
	double* den = ab->den;

	/* Real-valued views of the (in-place) complex work planes. */
	double* src1_d[3];
	double* src2_d[3];
	double* s_d[3];
	double* m_d[3];
	for (ch = 0; ch < 3; ch++)
	{
		src1_d[ch] = (double*) src1_c[ch];
		src2_d[ch] = (double*) src2_c[ch];
		s_d[ch] = (double*) s_c[ch];
		m_d[ch] = (double*) m_c[ch];
	}

	/* De-interleave the pixels: value into src1, squared value into src2. */
	for (row = 0; row < ny; row++)
	{
		const uint8_t* line = src + row*pitch;
		const int base = row*width;

		for (col = 0; col < nx; col++)
		{
			for (ch = 0; ch < 3; ch++)
			{
				const double value = line[3*col + ch];
				src1_d[ch][base + col] = value;
				src2_d[ch][base + col] = value*value;
			}
		}
	}

	/* Forward transforms, in place. */
	for (ch = 0; ch < 3; ch++)
	{
		fftw_execute_dft_r2c (f->forward, src1_d[ch], src1_c[ch]);
		fftw_execute_dft_r2c (f->forward, src2_d[ch], src2_c[ch]);
	}

	/* Clear the accumulators. */
	for (ch = 0; ch < 3; ch++)
	{
		memset (num[ch], 0, sizeof(double)*n2);
	}
	memset (den, 0, sizeof(double)*n2);

	/* One pass per stored spectrum. */
	for (pass = 0; pass < ns; pass++)
	{
		fftw_complex* g_fft = ab->g[pass];

		multiply_6_c(m_c[0], src1_c[0], m_c[1], src1_c[1], m_c[2], src1_c[2],
			s_c[0], src2_c[0], s_c[1], src2_c[1], s_c[2], src2_c[2], g_fft, n);

		for (ch = 0; ch < 3; ch++)
		{
			fftw_execute_dft_c2r (f->backward, m_c[ch], m_d[ch]);
			fftw_execute_dft_c2r (f->backward, s_c[ch], s_d[ch]);
		}

		mesh_d (s_d[0], s_d[1], s_d[2], m_d[0], m_d[1], m_d[2], nxny, nxny2,
			num[0], num[1], num[2], den, n2);
	}

	for (ch = 0; ch < 3; ch++)
	{
		divide_mul_const_d(num[ch], den, nxny, n2);
	}

	/* Write the result with a cyclic shift of half the image size. */
	{
		const int shift_x = nx/2.0;
		const int shift_y = ny/2.0;

		for (row = 0; row < ny; row++)
		{
			uint8_t* out_line = dst + row*pitch;
			int src_row = (row - shift_y) % ny;
			int row_base;

			if (src_row < 0)
			{
				src_row += ny;
			}
			row_base = src_row * width;

			for (col = 0; col < nx; col++)
			{
				int src_col = (col - shift_x) % nx;
				int at;

				if (src_col < 0)
				{
					src_col += nx;
				}
				at = src_col + row_base;

				for (ch = 0; ch < 3; ch++)
				{
					out_line[3*col + ch] = num[ch][at];
				}
			}
		}
	}
}
/* Complex to real inverse transform.
 *
 * The input is converted to double-precision complex, the left
 * Xsize / 2 + 1 columns of every row are packed into a half-complex
 * buffer, and a 2D c2r FFTW plan turns that into a real double image which
 * is then copied to out.
 *
 * dummy: image owning the temporary buffers and intermediate images
 * in: one-band, uncoded input image
 * out: receives the real result
 *
 * Returns 0 on success, -1 on error.
 */
static int 
invfft1( IMAGE *dummy, IMAGE *in, IMAGE *out )
{
	IMAGE *cmplx = im_open_local( dummy, "invfft1-1", "t" );
	IMAGE *real = im_open_local( out, "invfft1-2", "t" );
	const int half_width = in->Xsize / 2 + 1;

	/* Transform to halfcomplex here.
	 */
	double *half_complex = IM_ARRAY( dummy, 
		in->Ysize * half_width * 2, double );

	/* We have to have a separate real buffer for the planner to work on.
	 */
	double *planner_scratch = IM_ARRAY( dummy, 
		in->Ysize * half_width * 2, double );

	fftw_plan plan;
	int x, y;
	double *q, *p;

	/* Both work buffers must exist: the planner is handed
	 * planner_scratch below, so a failed allocation must not get that far.
	 */
	if( !cmplx || !real || !half_complex || !planner_scratch || 
		im_pincheck( in ) || im_poutcheck( out ) )
		return( -1 );
	if( in->Coding != IM_CODING_NONE || in->Bands != 1 ) {
                im_error( "im_invfft", 
			"%s", _( "one band uncoded only" ) );
                return( -1 );
	}

	/* Make dp complex image for input.
	 */
	if( im_clip2fmt( in, cmplx, IM_BANDFMT_DPCOMPLEX ) )
                return( -1 );

	/* Make mem buffer real image for output.
	 */
        if( im_cp_desc( real, in ) )
                return( -1 );
	real->BandFmt = IM_BANDFMT_DOUBLE;
        if( im_setupout( real ) )
                return( -1 );

	/* Build half-complex image: keep the first half_width complex
	 * values of each row, skip the rest.
	 */
	q = half_complex;
	for( y = 0; y < cmplx->Ysize; y++ ) {
		p = ((double *) cmplx->data) + y * in->Xsize * 2; 

		for( x = 0; x < half_width; x++ ) {
			q[0] = p[0];
			q[1] = p[1];
			p += 2;
			q += 2;
		}
	}

	/* Make the plan for the transform. Yes, they really do use nx for
	 * height and ny for width.
	 */
	if( !(plan = fftw_plan_dft_c2r_2d( in->Ysize, in->Xsize,
		(fftw_complex *) planner_scratch, (double *) real->data,
		0 )) ) {
                im_error( "im_invfft", 
			"%s", _( "unable to create transform plan" ) );
		return( -1 );
	}

	/* Run the plan on the real input rather than the scratch buffer it
	 * was planned with.
	 */
	fftw_execute_dft_c2r( plan,
		(fftw_complex *) half_complex, (double *) real->data );

	fftw_destroy_plan( plan );

	/* Copy to out.
	 */
        if( im_copy( real, out ) )
                return( -1 );

	return( 0 );
}
void gravity_calculate_acceleration(void){ // Setting up the grid if (gravity_fft_init_done==0){ gravity_fft_init(); gravity_fft_init_done=1; } #pragma omp parallel for schedule(guided) for (int i=0; i<N; i++){ particles[i].ax = 0; particles[i].ay = 0; particles[i].az = 0; } if (integrator == SEI){ struct ghostbox gb = boundaries_get_ghostbox(1,0,0); shift_shear = gb.shifty; } gravity_fft_p2grid(); if (integrator == SEI){ // Remap in fourier space to deal with shearing sheet boundary conditions. gravity_fft_remap(density_r, 1); } fftw_execute_dft_r2c(r2cfft, density_r, (fftw_complex*)density); // Inverse Poisson equation for(int i = 0 ; i < grid_NCOMPLEX ; i++) { if (integrator == SEI){ // Compute time-dependent wave-vectors kxt[i] = kx[i] + shift_shear/boxsize_y * ky[i]; k[i] = sqrt( kxt[i]*kxt[i] + ky[i] * ky[i]); // we will use 1/k, that prevents singularity // (the k=0 is set to zero by renormalization...) if ( k[i] == 0.0 ) k[i] = 1.0; } double q0 = - 2.0 * M_PI * density[2*i] / (k[i] * root_nx * root_ny); double q1 = - 2.0 * M_PI * density[2*i+1] / (k[i] * root_nx * root_ny); double sinkxt = sin(kxt[i] * dx); double sinky = sin(ky[i] * dy); fx[2*i] = q1 * sinkxt / dx; // Real part of Fx fx[2*i+1] = - q0 * sinkxt / dx; // Imaginary part of Fx fy[2*i] = q1 * sinky / dy; fy[2*i+1] = - q0 * sinky / dy; } // Transform back the force field fftw_execute_dft_c2r(c2rfft, (fftw_complex*)fx, fx); fftw_execute_dft_c2r(c2rfft, (fftw_complex*)fy, fy); if (integrator == SEI){ // Remap in fourier space to deal with shearing sheet boundary conditions. gravity_fft_remap(fx, -1); gravity_fft_remap(fy, -1); } for(int i=0;i<N;i++){ gravity_fft_grid2p(&(particles[i])); } }
void Transformer_CPU::transform_inverse_x(double *inout) { fftw_execute_dft_c2r(plan_x_c2r, (fftw_complex*)inout, (double*)inout); }
static R_INLINE void hbhankelize_fft(double *F, const double *U, const double *V, const hbhankel_matrix* h) { R_len_t Nx = h->length.x, Ny = h->length.y; R_len_t Lx = h->window.x, Ly = h->window.y; R_len_t Kx = Nx - Lx + 1, Ky = Ny - Ly + 1; R_len_t i, j; R_len_t wx, dwx, wy, dwy; double *iU, *iV; fftw_complex *cU, *cV; /* Allocate needed memory */ iU = (double*) fftw_malloc(Nx * Ny * sizeof(double)); iV = (double*) fftw_malloc(Nx * Ny * sizeof(double)); cU = (fftw_complex*) fftw_malloc(Ny*(Nx / 2 + 1) * sizeof(fftw_complex)); cV = (fftw_complex*) fftw_malloc(Ny*(Nx / 2 + 1) * sizeof(fftw_complex)); /* Fill the arrays */ memset(iU, 0, Nx * Ny * sizeof(double)); for (j = 0; j < Ly; ++j) for (i = 0; i < Lx; ++i) iU[i + j*Nx] = U[i + j*Lx]; memset(iV, 0, Nx * Ny * sizeof(double)); for (j = 0; j < Ky; ++j) for (i = 0; i < Kx; ++i) iV[i + j*Nx] = V[i + j*Kx]; /* Compute the FFTs */ fftw_execute_dft_r2c(h->r2c_plan, iU, cU); fftw_execute_dft_r2c(h->r2c_plan, iV, cV); /* Dot-multiply */ for (i = 0; i < Ny * (Nx/2 + 1); ++i) cU[i] = cU[i] * cV[i]; /* Compute the inverse FFT */ fftw_execute_dft_c2r(h->c2r_plan, cU, iU); /* Form the result */ for (j = 0, wy = 1, dwy = 1; j < Ny; ++j, wy += dwy) { if (j == Ly - 1) dwy--; if (j == Ky - 1) /* Do not join two ifs! */ dwy--; for (i = 0, wx = 1, dwx = 1; i < Nx; ++i, wx += dwx) { if (i == Lx - 1) dwx--; if (i == Kx - 1) dwx--; F[i+j*Nx] = iU[i+j*Nx] / wx / wy / Nx / Ny; } } fftw_free(iU); fftw_free(iV); fftw_free(cU); fftw_free(cV); }
/*
 * N-dimensional FFT convolution of two R arrays.
 *
 * x, y       : real arrays carrying a dim attribute
 * input_dim  : integer vector with the extents of the padded transform
 * output_dim : integer vector with the extents of the returned array
 * Conj       : logical; when TRUE the spectrum of y is conjugated before
 *              the product
 *
 * Both inputs are copied into a zeroed buffer of size prod(input_dim)
 * with fill_subarray, transformed, multiplied (scaled by
 * 1/prod(input_dim)) and transformed back.  The output_dim-sized part of
 * the result is returned as a new REALSXP with its dim attribute set.
 */
SEXP convolveN(SEXP x, SEXP y,
               SEXP input_dim, SEXP output_dim,
               SEXP Conj) {
  SEXP x_dim = R_NilValue, y_dim = R_NilValue;
  R_len_t rank = length(input_dim);
  R_len_t *N = INTEGER(input_dim);
  R_len_t pN = prod(rank, N), phN = hprod(rank, N);
  int conjugate = LOGICAL(Conj)[0];

  fftw_complex *ox, *oy;
  fftw_plan r2c_plan, c2r_plan;
  double *circ;
  R_len_t *revN, r, i;

  /* Allocate needed memory */
  circ = (double*) fftw_malloc(pN * sizeof(double));
  ox = (fftw_complex*) fftw_malloc(phN * sizeof(fftw_complex));
  oy = (fftw_complex*) fftw_malloc(phN * sizeof(fftw_complex));

  /* Estimate the best plans for given input length, note, that input data is
     stored in column-major mode, that's why we're passing dimensions in
     *reverse* order */
  revN = Calloc(rank, R_len_t);
  for (r = 0; r < rank; ++r) revN[r] = N[rank - 1 - r];
  r2c_plan = fftw_plan_dft_r2c(rank, revN, circ, ox, FFTW_ESTIMATE);
  c2r_plan = fftw_plan_dft_c2r(rank, revN, ox, circ, FFTW_ESTIMATE);
  Free(revN);

  PROTECT(x_dim = getAttrib(x, R_DimSymbol));
  PROTECT(y_dim = getAttrib(y, R_DimSymbol));

  /* Fill input buffer by X values*/
  memset(circ, 0, pN * sizeof(double));
  fill_subarray(circ, REAL(x), rank, N, INTEGER(x_dim), 1);

  /* Run the plan on X-input data */
  fftw_execute_dft_r2c(r2c_plan, circ, ox);

  /* Fill input buffer by Y values*/
  memset(circ, 0, pN * sizeof(double));
  fill_subarray(circ, REAL(y), rank, N, INTEGER(y_dim), 1);

  /* Run the plan on Y-input data */
  fftw_execute_dft_r2c(r2c_plan, circ, oy);

  /* Compute conjugation if needed */
  if (conjugate)
    for (i = 0; i < phN; ++i)
      oy[i] = conj(oy[i]);

  /* Dot-multiply ox and oy, and divide by Nx*...*Nz*/
  for (i = 0; i < phN; ++i)
    oy[i] *= ox[i] / pN;

  /* Compute the reverse transform to obtain result */
  fftw_execute_dft_c2r(c2r_plan, oy, circ);

  /* The plans are private to this call; release them so that repeated
     calls do not leak planner memory */
  fftw_destroy_plan(r2c_plan);
  fftw_destroy_plan(c2r_plan);

  SEXP res;
  PROTECT(res = allocVector(REALSXP, prod(rank, INTEGER(output_dim))));
  fill_subarray(circ, REAL(res), rank, N, INTEGER(output_dim), 0);
  /* setAttrib(output_dim, R_NamesSymbol, R_NilValue); */
  setAttrib(res, R_DimSymbol, output_dim);
  /* setAttrib(res, R_DimNamesSymbol, R_NilValue); */

  /* Cleanup */
  fftw_free(ox);
  fftw_free(oy);
  fftw_free(circ);

  /* Return */
  UNPROTECT(3);
  return res;
}
/*! Calculates the long-range periodic force given the particle positions * using the PM method. The force is Gaussian filtered with Asmth, given in * mesh-cell units. We carry out a CIC charge assignment, and compute the * potenial by Fourier transform methods. The potential is finite differenced * using a 4-point finite differencing formula, and the forces are * interpolated tri-linearly to the particle positions. The CIC kernel is * deconvolved. Note that the particle distribution is not in the slab * decomposition that is used for the FFT. Instead, overlapping patches * between local domains and FFT slabs are communicated as needed. */ void pmforce_periodic(void) { double k2, kx, ky, kz, smth; double dx, dy, dz; double fx, fy, fz, ff; double asmth2, fac, acc_dim; int i, j, slab, level, sendTask, recvTask; int x, y, z, xl, yl, zl, xr, yr, zr, xll, yll, zll, xrr, yrr, zrr, ip, dim; int slab_x, slab_y, slab_z; int slab_xx, slab_yy, slab_zz; int meshmin[3], meshmax[3], sendmin, sendmax, recvmin, recvmax; int rep, ncont, cont_sendmin[2], cont_sendmax[2], cont_recvmin[2], cont_recvmax[2]; int dimx, dimy, dimz, recv_dimx, recv_dimy, recv_dimz; MPI_Status status; if(ThisTask == 0) { printf("Starting periodic PM calculation.\n"); fflush(stdout); } #ifdef FFTW3 if(fftw_plan_exists) { /* macro defined in callgrind.h */ // CALLGRIND_START_INSTRUMENTATION; } #else // CALLGRIND_START_INSTRUMENTATION; #endif force_treefree(); asmth2 = (2 * M_PI) * All.Asmth[0] / All.BoxSize; asmth2 *= asmth2; fac = All.G / (M_PI * All.BoxSize); /* to get potential */ fac *= 1 / (2 * All.BoxSize / PMGRID); /* for finite differencing */ /* first, establish the extension of the local patch in the PMGRID */ for(j = 0; j < 3; j++) { meshmin[j] = PMGRID; meshmax[j] = 0; } for(i = 0; i < NumPart; i++) { for(j = 0; j < 3; j++) { slab = to_slab_fac * P[i].Pos[j]; if(slab >= PMGRID) slab = PMGRID - 1; if(slab < meshmin[j]) meshmin[j] = slab; if(slab > meshmax[j]) meshmax[j] = slab; } } 
MPI_Allgather(meshmin, 3, MPI_INT, meshmin_list, 3, MPI_INT, MPI_COMM_WORLD); MPI_Allgather(meshmax, 3, MPI_INT, meshmax_list, 3, MPI_INT, MPI_COMM_WORLD); dimx = meshmax[0] - meshmin[0] + 2; dimy = meshmax[1] - meshmin[1] + 2; dimz = meshmax[2] - meshmin[2] + 2; pm_init_periodic_allocate((dimx + 4) * (dimy + 4) * (dimz + 4)); #ifdef FFTW3 if(!fftw_plan_exists) { /* Create plan for in-place r2c DFT */ fft_forward_plan = fftw_mpi_plan_dft_r2c_3d(PMGRID, PMGRID, PMGRID, rhogrid, fft_of_rhogrid, MPI_COMM_WORLD, FFTW_PATIENT | FFTW_MPI_TRANSPOSED_OUT); fft_inverse_plan = fftw_mpi_plan_dft_c2r_3d(PMGRID, PMGRID, PMGRID, fft_of_rhogrid, rhogrid, MPI_COMM_WORLD, FFTW_PATIENT | FFTW_MPI_TRANSPOSED_IN); fftw_plan_exists = true; // use C99 bool type if(ThisTask == 0) printf("Created new FFTW3 plan.\n"); } else { /* do nothing, the plan has already been created by previous call to this function */ } #endif /* For FFTW3, there is a different convention for fftsize for real-to-complex transforms, i.e. fftsize is the size of the complex data (number of complex values), NOT the size of the real data! We attempt to take care of this by defining fftsize to be fftsize_real when using FFTW3. 
*/ for(i = 0; i < dimx * dimy * dimz; i++) workspace[i] = 0; for(i = 0; i < NumPart; i++) { slab_x = to_slab_fac * P[i].Pos[0]; if(slab_x >= PMGRID) slab_x = PMGRID - 1; dx = to_slab_fac * P[i].Pos[0] - slab_x; slab_x -= meshmin[0]; slab_xx = slab_x + 1; slab_y = to_slab_fac * P[i].Pos[1]; if(slab_y >= PMGRID) slab_y = PMGRID - 1; dy = to_slab_fac * P[i].Pos[1] - slab_y; slab_y -= meshmin[1]; slab_yy = slab_y + 1; slab_z = to_slab_fac * P[i].Pos[2]; if(slab_z >= PMGRID) slab_z = PMGRID - 1; dz = to_slab_fac * P[i].Pos[2] - slab_z; slab_z -= meshmin[2]; slab_zz = slab_z + 1; workspace[(slab_x * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz); workspace[(slab_x * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * dy * (1.0 - dz); workspace[(slab_x * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * dz; workspace[(slab_x * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * dy * dz; workspace[(slab_xx * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (dx) * (1.0 - dy) * (1.0 - dz); workspace[(slab_xx * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (dx) * dy * (1.0 - dz); workspace[(slab_xx * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (dx) * (1.0 - dy) * dz; workspace[(slab_xx * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (dx) * dy * dz; } for(i = 0; i < fftsize; i++) /* clear local density field */ rhogrid[i] = 0; for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */ { sendTask = ThisTask; recvTask = ThisTask ^ level; if(recvTask < NTask) { /* check how much we have to send */ sendmin = 2 * PMGRID; sendmax = -1; for(slab_x = meshmin[0]; slab_x < meshmax[0] + 2; slab_x++) if(slab_to_task[slab_x % PMGRID] == recvTask) { if(slab_x < sendmin) sendmin = slab_x; if(slab_x > sendmax) sendmax = slab_x; } if(sendmax == -1) sendmin = 0; /* check how much we have to receive */ recvmin = 2 * PMGRID; recvmax = -1; for(slab_x = meshmin_list[3 * 
recvTask]; slab_x < meshmax_list[3 * recvTask] + 2; slab_x++) if(slab_to_task[slab_x % PMGRID] == sendTask) { if(slab_x < recvmin) recvmin = slab_x; if(slab_x > recvmax) recvmax = slab_x; } if(recvmax == -1) recvmin = 0; if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0) /* ok, we have a contribution to the slab */ { recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 2; recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 2; recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 2; if(level > 0) { MPI_Sendrecv(workspace + (sendmin - meshmin[0]) * dimy * dimz, (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE, recvTask, TAG_PERIODIC_A, forcegrid, (recvmax - recvmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real), MPI_BYTE, recvTask, TAG_PERIODIC_A, MPI_COMM_WORLD, &status); } else { memcpy(forcegrid, workspace + (sendmin - meshmin[0]) * dimy * dimz, (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real)); } for(slab_x = recvmin; slab_x <= recvmax; slab_x++) { slab_xx = (slab_x % PMGRID) - first_slab_of_task[ThisTask]; if(slab_xx >= 0 && slab_xx < slabs_per_task[ThisTask]) { for(slab_y = meshmin_list[3 * recvTask + 1]; slab_y <= meshmax_list[3 * recvTask + 1] + 1; slab_y++) { slab_yy = slab_y; if(slab_yy >= PMGRID) slab_yy -= PMGRID; for(slab_z = meshmin_list[3 * recvTask + 2]; slab_z <= meshmax_list[3 * recvTask + 2] + 1; slab_z++) { slab_zz = slab_z; if(slab_zz >= PMGRID) slab_zz -= PMGRID; rhogrid[PMGRID * PMGRID2 * slab_xx + PMGRID2 * slab_yy + slab_zz] += forcegrid[((slab_x - recvmin) * recv_dimy + (slab_y - meshmin_list[3 * recvTask + 1])) * recv_dimz + (slab_z - meshmin_list[3 * recvTask + 2])]; } } } } } } } #ifdef DEBUG_FFT double norm_density = 0.; for(i = 0; i < fftsize; i++) { norm_density += rhogrid[i]*rhogrid[i]; } /* Write out rhogrid to a 'fft-snapshot' file */ if (ThisTask == 0) { FILE *fp; /* Print the norm of the fft */ printf("L2-norm of density: 
%f\n", norm_density); printf("First five values of density: %f, %f, %f, %f, %f\n", rhogrid[0],rhogrid[1],rhogrid[2],rhogrid[3], rhogrid[4]); } #endif /* Do the FFT of the density field */ #ifdef FFTW3 fftw_execute_dft_r2c(fft_forward_plan, rhogrid, fft_of_rhogrid); #else rfftwnd_mpi(fft_forward_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER); #endif #ifdef DEBUG_FFT double norm_complex = 0.; for(i = 0; i < fftsize; i++) { norm_complex += rhogrid[i]*rhogrid[i]; } /* Write out rhogrid to a 'fft-snapshot' file */ if (ThisTask == 0) { FILE *fp; /* Print the norm of the fft */ printf("L2-norm of complex rhogrid: %f\n", norm_complex); printf("First two values of complex fft: %f + i*%f, %f + i*%f\n", rhogrid[0],rhogrid[1],rhogrid[2],rhogrid[3]); } #endif /* multiply with Green's function for the potential */ for(y = slabstart_y; y < slabstart_y + nslab_y; y++) for(x = 0; x < PMGRID; x++) for(z = 0; z < PMGRID / 2 + 1; z++) { if(x > PMGRID / 2) kx = x - PMGRID; else kx = x; if(y > PMGRID / 2) ky = y - PMGRID; else ky = y; if(z > PMGRID / 2) kz = z - PMGRID; else kz = z; k2 = kx * kx + ky * ky + kz * kz; if(k2 > 0) { smth = -exp(-k2 * asmth2) / k2; /* do deconvolution */ fx = fy = fz = 1; if(kx != 0) { fx = (M_PI * kx) / PMGRID; fx = sin(fx) / fx; } if(ky != 0) { fy = (M_PI * ky) / PMGRID; fy = sin(fy) / fy; } if(kz != 0) { fz = (M_PI * kz) / PMGRID; fz = sin(fz) / fz; } ff = 1 / (fx * fy * fz); smth *= ff * ff * ff * ff; /* end deconvolution */ ip = PMGRID * (PMGRID / 2 + 1) * (y - slabstart_y) + (PMGRID / 2 + 1) * x + z; c_re(fft_of_rhogrid[ip]) *= smth; c_im(fft_of_rhogrid[ip]) *= smth; } } if(slabstart_y == 0) c_re(fft_of_rhogrid[0]) = c_im(fft_of_rhogrid[0]) = 0.0; /* Do the FFT to get the potential */ #ifdef FFTW3 fftw_execute_dft_c2r(fft_inverse_plan, fft_of_rhogrid, rhogrid); /* Now normalize the output for(i = 0; i < fftsize; i++) rhogrid[i] = rhogrid[i] / (PMGRID*PMGRID*PMGRID); */ #else rfftwnd_mpi(fft_inverse_plan, 1, rhogrid, workspace, 
FFTW_TRANSPOSED_ORDER); #endif #ifdef DEBUG_FFT double norm = 0.; for(i = 0; i < fftsize; i++) { norm += rhogrid[i]*rhogrid[i]; } /* Write out rhogrid to a 'fft-snapshot' file */ if (ThisTask == 0) { FILE *fp; /* Print the norm of the fft */ printf("L2-norm of rhogrid: %f\n", norm); printf("First five values of fft: %f, %f, %f, %f, %f\n\n", rhogrid[0],rhogrid[1],rhogrid[2],rhogrid[3],rhogrid[4]); /* fp = fopen("rhogrid.0", "wb"); /* add suffix to indicate which node this is */ /* fwrite(rhogrid, sizeof(rhogrid[0]), fftsize, fp);*/ /* fclose(fp); */ } #endif /* Now rhogrid holds the potential */ /* construct the potential for the local patch */ dimx = meshmax[0] - meshmin[0] + 6; dimy = meshmax[1] - meshmin[1] + 6; dimz = meshmax[2] - meshmin[2] + 6; for(level = 0; level < (1 << PTask); level++) /* note: for level=0, target is the same task */ { sendTask = ThisTask; recvTask = ThisTask ^ level; if(recvTask < NTask) { /* check how much we have to send */ sendmin = 2 * PMGRID; sendmax = -PMGRID; for(slab_x = meshmin_list[3 * recvTask] - 2; slab_x < meshmax_list[3 * recvTask] + 4; slab_x++) if(slab_to_task[(slab_x + PMGRID) % PMGRID] == sendTask) { if(slab_x < sendmin) sendmin = slab_x; if(slab_x > sendmax) sendmax = slab_x; } if(sendmax == -PMGRID) sendmin = sendmax + 1; /* check how much we have to receive */ recvmin = 2 * PMGRID; recvmax = -PMGRID; for(slab_x = meshmin[0] - 2; slab_x < meshmax[0] + 4; slab_x++) if(slab_to_task[(slab_x + PMGRID) % PMGRID] == recvTask) { if(slab_x < recvmin) recvmin = slab_x; if(slab_x > recvmax) recvmax = slab_x; } if(recvmax == -PMGRID) recvmin = recvmax + 1; if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0) /* ok, we have a contribution to the slab */ { recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 6; recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 6; recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 6; ncont = 1; cont_sendmin[0] = 
sendmin; cont_sendmax[0] = sendmax; cont_sendmin[1] = sendmax + 1; cont_sendmax[1] = sendmax; cont_recvmin[0] = recvmin; cont_recvmax[0] = recvmax; cont_recvmin[1] = recvmax + 1; cont_recvmax[1] = recvmax; for(slab_x = sendmin; slab_x <= sendmax; slab_x++) { if(slab_to_task[(slab_x + PMGRID) % PMGRID] != ThisTask) { /* non-contiguous */ cont_sendmax[0] = slab_x - 1; while(slab_to_task[(slab_x + PMGRID) % PMGRID] != ThisTask) slab_x++; cont_sendmin[1] = slab_x; ncont++; } } for(slab_x = recvmin; slab_x <= recvmax; slab_x++) { if(slab_to_task[(slab_x + PMGRID) % PMGRID] != recvTask) { /* non-contiguous */ cont_recvmax[0] = slab_x - 1; while(slab_to_task[(slab_x + PMGRID) % PMGRID] != recvTask) slab_x++; cont_recvmin[1] = slab_x; if(ncont == 1) ncont++; } } for(rep = 0; rep < ncont; rep++) { sendmin = cont_sendmin[rep]; sendmax = cont_sendmax[rep]; recvmin = cont_recvmin[rep]; recvmax = cont_recvmax[rep]; /* prepare what we want to send */ if(sendmax - sendmin >= 0) { for(slab_x = sendmin; slab_x <= sendmax; slab_x++) { slab_xx = ((slab_x + PMGRID) % PMGRID) - first_slab_of_task[ThisTask]; for(slab_y = meshmin_list[3 * recvTask + 1] - 2; slab_y < meshmax_list[3 * recvTask + 1] + 4; slab_y++) { slab_yy = (slab_y + PMGRID) % PMGRID; for(slab_z = meshmin_list[3 * recvTask + 2] - 2; slab_z < meshmax_list[3 * recvTask + 2] + 4; slab_z++) { slab_zz = (slab_z + PMGRID) % PMGRID; forcegrid[((slab_x - sendmin) * recv_dimy + (slab_y - (meshmin_list[3 * recvTask + 1] - 2))) * recv_dimz + slab_z - (meshmin_list[3 * recvTask + 2] - 2)] = rhogrid[PMGRID * PMGRID2 * slab_xx + PMGRID2 * slab_yy + slab_zz]; } } } } if(level > 0) { MPI_Sendrecv(forcegrid, (sendmax - sendmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real), MPI_BYTE, recvTask, TAG_PERIODIC_B, workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz, (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE, recvTask, TAG_PERIODIC_B, MPI_COMM_WORLD, &status); } else { memcpy(workspace + (recvmin - (meshmin[0] - 
2)) * dimy * dimz, forcegrid, (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real)); } } } } } dimx = meshmax[0] - meshmin[0] + 2; dimy = meshmax[1] - meshmin[1] + 2; dimz = meshmax[2] - meshmin[2] + 2; recv_dimx = meshmax[0] - meshmin[0] + 6; recv_dimy = meshmax[1] - meshmin[1] + 6; recv_dimz = meshmax[2] - meshmin[2] + 6; for(dim = 0; dim < 3; dim++) /* Calculate each component of the force. */ { /* get the force component by finite differencing the potential */ /* note: "workspace" now contains the potential for the local patch, plus a suffiently large buffer region */ for(x = 0; x < meshmax[0] - meshmin[0] + 2; x++) for(y = 0; y < meshmax[1] - meshmin[1] + 2; y++) for(z = 0; z < meshmax[2] - meshmin[2] + 2; z++) { xrr = xll = xr = xl = x; yrr = yll = yr = yl = y; zrr = zll = zr = zl = z; switch (dim) { case 0: xr = x + 1; xrr = x + 2; xl = x - 1; xll = x - 2; break; case 1: yr = y + 1; yl = y - 1; yrr = y + 2; yll = y - 2; break; case 2: zr = z + 1; zl = z - 1; zrr = z + 2; zll = z - 2; break; } forcegrid[(x * dimy + y) * dimz + z] = fac * ((4.0 / 3) * (workspace[((xl + 2) * recv_dimy + (yl + 2)) * recv_dimz + (zl + 2)] - workspace[((xr + 2) * recv_dimy + (yr + 2)) * recv_dimz + (zr + 2)]) - (1.0 / 6) * (workspace[((xll + 2) * recv_dimy + (yll + 2)) * recv_dimz + (zll + 2)] - workspace[((xrr + 2) * recv_dimy + (yrr + 2)) * recv_dimz + (zrr + 2)])); } /* read out the forces */ for(i = 0; i < NumPart; i++) { slab_x = to_slab_fac * P[i].Pos[0]; if(slab_x >= PMGRID) slab_x = PMGRID - 1; dx = to_slab_fac * P[i].Pos[0] - slab_x; slab_x -= meshmin[0]; slab_xx = slab_x + 1; slab_y = to_slab_fac * P[i].Pos[1]; if(slab_y >= PMGRID) slab_y = PMGRID - 1; dy = to_slab_fac * P[i].Pos[1] - slab_y; slab_y -= meshmin[1]; slab_yy = slab_y + 1; slab_z = to_slab_fac * P[i].Pos[2]; if(slab_z >= PMGRID) slab_z = PMGRID - 1; dz = to_slab_fac * P[i].Pos[2] - slab_z; slab_z -= meshmin[2]; slab_zz = slab_z + 1; acc_dim = forcegrid[(slab_x * dimy + slab_y) * dimz + slab_z] * (1.0 - 
dx) * (1.0 - dy) * (1.0 - dz); acc_dim += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_z] * (1.0 - dx) * dy * (1.0 - dz); acc_dim += forcegrid[(slab_x * dimy + slab_y) * dimz + slab_zz] * (1.0 - dx) * (1.0 - dy) * dz; acc_dim += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_zz] * (1.0 - dx) * dy * dz; acc_dim += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_z] * (dx) * (1.0 - dy) * (1.0 - dz); acc_dim += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_z] * (dx) * dy * (1.0 - dz); acc_dim += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_zz] * (dx) * (1.0 - dy) * dz; acc_dim += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_zz] * (dx) * dy * dz; P[i].GravPM[dim] = acc_dim; } } pm_init_periodic_free(); force_treeallocate(All.TreeAllocFactor * All.MaxPart, All.MaxPart); All.NumForcesSinceLastDomainDecomp = 1 + All.TotNumPart * All.TreeDomainUpdateFrequency; if(ThisTask == 0) { printf("done PM.\n"); fflush(stdout); } #ifdef FFTW3 if(fftw_plan_exists) { /* macro defined in callgrind.h */ // CALLGRIND_STOP_INSTRUMENTATION; } #else // CALLGRIND_STOP_INSTRUMENTATION; #endif }
void FFTHandler::reverse(double* inout){ fftw_execute_dft_c2r(fftReversePlan,(fftw_complex*)inout, inout); //normalize the reverse transform for(long ii =0; ii < n; ++ii) inout[ii]/=n; }