コード例 #1
0
ファイル: output_vtk.c プロジェクト: rmoleary/snoopy
/*
 * Apply a position- and time-dependent phase factor to the field wri, in place.
 *
 * Each of the NX/NPROC local slabs is transformed with fft_1d_forward, every
 * coefficient of row j is multiplied by exp(I*phase)/NY, and the slab is
 * transformed back with fft_1d_backward into the same location of wri.
 *
 * wri : real field, overwritten with the remapped data. The slab stride is
 *       NY+2 doubles when WITH_2D is defined, (NZ+2)*NY doubles otherwise.
 * t   : time passed to time_shift(); without TIME_DEPENDANT_SHEAR it is also
 *       used to derive the velocity offset entering the phase.
 */
void remap_output(	double wri[], 
					const double t) {

	int ix, iy, iz;
	double shift_time;		/* value returned by time_shift(t) */
	double velocity_time;	/* enters the phase as velocity_time / 2 */
	double complex factor;
	double complex angle;
	double complex *slab;
	double *field;

	DEBUG_START_FUNC;

	slab = (double complex *) fftw_malloc( sizeof(double complex) * (NY/2+1) * NZ );
	if (slab == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w2d allocation");

	/* time_shift(t) is needed in both build configurations; only the
	 * velocity offset depends on TIME_DEPENDANT_SHEAR. */
	shift_time = time_shift(t);
#ifdef TIME_DEPENDANT_SHEAR
	velocity_time = 0.0;
#else
	velocity_time = fmod(t, 2.0 * param.ly / (param.shear * param.lx));
#endif

	for (ix = 0; ix < NX/NPROC; ix++) {
		/* Start of the current slab inside wri. */
#ifdef WITH_2D
		field = wri + ix*(NY+2);
#else
		field = wri + ix*(NZ+2)*NY;
#endif
		fftw_execute_dft_r2c(fft_1d_forward, field, slab);

		for (iy = 0; iy < NY/2+1; iy++) {
			double complex *line = slab + iy * NZ;

			/* Phase grows with the row index iy and with the global x index
			 * of this slab (local index plus the offset of this rank). */
			angle = (double complex) ((2.0 * M_PI) / param.ly *  ((double) iy )  * 
									( ((double) (ix + rank * (NX/NPROC)) / (double) NX ) * shift_time - velocity_time / 2.0 ) * param.lx * param.shear);

			/* The division by NY is folded into the multiplier. */
			factor = cexp( I * angle)/NY;

			for (iz = 0; iz < NZ; iz++) {
				line[iz] = factor * line[iz];
			}
		}

		fftw_execute_dft_c2r(fft_1d_backward, slab, field);
	}

	fftw_free(slab);

	DEBUG_END_FUNC;
}
コード例 #2
0
ファイル: hankel.c プロジェクト: ebernierResearch/rssa
/*
 * FFT-based product with the Hankel matrix described by `matrix`.
 *
 * out    : receives L = window values.
 * v      : input vector; K = N - L + 1 values are read.
 * matrix : pointer to a hankel_matrix providing length, window, the r2c/c2r
 *          plans and the pre-computed spectrum circ_freq.
 */
static void hankel_matmul(double* out,
                          const double* v,
                          const void* matrix) {
  const hankel_matrix *hm = matrix;
  const R_len_t N = hm->length;
  const R_len_t L = hm->window;
  const R_len_t K = N - L + 1;
  const R_len_t nfreq = N/2 + 1;
  R_len_t idx;
  double *buf;
  fftw_complex *spec;

  /* Work buffers: N reals and the matching half spectrum */
  buf = (double*) fftw_malloc(N * sizeof(double));
  spec = (fftw_complex*) fftw_malloc(nfreq * sizeof(fftw_complex));

  /* v in reversed order, followed by L - 1 zeros (K + L - 1 == N) */
  for (idx = 0; idx < K; ++idx)
    buf[idx] = v[K - 1 - idx];
  memset(buf + K, 0, (L - 1)*sizeof(double));

  /* Forward transform of the padded, reversed vector */
  fftw_execute_dft_r2c(hm->r2c_plan, buf, spec);

  /* Pointwise product with the pre-computed spectrum */
  for (idx = 0; idx < nfreq; ++idx)
    spec[idx] *= hm->circ_freq[idx];

  /* Back to the real domain, reusing buf for the output */
  fftw_execute_dft_c2r(hm->c2r_plan, spec, buf);

  /* The first L entries, scaled by 1/N, are the result */
  for (idx = 0; idx < L; ++idx)
    out[idx] = buf[idx] / N;

  fftw_free(spec);
  fftw_free(buf);
}
コード例 #3
0
ファイル: hbhankel.c プロジェクト: asl/rssa
/*
 * Multiply the spectrum of y by the given spectrum ox and transform back,
 * overwriting y with the result.
 *
 * ox        : pre-computed half spectrum, hprod(rank, N) entries are read.
 * y         : real array; transformed with r2c_plan and overwritten by the
 *             output of c2r_plan.
 * rank, N   : number of dimensions and their sizes.
 * conjugate : when non-zero the spectrum of y is conjugated before the
 *             product.
 * r2c_plan, c2r_plan : FFTW plans used for the forward / backward transform.
 */
static void convolveNd_half(const fftw_complex *ox,
                            double *y,
                            R_len_t rank,
                            const R_len_t *N,
                            int conjugate,
                            fftw_plan r2c_plan,
                            fftw_plan c2r_plan) {
  const R_len_t total = prod(rank, N);
  const R_len_t half = hprod(rank, N);
  R_len_t idx;
  fftw_complex *spec;

  /* Scratch space for the spectrum of y */
  spec = (fftw_complex*) fftw_malloc(half * sizeof(fftw_complex));

  /* Forward Nd transform of y */
  fftw_execute_dft_r2c(r2c_plan, y, spec);

  /* Optional conjugation, then the pointwise product with ox scaled by
     1 / (N[0] * ... * N[rank-1]); both steps only touch element idx */
  for (idx = 0; idx < half; ++idx) {
    if (conjugate)
      spec[idx] = conj(spec[idx]);
    spec[idx] *= ox[idx] / total;
  }

  /* Inverse transform straight back into y */
  fftw_execute_dft_c2r(c2r_plan, spec, y);

  fftw_free(spec);
}
コード例 #4
0
ファイル: gfft.c プロジェクト: adamdempsey90/Snoopy
/*
 * In-place complex-to-real transform of win using the c2rfft plan.
 * The value of get_c_time() is subtracted from fft_timer before the
 * transform and added afterwards, so fft_timer grows by the difference
 * between the two readings.
 */
void gfft_c2r(double complex *win){
	/* Real view of the same buffer: the transform writes over its input. */
	double *real_out = (double *) win;

	fft_timer -= get_c_time();
	fftw_execute_dft_c2r(c2rfft, win, real_out);
	fft_timer += get_c_time();
}
コード例 #5
0
void FFTConvolver::ConvolveSameSize(double* image, const double* kernel, size_t imgWidth, size_t imgHeight)
{
	const size_t imgSize = imgWidth * imgHeight;
	const size_t complexSize = (imgWidth/2+1) * imgHeight;
	double* tempData = reinterpret_cast<double*>(fftw_malloc(imgSize * sizeof(double)));
	fftw_complex* fftImageData = reinterpret_cast<fftw_complex*>(fftw_malloc(complexSize * sizeof(fftw_complex)));
	fftw_complex* fftKernelData = reinterpret_cast<fftw_complex*>(fftw_malloc(complexSize * sizeof(fftw_complex)));
	
	boost::mutex::scoped_lock lock(_mutex);
	fftw_plan inToFPlan = fftw_plan_dft_r2c_2d(imgHeight, imgWidth, tempData, fftImageData, FFTW_ESTIMATE);
	fftw_plan fToOutPlan = fftw_plan_dft_c2r_2d(imgHeight, imgWidth, fftImageData, tempData, FFTW_ESTIMATE);
	lock.unlock();
	
	memcpy(tempData, image, imgSize * sizeof(double));
	fftw_execute_dft_r2c(inToFPlan, tempData, fftImageData);
	
	memcpy(tempData, kernel, imgSize * sizeof(double));
	fftw_execute_dft_r2c(inToFPlan, tempData, fftKernelData);
	
	double fact = 1.0/imgSize;
	for(size_t i=0; i!=complexSize; ++i)
		reinterpret_cast<std::complex<double>*>(fftImageData)[i] *= fact * reinterpret_cast<std::complex<double>*>(fftKernelData)[i];
		
	fftw_execute_dft_c2r(fToOutPlan, reinterpret_cast<fftw_complex*>(fftImageData), tempData);
	memcpy(image, tempData, imgSize * sizeof(double));
		
	fftw_free(fftImageData);
	fftw_free(fftKernelData);
	fftw_free(tempData);
	
	lock.lock();
	fftw_destroy_plan(inToFPlan);
	fftw_destroy_plan(fToOutPlan);
	lock.unlock();
}
コード例 #6
0
ファイル: artistic.c プロジェクト: jdegges/raster-buffet
/*
 * Build the spectrum of one normalised sector filter.
 *
 * A 0/1 mask is set for every pixel whose centred coordinates lie on the
 * positive side of the direction a - wd + pi/2 and on the non-positive side
 * of the direction a + wd + pi/2. The mask is filtered by multiplying its
 * spectrum with g2, shifted circularly by half the image size, weighted
 * pixel-wise by g1, scaled so that its visible pixels sum to one and finally
 * transformed in place with f->forward.
 *
 * nx, ny : image width and height in pixels.
 * f      : FFT plans; f->forward is also executed in place, f->backward is
 *          the matching complex-to-real plan.
 * a, wd  : sector direction and half-width, passed to cos()/sin().
 * g1     : ny * nx weights, row stride nx.
 * g2     : ny * (nx/2+1) complex factors applied to the mask spectrum.
 *
 * Returns a buffer obtained from fftw_malloc holding ny * (nx/2+1) complex
 * values (nothing in this function releases it), or NULL if an allocation
 * fails.
 *
 * NOTE(review): if the weighted mask sums to zero the normalisation divides
 * by zero; callers presumably never pass such a configuration - confirm.
 */
fftw_complex* gen_sec (const int nx, const int ny,
                       fft_plans_t* f, const double a, const double wd,
                       const double* g1, const fftw_complex* g2) {
    /* Rows are padded to 2*(nx/2+1) doubles so the buffers can be used for
     * in-place real <-> complex transforms. */
    const int stride = 2*(nx/2+1);
    const size_t bytes = sizeof(double)*ny*stride;
    const int x = nx/2;
    const int y = ny/2;
    /* The two boundary directions do not depend on the pixel. */
    const double cos_lo = cos(a-wd+M_PI/2.0);
    const double sin_lo = sin(a-wd+M_PI/2.0);
    const double cos_hi = cos(a+wd+M_PI/2.0);
    const double sin_hi = sin(a+wd+M_PI/2.0);
    int i, j;
    double total = 0;
    double* out;
    double* sec_d;
    fftw_complex* sec;

    sec_d = (double*) fftw_malloc (bytes);
    if (NULL == sec_d) {
      return NULL;
    }
    sec = (fftw_complex*)sec_d;
    memset(sec_d, 0, bytes);

    /* Rasterise the sector mask */
    for (j = 0; j < ny; j++) {
        const double y_val =  ny/2.0 - (j) * (double)ny/(double)(ny-1);
        for (i = 0; i < nx; i++) {
            const double x_val = -nx/2.0 + (i) * (double)nx/(double)(nx-1);
            const int above_lo = x_val*cos_lo + y_val*sin_lo >  0;
            const int below_hi = x_val*cos_hi + y_val*sin_hi <= 0;

            sec_d[i+j*stride] = (above_lo && below_hi) ? 1 : 0;
        }
    }

    /* Filter the mask: multiply its spectrum by g2 and transform back */
    fftw_execute_dft_r2c (f->forward, sec_d, sec);

    for (i = 0; i < ny*(nx/2+1); i++) {
        sec[i] *= g2[i];
    }

    fftw_execute_dft_c2r (f->backward, sec, sec_d);

    out = (double*) fftw_malloc (bytes);
    if (NULL == out) {
      /* Do not leak the mask buffer on the error path */
      fftw_free (sec_d);
      return NULL;
    }
    /* Give the padding columns a defined value; the normalisation loop
     * below walks over the whole padded buffer. */
    memset(out, 0, bytes);

    /* Circular shift by half the image, 1/(nx*ny) scaling of the inverse
     * transform and weighting by g1 */
    for (j = 0; j < ny; j++) {
        int k = (j - y) % ny;
        k = k < 0 ? ny + k : k;
        for (i = 0; i < nx; i++) {
            int h = (i - x) % nx;
            h = h < 0 ? nx + h : h;
            out[i+j*stride] = sec_d[h+k*stride] / (nx*ny) * g1[i+j*nx];
            total += out[i+j*stride];
        }
    }

    /* Normalise so the visible pixels sum to one */
    for (i = 0; i < ny*stride; i++) {
        out[i] /= total;
    }

    /* In-place forward transform: out now holds the spectrum */
    fftw_execute_dft_r2c (f->forward, out, (fftw_complex*)out);

    fftw_free (sec);
    return (fftw_complex*)out;
}
コード例 #7
0
ファイル: hankel.c プロジェクト: ebernierResearch/rssa
/*
 * FFT-based hankelization: F receives the linear convolution of U and V,
 * each entry divided by the number of terms contributing to it.
 *
 * F : output, N = h->length values.
 * U : L = h->window values.
 * V : K = N - L + 1 values.
 * h : supplies length, window and the r2c/c2r plans.
 */
static R_INLINE void hankelize_fft(double *F,
                                   const double *U, const double *V,
                                   const hankel_matrix *h) {
  const R_len_t N = h->length;
  const R_len_t L = h->window;
  const R_len_t K = N - L + 1;
  const R_len_t nfreq = N/2 + 1;
  R_len_t pos;
  double *padU, *padV;
  fftw_complex *specU, *specV;

  /* Real work buffers and their half spectra */
  padU = (double*) fftw_malloc(N * sizeof(double));
  padV = (double*) fftw_malloc(N * sizeof(double));
  specU = (fftw_complex*) fftw_malloc(nfreq * sizeof(fftw_complex));
  specV = (fftw_complex*) fftw_malloc(nfreq * sizeof(fftw_complex));

  /* Zero-pad both inputs to length N */
  memcpy(padU, U, L*sizeof(double));
  memset(padU + L, 0, (K - 1)*sizeof(double));

  memcpy(padV, V, K*sizeof(double));
  memset(padV + K, 0, (L - 1)*sizeof(double));

  /* Forward transforms */
  fftw_execute_dft_r2c(h->r2c_plan, padU, specU);
  fftw_execute_dft_r2c(h->r2c_plan, padV, specV);

  /* Pointwise product of the spectra */
  for (pos = 0; pos < nfreq; ++pos)
    specU[pos] *= specV[pos];

  /* Inverse transform, reusing padU for the convolution */
  fftw_execute_dft_c2r(h->c2r_plan, specU, padU);

  /* Divide by the count of contributing terms and by N */
  for (pos = 0; pos < N; ++pos) {
    const R_len_t hi = (pos < L) ? pos : L - 1;
    const R_len_t lo = (pos < K) ? 0 : pos - K + 1;
    const R_len_t count = hi - lo + 1;

    F[pos] = padU[pos] / count / N;
  }

  fftw_free(padU);
  fftw_free(padV);
  fftw_free(specU);
  fftw_free(specV);
}
コード例 #8
0
ファイル: hbhankel.c プロジェクト: davyfeng/rssa
/*
 * FFT-based product with the transposed matrix described by `matrix`.
 *
 * out    : receives Kx * Ky values, row stride Kx.
 * v      : input vector; Lx * Ly values are read.
 * matrix : pointer to a hbhankel_matrix providing length, window, the
 *          r2c/c2r plans and the pre-computed spectrum circ_freq.
 *
 * R reference implementation kept from the original source:
 *   revv <- matrix(c(rep(0, C$Lx*(C$Ky-1)), rev(v)), C$Lx, ncol(C$Cblock));
 *   revv <- rbind(matrix(0, (C$Kx-1), ncol(revv)), revv);
 *   mult <- fft(C$Cblock * fft(revv), inverse = TRUE);
 *   Re((mult/(prod(dim(C$Cblock))))[C$Lx:(C$Lx+C$Kx-1),C$Ly:(C$Ly+C$Ky-1)]);
 */
static void hbhankel_tmatmul(double* out,
                             const double* v,
                             const void* matrix) {
  const hbhankel_matrix *hm = matrix;
  const R_len_t Nx = hm->length.x, Ny = hm->length.y;
  const R_len_t Lx = hm->window.x, Ly = hm->window.y;
  const R_len_t Kx = Nx - Lx + 1, Ky = Ny - Ly + 1;
  const R_len_t nfreq = Ny * (Nx/2 + 1);
  R_len_t col, row, idx;
  double *grid;
  fftw_complex *spec;

  /* Work buffers: the full real grid and its half spectrum */
  grid = (double*) fftw_malloc(Nx * Ny * sizeof(double));
  spec = (fftw_complex*) fftw_malloc(Ny*(Nx / 2 + 1) * sizeof(fftw_complex));

  /* Reversed v goes into the trailing Lx-by-Ly corner, zeros elsewhere */
  memset(grid, 0, Nx * Ny * sizeof(double));
  for (row = 0; row < Ly; ++row) {
    double *dst = grid + (row + Ky - 1)*Nx + (Kx - 1);
    for (col = 0; col < Lx; ++col)
      dst[col] = v[Lx*Ly - col - row*Lx - 1];
  }

  /* Forward transform */
  fftw_execute_dft_r2c(hm->r2c_plan, grid, spec);

  /* Pointwise product with the pre-computed spectrum */
  for (idx = 0; idx < nfreq; ++idx)
    spec[idx] *= hm->circ_freq[idx];

  /* Inverse transform back into the real grid */
  fftw_execute_dft_c2r(hm->c2r_plan, spec, grid);

  /* Extract the Kx-by-Ky block starting at (Lx-1, Ly-1), scaled by 1/(Nx*Ny) */
  for (row = 0; row < Ky; ++row) {
    const double *src = grid + (row + Ly - 1)*Nx + (Lx - 1);
    for (col = 0; col < Kx; ++col)
      out[col + row*Kx] = src[col] / (Nx*Ny);
  }

  fftw_free(spec);
  fftw_free(grid);
}
コード例 #9
0
ファイル: s_ncc_fft.cpp プロジェクト: agravgaard/Plastimatch
/* Run the FFT-based scoring step: transform the signal, multiply its
   spectrum element-wise by the pattern spectrum and transform the product
   back into udp->padded_score.  Intermediate results are dumped to text
   files through dump_fft / dump_txt. */
void
s_ncc_fft_run (FATM_Options* fopt)
{
    S_Ncc_Fft_Data* udp = (S_Ncc_Fft_Data*) fopt->alg_data;
    const int fft_nx = fopt->sig_rect_scan.dims[1];    /* In fftw3, nx is rows */
    const int fft_ny = fopt->sig_rect_scan.dims[0];    /* In fftw3, ny is cols */
    const int n_coeff = fft_nx * (fft_ny/2+1);
    int idx;

    /* Make integral images, etc. */
    s_ncc_fft_scorewin_initialize (fopt);

    /* Forward transform of the signal */
    fftw_execute_dft_r2c (udp->sig_fftw3_plan, 
	(double*) fopt->sig.data, udp->sig_fft);

    /* Debugging info */
    dump_fft (udp->sig_fft, fft_nx, fft_ny, "sig_fft.txt");

    /* Complex product sig * pat, stored back into sig_fft.  All four
       inputs are read before either output component is written. */
    for (idx = 0; idx < n_coeff; idx++) {
	const double sig_re = udp->sig_fft[idx][0];
	const double sig_im = udp->sig_fft[idx][1];
	const double pat_re = udp->pat_fft[idx][0];
	const double pat_im = udp->pat_fft[idx][1];

	udp->sig_fft[idx][0] = sig_re * pat_re - sig_im * pat_im;
	udp->sig_fft[idx][1] = sig_re * pat_im + sig_im * pat_re;
    }

    /* Debugging info */
    dump_fft (udp->sig_fft, fft_nx, fft_ny, "sco_fft.txt");

    /* Inverse transform of the product into the padded score buffer */
    fftw_execute_dft_c2r (udp->sco_fftw3_plan, 
	udp->sig_fft, udp->padded_score);

    dump_txt (udp->padded_score, fft_nx, fft_ny, "sco_ifftd.txt");
}
コード例 #10
0
ファイル: gfft.c プロジェクト: adamdempsey90/Snoopy
/*
 * In-place complex-to-real transform of win done in three steps: the c2r_1d
 * plan on each of the NX_COMPLEX/NPROC local planes, an XY transpose, and
 * the c2r_2d plan. The two get_c_time() readings around the work are
 * subtracted from / added to fft_timer.
 */
void gfft_c2r_t(double complex *win) {
	int ix;
	double *real_out = (double *) win;	/* real view of the same buffer */

	fft_timer -= get_c_time();

	/* win has logical dimensions [NX_COMPLEX/NPROC, NY_COMPLEX, NZ_COMPLEX];
	 * each iteration transforms one NY_COMPLEX*NZ_COMPLEX plane in place. */
#ifdef _OPENMP
	#pragma omp parallel for private(ix) schedule(static)	
#endif	
	for (ix = 0; ix < NX_COMPLEX/NPROC; ix++) {
		double complex *plane = &win[ix*NY_COMPLEX*NZ_COMPLEX];
		fftw_execute_dft(c2r_1d, plane, plane);
	}

	/* Transpose the first two logical dimensions, in place. */
	transpose_complex_XY(win, win);

	/* The final 2D transform produces the real data. */
	fftw_execute_dft_c2r(c2r_2d, win, real_out);

	fft_timer += get_c_time();
}
コード例 #11
0
ファイル: fftwcpp.cpp プロジェクト: the-nightling/fers-copy
void FFTRealInverse::transform(int size, Complex *in, rsFloat *out)
{
  if (plan == 0)
    throw FFTException("[BUG] Can not transform on NULL plan.");
  fftw_execute_dft_c2r((fftw_plan_s *)plan, reinterpret_cast<fftw_complex *>(in), out);
}
コード例 #12
0
/*
 * Thin wrapper around fftw_execute_dft_c2r that receives the plan by
 * pointer: runs *plan on cplx_in and stores the real result in dble_out.
 */
void kemo_fftw_execute_dft_c2r(fftw_plan *plan, fftw_complex *cplx_in, double *dble_out){
	fftw_plan the_plan = *plan;

	fftw_execute_dft_c2r(the_plan, cplx_in, dble_out);
}
コード例 #13
0
ファイル: artistic.c プロジェクト: jdegges/raster-buffet
/*
 * Filter a 3-channel 8-bit image with the bank of frequency-domain filters
 * stored in ab->g and write the combined result to dst.
 *
 * src, dst : interleaved 3-byte pixels, `pitch` bytes per row.
 * ns       : number of filters in ab->g that are applied.
 * q        : unused here (explicitly cast to void).
 * nx, ny   : image width and height in pixels.
 * f        : forward (r2c) and backward (c2r) FFT plans; every transform
 *            below is run in place on a buffer viewed both as double and
 *            as fftw_complex.
 * ab       : pre-allocated work buffers (src1, src2, s, m, num, den) and
 *            the filter spectra g.
 *
 * NOTE(review): the exact arithmetic of multiply_6_c, mesh_d and
 * divide_mul_const_d is defined elsewhere; the comments below only describe
 * how their arguments are wired together here.
 */
void artistic_smooth (uint8_t* src,
                      uint8_t* dst,
                      double ns,
                      double q,
                      int nx, int ny, int pitch,
                      fft_plans_t* f,
                      artistic_buf_t* ab)
{
    int i, j, k, x, y;
    /* Padded row length in doubles, as used for in-place real transforms */
    const int width = 2*(nx/2+1);
    const double nxny = nx*ny;
    const double nxny2 = nxny*nxny;
    /* Number of complex values per plane, and the same count in doubles */
    const int n = ny*(nx/2+1);
    const int n2 = n*2;

    (void) q;

    fftw_complex** src1_c = ab->src1;
    fftw_complex** src2_c = ab->src2;
    fftw_complex** s_c = ab->s;
    fftw_complex** m_c = ab->m;

    double** num = ab->num;
    double* den = ab->den;

    /* Real-valued views of the per-channel complex buffers */
    double* src1_d[3];
    double* src2_d[3];
    double* s_d[3];
    double* m_d[3];

    for (k = 0; k < 3; k++) {
        src1_d[k]   = (double*) src1_c[k];
        src2_d[k]   = (double*) src2_c[k];
        s_d[k]      = (double*) s_c[k];
        m_d[k]      = (double*) m_c[k];
    }

    /* De-interleave the source: src1 receives each channel value and src2
     * its square, both laid out with the padded row length */
    for (j = 0; j < ny; j++) {
        uint8_t* data = src + j*pitch;
        double* src1_ptr[] = {src1_d[0]+j*width, src1_d[1]+j*width, src1_d[2]+j*width};
        double* src2_ptr[] = {src2_d[0]+j*width, src2_d[1]+j*width, src2_d[2]+j*width};

        for (i = 0; i < nx; i++) {
            for (k = 0; k < 3; k++) {
                /* Reads one byte, stores it in src1 and keeps it in temp;
                 * both cursors advance by one element */
                register double temp = *src1_ptr[k]++ = *data++;
                *src2_ptr[k]++ = temp*temp;
            }
        }
    }

    /* In-place forward transforms of the values and of their squares */
    for (k = 0; k < 3; k++) {
        fftw_execute_dft_r2c (f->forward, src1_d[k], src1_c[k]);
        fftw_execute_dft_r2c (f->forward, src2_d[k], src2_c[k]);
    }

    /* Clear the accumulators before looping over the filters */
    for (k = 0; k < 3; k++) {
        memset (num[k], 0, sizeof(double)*n2);
    }
    memset (den, 0, sizeof(double)*n2);

    /* One pass per filter; note the int counter is compared with the
     * double ns */
    for (i = 0; i < ns; i++) {
        fftw_complex* g_fft = ab->g[i];

        /* Combines the six source spectra with g_fft into the m and s
         * buffers (n complex values each) */
        multiply_6_c(m_c[0], src1_c[0],
                     m_c[1], src1_c[1],
                     m_c[2], src1_c[2],
                     s_c[0], src2_c[0],
                     s_c[1], src2_c[1],
                     s_c[2], src2_c[2],
                     g_fft, n);

        /* In-place inverse transforms of the m and s buffers */
        for (k = 0; k < 3; k++) {
            fftw_execute_dft_c2r (f->backward, m_c[k], m_d[k]);
            fftw_execute_dft_c2r (f->backward, s_c[k], s_d[k]);
        }

        /* Folds this filter's m and s planes into num and den */
        mesh_d (s_d[0], s_d[1], s_d[2], m_d[0], m_d[1], m_d[2], nxny, nxny2,
                num[0], num[1], num[2], den, n2);
    }

    /* Final per-channel combination of num with den and nxny */
    for (k = 0; k < 3; k++) {
        divide_mul_const_d(num[k], den, nxny, n2);
    }

    /* Write the result with a circular shift of half the image in both
     * directions; x and y are truncated to int on assignment */
    x = nx/2.0;
    y = ny/2.0;
    for (j = 0; j < ny; j++) {
        /* This k shadows the outer k: it is the offset of the wrapped
         * source row inside the padded plane */
        int k = (j - y) % ny;
        k = (k < 0 ? ny + k : k) * 2*(nx/2+1);
        uint8_t* d = dst + j*pitch;
        for (i = 0; i < nx; i++) {
            int z;
            /* Wrapped source column plus the row offset */
            int h = (i - x) % nx;
            h = (h < 0 ? nx + h : h) + k;
            for (z = 0; z < 3; z++) {
                /* double -> uint8_t conversion, no clamping is applied */
                *d++ = num[z][h];
            }
        }
    }
}
コード例 #14
0
ファイル: im_invfftr.c プロジェクト: Ikulagin/transmem
/* Complex to real inverse transform.
 *
 * Converts `in` to a double-precision complex image, packs the first
 * in->Xsize / 2 + 1 complex values of every row into a half-complex buffer,
 * runs a 2D complex-to-real FFTW transform into a memory image and copies
 * that image to `out`.
 *
 * dummy: image used as the owner of the temporary image and work buffers.
 * in:    input image; must be uncoded and have exactly one band.
 * out:   receives the real result.
 *
 * Returns 0 on success, -1 on error.
 */
static int 
invfft1( IMAGE *dummy, IMAGE *in, IMAGE *out )
{
	IMAGE *cmplx = im_open_local( dummy, "invfft1-1", "t" );
	IMAGE *real = im_open_local( out, "invfft1-2", "t" );
	const int half_width = in->Xsize / 2 + 1;

	/* Transform to halfcomplex here.
	 */
	double *half_complex = IM_ARRAY( dummy, 
		in->Ysize * half_width * 2, double );

	/* We have to have a separate real buffer for the planner to work on.
	 */
	double *planner_scratch = IM_ARRAY( dummy, 
		in->Ysize * half_width * 2, double );

	fftw_plan plan;
	int x, y;
	double *q, *p;

	/* Every allocation is checked, including planner_scratch: it is
	 * handed to the FFTW planner below and must not be NULL.
	 */
	if( !cmplx || !real || !half_complex || !planner_scratch || 
		im_pincheck( in ) || im_poutcheck( out ) )
		return( -1 );
	if( in->Coding != IM_CODING_NONE || in->Bands != 1 ) {
		im_error( "im_invfft", 
			"%s", _( "one band uncoded only" ) );
		return( -1 );
	}

	/* Make dp complex image for input.
	 */
	if( im_clip2fmt( in, cmplx, IM_BANDFMT_DPCOMPLEX ) )
		return( -1 );

	/* Make mem buffer real image for output.
	 */
	if( im_cp_desc( real, in ) )
		return( -1 );
	real->BandFmt = IM_BANDFMT_DOUBLE;
	if( im_setupout( real ) )
		return( -1 );

	/* Build half-complex image: from each row of Xsize complex values
	 * (two doubles each) keep only the first half_width.
	 */
	q = half_complex;
	for( y = 0; y < cmplx->Ysize; y++ ) {
		p = ((double *) cmplx->data) + y * in->Xsize * 2; 

		for( x = 0; x < half_width; x++ ) {
			q[0] = p[0];
			q[1] = p[1];
			p += 2;
			q += 2;
		}
	}

	/* Make the plan for the transform. Yes, they really do use nx for
	 * height and ny for width. The plan is built on the scratch buffer
	 * and then executed on half_complex.
	 */
	if( !(plan = fftw_plan_dft_c2r_2d( in->Ysize, in->Xsize,
		(fftw_complex *) planner_scratch, (double *) real->data,
		0 )) ) {
		im_error( "im_invfft", 
			"%s", _( "unable to create transform plan" ) );
		return( -1 );
	}

	fftw_execute_dft_c2r( plan,
		(fftw_complex *) half_complex, (double *) real->data );

	fftw_destroy_plan( plan );

	/* Copy to out.
	 */
	if( im_copy( real, out ) )
		return( -1 );

	return( 0 );
}
コード例 #15
0
/*
 * Compute the accelerations of all particles with the FFT gravity solver:
 * reset the accelerations, deposit the particles on the grid
 * (gravity_fft_p2grid), transform the density, build the two force
 * components in Fourier space, transform them back and hand each particle
 * to gravity_fft_grid2p. With the SEI integrator the density and force
 * grids are additionally passed through gravity_fft_remap and the wave
 * vectors are recomputed from the current shear shift.
 */
void gravity_calculate_acceleration(void){
	// One-time setup of the grid
	if (!gravity_fft_init_done){
		gravity_fft_init();
		gravity_fft_init_done=1;
	}

	// Start from zero acceleration on every particle
#pragma omp parallel for schedule(guided)
	for (int p=0; p<N; p++){
		particles[p].ax = 0; 
		particles[p].ay = 0; 
		particles[p].az = 0; 
	}

	if (integrator == SEI){
		// Current y shift of the neighbouring ghost box
		struct ghostbox gb = boundaries_get_ghostbox(1,0,0);
		shift_shear = gb.shifty;
	}

	gravity_fft_p2grid();

	if (integrator == SEI){
		// Remap in fourier space to deal with shearing sheet boundary conditions.
		gravity_fft_remap(density_r, 1);
	}

	fftw_execute_dft_r2c(r2cfft, density_r, (fftw_complex*)density);

	// Inverse Poisson equation, one complex mode at a time.
	// Real parts live at index 2*m, imaginary parts at 2*m+1.
	for (int m = 0; m < grid_NCOMPLEX; m++) {
		if (integrator == SEI){
			// Compute time-dependent wave-vectors
			kxt[m] = kx[m] + shift_shear/boxsize_y * ky[m];
			k[m]  = sqrt( kxt[m]*kxt[m] + ky[m] * ky[m]);
			// we will use 1/k, that prevents singularity 
			// (the k=0 is set to zero by renormalization...)
			if ( k[m] == 0.0 ) k[m] = 1.0; 
		}
		const double norm   = k[m] * root_nx * root_ny;
		const double q_re   = - 2.0 * M_PI * density[2*m] / norm;
		const double q_im   = - 2.0 * M_PI * density[2*m+1] / norm;
		const double sinkxt = sin(kxt[m] * dx);
		const double sinky  = sin(ky[m] * dy);

		fx[2*m]   =   q_im * sinkxt / dx;	// Real part of Fx
		fx[2*m+1] = - q_re * sinkxt / dx;	// Imaginary part of Fx
		fy[2*m]   =   q_im * sinky  / dy;	// Real part of Fy
		fy[2*m+1] = - q_re * sinky  / dy;	// Imaginary part of Fy
	}

	// Transform back the force field (in place)
	fftw_execute_dft_c2r(c2rfft, (fftw_complex*)fx, fx);
	fftw_execute_dft_c2r(c2rfft, (fftw_complex*)fy, fy);

	if (integrator == SEI){
		// Remap in fourier space to deal with shearing sheet boundary conditions.
		gravity_fft_remap(fx, -1);
		gravity_fft_remap(fy, -1);
	}

	// Hand every particle to the grid-to-particle step
	for (int p=0; p<N; p++){
		gravity_fft_grid2p(&(particles[p]));
	}
}
コード例 #16
0
void Transformer_CPU::transform_inverse_x(double *inout)
{
	fftw_execute_dft_c2r(plan_x_c2r, (fftw_complex*)inout, (double*)inout);
}
コード例 #17
0
ファイル: hbhankel.c プロジェクト: davyfeng/rssa
/*
 * Two-dimensional FFT-based hankelization: F receives the 2D linear
 * convolution of U and V, each entry divided by a per-column and a per-row
 * weight.
 *
 * F : output, Nx * Ny values, row stride Nx.
 * U : Lx * Ly values, row stride Lx.
 * V : Kx * Ky values, row stride Kx (Kx = Nx - Lx + 1, Ky = Ny - Ly + 1).
 * h : supplies length, window and the r2c/c2r plans.
 */
static R_INLINE void hbhankelize_fft(double *F,
                                     const double *U, const double *V,
                                     const hbhankel_matrix* h) {
  const R_len_t Nx = h->length.x, Ny = h->length.y;
  const R_len_t Lx = h->window.x, Ly = h->window.y;
  const R_len_t Kx = Nx - Lx + 1, Ky = Ny - Ly + 1;
  const R_len_t nfreq = Ny * (Nx/2 + 1);
  R_len_t col, row, idx;
  R_len_t wcol, dwcol, wrow, dwrow;

  double *padU, *padV;
  fftw_complex *specU, *specV;

  /* Real work grids and their half spectra */
  padU = (double*) fftw_malloc(Nx * Ny * sizeof(double));
  padV = (double*) fftw_malloc(Nx * Ny * sizeof(double));
  specU = (fftw_complex*) fftw_malloc(Ny*(Nx / 2 + 1) * sizeof(fftw_complex));
  specV = (fftw_complex*) fftw_malloc(Ny*(Nx / 2 + 1) * sizeof(fftw_complex));

  /* U goes into the leading Lx-by-Ly corner of a zeroed grid */
  memset(padU, 0, Nx * Ny * sizeof(double));
  for (row = 0; row < Ly; ++row) {
    double *dst = padU + row*Nx;
    const double *src = U + row*Lx;
    for (col = 0; col < Lx; ++col)
      dst[col] = src[col];
  }

  /* V goes into the leading Kx-by-Ky corner of a zeroed grid */
  memset(padV, 0, Nx * Ny * sizeof(double));
  for (row = 0; row < Ky; ++row) {
    double *dst = padV + row*Nx;
    const double *src = V + row*Kx;
    for (col = 0; col < Kx; ++col)
      dst[col] = src[col];
  }

  /* Forward transforms */
  fftw_execute_dft_r2c(h->r2c_plan, padU, specU);
  fftw_execute_dft_r2c(h->r2c_plan, padV, specV);

  /* Pointwise product of the spectra */
  for (idx = 0; idx < nfreq; ++idx)
    specU[idx] *= specV[idx];

  /* Inverse transform, reusing padU for the convolution */
  fftw_execute_dft_c2r(h->c2r_plan, specU, padU);

  /* Weights start at 1 and change by dw after every step. dw starts at 1
     and drops by one at index L - 1 and again at index K - 1; the two
     tests are deliberately separate so both fire when L == K. */
  for (row = 0, wrow = 1, dwrow = 1; row < Ny; ++row, wrow += dwrow) {
    if (row == Ly - 1)
      dwrow -= 1;
    if (row == Ky - 1)
      dwrow -= 1;

    for (col = 0, wcol = 1, dwcol = 1; col < Nx; ++col, wcol += dwcol) {
      const R_len_t at = col + row*Nx;

      if (col == Lx - 1)
        dwcol -= 1;
      if (col == Kx - 1)
        dwcol -= 1;

      F[at] = padU[at] / wcol / wrow / Nx / Ny;
    }
  }

  fftw_free(padU);
  fftw_free(padV);
  fftw_free(specU);
  fftw_free(specV);
}
コード例 #18
0
ファイル: hbhankel.c プロジェクト: asl/rssa
/*
 * N-dimensional FFT-based convolution of two real arrays.
 *
 * x, y       : real arrays carrying a dim attribute.
 * input_dim  : integer vector with the sizes of the FFT grid; its length is
 *              the rank of the transform.
 * output_dim : integer vector with the sizes of the returned array.
 * Conj       : logical; when TRUE the spectrum of y is conjugated before
 *              the product.
 *
 * Returns a freshly allocated real vector with dim attribute output_dim.
 *
 * NOTE(review): the exact copy semantics of fill_subarray (direction flag
 * 1 vs 0) are defined elsewhere in the file.
 */
SEXP convolveN(SEXP x, SEXP y,
               SEXP input_dim, SEXP output_dim,
               SEXP Conj) {
  SEXP x_dim, y_dim, res;
  R_len_t rank = length(input_dim);
  R_len_t *N = INTEGER(input_dim);
  R_len_t pN = prod(rank, N), phN = hprod(rank, N);
  int conjugate = LOGICAL(Conj)[0];

  fftw_complex *ox, *oy;
  fftw_plan r2c_plan, c2r_plan;
  double *circ;
  R_len_t *revN, r, i;

  /* Allocate needed memory */
  circ = (double*) fftw_malloc(pN * sizeof(double));
  ox = (fftw_complex*) fftw_malloc(phN * sizeof(fftw_complex));
  oy = (fftw_complex*) fftw_malloc(phN * sizeof(fftw_complex));

  /* Estimate the best plans for given input length, note, that input data is
     stored in column-major mode, that's why we're passing dimensions in
     *reverse* order */
  revN = Calloc(rank, R_len_t);
  for (r = 0; r < rank; ++r) revN[r] = N[rank - 1 - r];
  r2c_plan = fftw_plan_dft_r2c(rank, revN, circ, ox, FFTW_ESTIMATE);
  c2r_plan = fftw_plan_dft_c2r(rank, revN, ox, circ, FFTW_ESTIMATE);
  Free(revN);

  PROTECT(x_dim = getAttrib(x, R_DimSymbol));
  PROTECT(y_dim = getAttrib(y, R_DimSymbol));

  /* Fill input buffer by X values*/
  memset(circ, 0, pN * sizeof(double));
  fill_subarray(circ, REAL(x), rank, N, INTEGER(x_dim), 1);

  /* Run the plan on X-input data */
  fftw_execute_dft_r2c(r2c_plan, circ, ox);

  /* Fill input buffer by Y values*/
  memset(circ, 0, pN * sizeof(double));
  fill_subarray(circ, REAL(y), rank, N, INTEGER(y_dim), 1);

  /* Run the plan on Y-input data */
  fftw_execute_dft_r2c(r2c_plan, circ, oy);

  /* Compute conjugation if needed */
  if (conjugate)
    for (i = 0; i < phN; ++i)
      oy[i] = conj(oy[i]);

  /* Dot-multiply ox and oy, and divide by Nx*...*Nz*/
  for (i = 0; i < phN; ++i)
    oy[i] *= ox[i] / pN;

  /* Compute the reverse transform to obtain result */
  fftw_execute_dft_c2r(c2r_plan, oy, circ);

  /* Copy the requested part of the grid into the R result */
  PROTECT(res = allocVector(REALSXP, prod(rank, INTEGER(output_dim))));
  fill_subarray(circ, REAL(res), rank, N, INTEGER(output_dim), 0);
  /* setAttrib(output_dim, R_NamesSymbol, R_NilValue); */
  setAttrib(res, R_DimSymbol, output_dim);
  /* setAttrib(res, R_DimNamesSymbol, R_NilValue); */

  /* Cleanup: the plans are created on every call, so they have to be
     destroyed here as well, otherwise each call leaks two plans */
  fftw_destroy_plan(r2c_plan);
  fftw_destroy_plan(c2r_plan);
  fftw_free(ox);
  fftw_free(oy);
  fftw_free(circ);

  /* Return */
  UNPROTECT(3);
  return res;
}
コード例 #19
0
ファイル: pm_periodic.c プロジェクト: huilin2014/cuda-gadget
/*! Calculates the long-range periodic force given the particle positions
 *  using the PM method.  The force is Gaussian filtered with Asmth, given in
 *  mesh-cell units. We carry out a CIC charge assignment, and compute the
 *  potenial by Fourier transform methods. The potential is finite differenced
 *  using a 4-point finite differencing formula, and the forces are
 *  interpolated tri-linearly to the particle positions. The CIC kernel is
 *  deconvolved. Note that the particle distribution is not in the slab
 *  decomposition that is used for the FFT. Instead, overlapping patches
 *  between local domains and FFT slabs are communicated as needed.
 */
void pmforce_periodic(void)
{
  double k2, kx, ky, kz, smth;
  double dx, dy, dz;
  double fx, fy, fz, ff;
  double asmth2, fac, acc_dim;
  int i, j, slab, level, sendTask, recvTask;
  int x, y, z, xl, yl, zl, xr, yr, zr, xll, yll, zll, xrr, yrr, zrr, ip, dim;
  int slab_x, slab_y, slab_z;
  int slab_xx, slab_yy, slab_zz;
  int meshmin[3], meshmax[3], sendmin, sendmax, recvmin, recvmax;
  int rep, ncont, cont_sendmin[2], cont_sendmax[2], cont_recvmin[2], cont_recvmax[2];
  int dimx, dimy, dimz, recv_dimx, recv_dimy, recv_dimz;
  MPI_Status status;


  if(ThisTask == 0)
    {
      printf("Starting periodic PM calculation.\n");
      fflush(stdout);
    }

  #ifdef FFTW3
  if(fftw_plan_exists)
  {
	  /* macro defined in callgrind.h */
	  // CALLGRIND_START_INSTRUMENTATION;
  }
  #else
  // CALLGRIND_START_INSTRUMENTATION;
  #endif

  force_treefree();


  asmth2 = (2 * M_PI) * All.Asmth[0] / All.BoxSize;
  asmth2 *= asmth2;

  fac = All.G / (M_PI * All.BoxSize);	/* to get potential */
  fac *= 1 / (2 * All.BoxSize / PMGRID);	/* for finite differencing */

  /* first, establish the extension of the local patch in the PMGRID  */

  for(j = 0; j < 3; j++)
    {
      meshmin[j] = PMGRID;
      meshmax[j] = 0;
    }

  for(i = 0; i < NumPart; i++)
    {
      for(j = 0; j < 3; j++)
	{
	  slab = to_slab_fac * P[i].Pos[j];
	  if(slab >= PMGRID)
	    slab = PMGRID - 1;

	  if(slab < meshmin[j])
	    meshmin[j] = slab;

	  if(slab > meshmax[j])
	    meshmax[j] = slab;
	}
    }

  MPI_Allgather(meshmin, 3, MPI_INT, meshmin_list, 3, MPI_INT, MPI_COMM_WORLD);
  MPI_Allgather(meshmax, 3, MPI_INT, meshmax_list, 3, MPI_INT, MPI_COMM_WORLD);

  dimx = meshmax[0] - meshmin[0] + 2;
  dimy = meshmax[1] - meshmin[1] + 2;
  dimz = meshmax[2] - meshmin[2] + 2;

  pm_init_periodic_allocate((dimx + 4) * (dimy + 4) * (dimz + 4));

  #ifdef FFTW3
  if(!fftw_plan_exists)
  {
  	/* Create plan for in-place r2c DFT */
  	fft_forward_plan = fftw_mpi_plan_dft_r2c_3d(PMGRID, PMGRID, PMGRID, rhogrid, fft_of_rhogrid,
  												MPI_COMM_WORLD, FFTW_PATIENT | FFTW_MPI_TRANSPOSED_OUT);
  	fft_inverse_plan = fftw_mpi_plan_dft_c2r_3d(PMGRID, PMGRID, PMGRID, fft_of_rhogrid, rhogrid,
  												MPI_COMM_WORLD, FFTW_PATIENT | FFTW_MPI_TRANSPOSED_IN);
  	fftw_plan_exists = true;	// use C99 bool type
  	
  	if(ThisTask == 0)
  		printf("Created new FFTW3 plan.\n");
  } else {
  	/* do nothing, the plan has already been created by previous call to this function */
  }
  #endif

	/* For FFTW3, there is a different convention for fftsize for real-to-complex transforms, i.e.
		fftsize is the size of the complex data (number of complex values), NOT the size of the real data!
		We attempt to take care of this by defining fftsize to be fftsize_real when using FFTW3.  */

  for(i = 0; i < dimx * dimy * dimz; i++)
    workspace[i] = 0;

  for(i = 0; i < NumPart; i++)
    {
      slab_x = to_slab_fac * P[i].Pos[0];
      if(slab_x >= PMGRID)
	slab_x = PMGRID - 1;
      dx = to_slab_fac * P[i].Pos[0] - slab_x;
      slab_x -= meshmin[0];
      slab_xx = slab_x + 1;

      slab_y = to_slab_fac * P[i].Pos[1];
      if(slab_y >= PMGRID)
	slab_y = PMGRID - 1;
      dy = to_slab_fac * P[i].Pos[1] - slab_y;
      slab_y -= meshmin[1];
      slab_yy = slab_y + 1;

      slab_z = to_slab_fac * P[i].Pos[2];
      if(slab_z >= PMGRID)
	slab_z = PMGRID - 1;
      dz = to_slab_fac * P[i].Pos[2] - slab_z;
      slab_z -= meshmin[2];
      slab_zz = slab_z + 1;

      workspace[(slab_x * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
      workspace[(slab_x * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * dy * (1.0 - dz);
      workspace[(slab_x * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * dz;
      workspace[(slab_x * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * dy * dz;

      workspace[(slab_xx * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (dx) * (1.0 - dy) * (1.0 - dz);
      workspace[(slab_xx * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (dx) * dy * (1.0 - dz);
      workspace[(slab_xx * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (dx) * (1.0 - dy) * dz;
      workspace[(slab_xx * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (dx) * dy * dz;
    }


  for(i = 0; i < fftsize; i++)	/* clear local density field */
    rhogrid[i] = 0;

  for(level = 0; level < (1 << PTask); level++)	/* note: for level=0, target is the same task */
    {
      sendTask = ThisTask;
      recvTask = ThisTask ^ level;
      if(recvTask < NTask)
	{
	  /* check how much we have to send */
	  sendmin = 2 * PMGRID;
	  sendmax = -1;
	  for(slab_x = meshmin[0]; slab_x < meshmax[0] + 2; slab_x++)
	    if(slab_to_task[slab_x % PMGRID] == recvTask)
	      {
		if(slab_x < sendmin)
		  sendmin = slab_x;
		if(slab_x > sendmax)
		  sendmax = slab_x;
	      }
	  if(sendmax == -1)
	    sendmin = 0;

	  /* check how much we have to receive */
	  recvmin = 2 * PMGRID;
	  recvmax = -1;
	  for(slab_x = meshmin_list[3 * recvTask]; slab_x < meshmax_list[3 * recvTask] + 2; slab_x++)
	    if(slab_to_task[slab_x % PMGRID] == sendTask)
	      {
		if(slab_x < recvmin)
		  recvmin = slab_x;
		if(slab_x > recvmax)
		  recvmax = slab_x;
	      }
	  if(recvmax == -1)
	    recvmin = 0;


	  if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0)	/* ok, we have a contribution to the slab */
	    {
	      recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 2;
	      recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 2;
	      recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 2;

	      if(level > 0)
		{
		  MPI_Sendrecv(workspace + (sendmin - meshmin[0]) * dimy * dimz,
			       (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE, recvTask,
			       TAG_PERIODIC_A, forcegrid,
			       (recvmax - recvmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real), MPI_BYTE,
			       recvTask, TAG_PERIODIC_A, MPI_COMM_WORLD, &status);
		}
	      else
		{
		  memcpy(forcegrid, workspace + (sendmin - meshmin[0]) * dimy * dimz,
			 (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real));
		}

	      for(slab_x = recvmin; slab_x <= recvmax; slab_x++)
		{
		  slab_xx = (slab_x % PMGRID) - first_slab_of_task[ThisTask];

		  if(slab_xx >= 0 && slab_xx < slabs_per_task[ThisTask])
		    {
		      for(slab_y = meshmin_list[3 * recvTask + 1];
			  slab_y <= meshmax_list[3 * recvTask + 1] + 1; slab_y++)
			{
			  slab_yy = slab_y;
			  if(slab_yy >= PMGRID)
			    slab_yy -= PMGRID;

			  for(slab_z = meshmin_list[3 * recvTask + 2];
			      slab_z <= meshmax_list[3 * recvTask + 2] + 1; slab_z++)
			    {
			      slab_zz = slab_z;
			      if(slab_zz >= PMGRID)
				slab_zz -= PMGRID;

			      rhogrid[PMGRID * PMGRID2 * slab_xx + PMGRID2 * slab_yy + slab_zz] +=
				forcegrid[((slab_x - recvmin) * recv_dimy +
					   (slab_y - meshmin_list[3 * recvTask + 1])) * recv_dimz +
					  (slab_z - meshmin_list[3 * recvTask + 2])];
			    }
			}
		    }
		}
	    }
	}
    }

  #ifdef DEBUG_FFT
  
  double norm_density = 0.;
  for(i = 0; i < fftsize; i++)
  {
  	norm_density += rhogrid[i]*rhogrid[i];
  }
  
  /* Write out rhogrid to a 'fft-snapshot' file */
  if (ThisTask == 0) {
 	 FILE *fp;
 	 
 	 /* Print the norm of the fft */
 	 printf("L2-norm of density: %f\n", norm_density);
 	 printf("First five values of density: %f, %f, %f, %f, %f\n", rhogrid[0],rhogrid[1],rhogrid[2],rhogrid[3], rhogrid[4]);
 	 
  }
  #endif

  /* Do the FFT of the density field */
  #ifdef FFTW3
  fftw_execute_dft_r2c(fft_forward_plan, rhogrid, fft_of_rhogrid);
  #else
  rfftwnd_mpi(fft_forward_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
  #endif

  #ifdef DEBUG_FFT
  
  double norm_complex = 0.;
  for(i = 0; i < fftsize; i++)
  {
  	norm_complex += rhogrid[i]*rhogrid[i];
  }
  
  /* Write out rhogrid to a 'fft-snapshot' file */
  if (ThisTask == 0) {
 	 FILE *fp;
 	 
 	 /* Print the norm of the fft */
 	 printf("L2-norm of complex rhogrid: %f\n", norm_complex);
 	 printf("First two values of complex fft: %f + i*%f, %f + i*%f\n", rhogrid[0],rhogrid[1],rhogrid[2],rhogrid[3]);
 	 
  }
  #endif

  /* multiply with Green's function for the potential */

  for(y = slabstart_y; y < slabstart_y + nslab_y; y++)
    for(x = 0; x < PMGRID; x++)
      for(z = 0; z < PMGRID / 2 + 1; z++)
	{
	  if(x > PMGRID / 2)
	    kx = x - PMGRID;
	  else
	    kx = x;
	  if(y > PMGRID / 2)
	    ky = y - PMGRID;
	  else
	    ky = y;
	  if(z > PMGRID / 2)
	    kz = z - PMGRID;
	  else
	    kz = z;

	  k2 = kx * kx + ky * ky + kz * kz;

	  if(k2 > 0)
	    {
	      smth = -exp(-k2 * asmth2) / k2;

	      /* do deconvolution */

	      fx = fy = fz = 1;
	      if(kx != 0)
		{
		  fx = (M_PI * kx) / PMGRID;
		  fx = sin(fx) / fx;
		}
	      if(ky != 0)
		{
		  fy = (M_PI * ky) / PMGRID;
		  fy = sin(fy) / fy;
		}
	      if(kz != 0)
		{
		  fz = (M_PI * kz) / PMGRID;
		  fz = sin(fz) / fz;
		}
	      ff = 1 / (fx * fy * fz);
	      smth *= ff * ff * ff * ff;

	      /* end deconvolution */

	      ip = PMGRID * (PMGRID / 2 + 1) * (y - slabstart_y) + (PMGRID / 2 + 1) * x + z;
	      c_re(fft_of_rhogrid[ip]) *= smth;
	      c_im(fft_of_rhogrid[ip]) *= smth;
	    }
	}

  if(slabstart_y == 0)
    c_re(fft_of_rhogrid[0]) = c_im(fft_of_rhogrid[0]) = 0.0;

  /* Do the FFT to get the potential */
  #ifdef FFTW3
  fftw_execute_dft_c2r(fft_inverse_plan, fft_of_rhogrid, rhogrid);
  
  /* Now normalize the output 
  for(i = 0; i < fftsize; i++)
	rhogrid[i] = rhogrid[i] / (PMGRID*PMGRID*PMGRID); */
  #else
  rfftwnd_mpi(fft_inverse_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
  #endif
  
  #ifdef DEBUG_FFT
  
  double norm = 0.;
  for(i = 0; i < fftsize; i++)
  {
  	norm += rhogrid[i]*rhogrid[i];
  }
  
  /* Write out rhogrid to a 'fft-snapshot' file */
  if (ThisTask == 0) {
 	 FILE *fp;
 	 
 	 /* Print the norm of the fft */
 	 printf("L2-norm of rhogrid: %f\n", norm);
 	 printf("First five values of fft: %f, %f, %f, %f, %f\n\n", rhogrid[0],rhogrid[1],rhogrid[2],rhogrid[3],rhogrid[4]);
 	 
 	 /* fp = fopen("rhogrid.0", "wb"); /* add suffix to indicate which node this is */
 	 /* fwrite(rhogrid, sizeof(rhogrid[0]), fftsize, fp);*/
 	 /* fclose(fp); */
  }
  #endif

  /* Now rhogrid holds the potential */
  /* construct the potential for the local patch */


  dimx = meshmax[0] - meshmin[0] + 6;
  dimy = meshmax[1] - meshmin[1] + 6;
  dimz = meshmax[2] - meshmin[2] + 6;

  for(level = 0; level < (1 << PTask); level++)	/* note: for level=0, target is the same task */
    {
      sendTask = ThisTask;
      recvTask = ThisTask ^ level;

      if(recvTask < NTask)
	{

	  /* check how much we have to send */
	  sendmin = 2 * PMGRID;
	  sendmax = -PMGRID;
	  for(slab_x = meshmin_list[3 * recvTask] - 2; slab_x < meshmax_list[3 * recvTask] + 4; slab_x++)
	    if(slab_to_task[(slab_x + PMGRID) % PMGRID] == sendTask)
	      {
		if(slab_x < sendmin)
		  sendmin = slab_x;
		if(slab_x > sendmax)
		  sendmax = slab_x;
	      }
	  if(sendmax == -PMGRID)
	    sendmin = sendmax + 1;


	  /* check how much we have to receive */
	  recvmin = 2 * PMGRID;
	  recvmax = -PMGRID;
	  for(slab_x = meshmin[0] - 2; slab_x < meshmax[0] + 4; slab_x++)
	    if(slab_to_task[(slab_x + PMGRID) % PMGRID] == recvTask)
	      {
		if(slab_x < recvmin)
		  recvmin = slab_x;
		if(slab_x > recvmax)
		  recvmax = slab_x;
	      }
	  if(recvmax == -PMGRID)
	    recvmin = recvmax + 1;

	  if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0)	/* ok, we have a contribution to the slab */
	    {
	      recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 6;
	      recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 6;
	      recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 6;

	      ncont = 1;
	      cont_sendmin[0] = sendmin;
	      cont_sendmax[0] = sendmax;
	      cont_sendmin[1] = sendmax + 1;
	      cont_sendmax[1] = sendmax;

	      cont_recvmin[0] = recvmin;
	      cont_recvmax[0] = recvmax;
	      cont_recvmin[1] = recvmax + 1;
	      cont_recvmax[1] = recvmax;

	      for(slab_x = sendmin; slab_x <= sendmax; slab_x++)
		{
		  if(slab_to_task[(slab_x + PMGRID) % PMGRID] != ThisTask)
		    {
		      /* non-contiguous */
		      cont_sendmax[0] = slab_x - 1;
		      while(slab_to_task[(slab_x + PMGRID) % PMGRID] != ThisTask)
			slab_x++;
		      cont_sendmin[1] = slab_x;
		      ncont++;
		    }
		}

	      for(slab_x = recvmin; slab_x <= recvmax; slab_x++)
		{
		  if(slab_to_task[(slab_x + PMGRID) % PMGRID] != recvTask)
		    {
		      /* non-contiguous */
		      cont_recvmax[0] = slab_x - 1;
		      while(slab_to_task[(slab_x + PMGRID) % PMGRID] != recvTask)
			slab_x++;
		      cont_recvmin[1] = slab_x;
		      if(ncont == 1)
			ncont++;
		    }
		}


	      for(rep = 0; rep < ncont; rep++)
		{
		  sendmin = cont_sendmin[rep];
		  sendmax = cont_sendmax[rep];
		  recvmin = cont_recvmin[rep];
		  recvmax = cont_recvmax[rep];

		  /* prepare what we want to send */
		  if(sendmax - sendmin >= 0)
		    {
		      for(slab_x = sendmin; slab_x <= sendmax; slab_x++)
			{
			  slab_xx = ((slab_x + PMGRID) % PMGRID) - first_slab_of_task[ThisTask];

			  for(slab_y = meshmin_list[3 * recvTask + 1] - 2;
			      slab_y < meshmax_list[3 * recvTask + 1] + 4; slab_y++)
			    {
			      slab_yy = (slab_y + PMGRID) % PMGRID;

			      for(slab_z = meshmin_list[3 * recvTask + 2] - 2;
				  slab_z < meshmax_list[3 * recvTask + 2] + 4; slab_z++)
				{
				  slab_zz = (slab_z + PMGRID) % PMGRID;

				  forcegrid[((slab_x - sendmin) * recv_dimy +
					     (slab_y - (meshmin_list[3 * recvTask + 1] - 2))) * recv_dimz +
					    slab_z - (meshmin_list[3 * recvTask + 2] - 2)] =
				    rhogrid[PMGRID * PMGRID2 * slab_xx + PMGRID2 * slab_yy + slab_zz];
				}
			    }
			}
		    }

		  if(level > 0)
		    {
		      MPI_Sendrecv(forcegrid,
				   (sendmax - sendmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real),
				   MPI_BYTE, recvTask, TAG_PERIODIC_B,
				   workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz,
				   (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE,
				   recvTask, TAG_PERIODIC_B, MPI_COMM_WORLD, &status);
		    }
		  else
		    {
		      memcpy(workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz,
			     forcegrid, (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real));
		    }
		}
	    }
	}
    }


  dimx = meshmax[0] - meshmin[0] + 2;
  dimy = meshmax[1] - meshmin[1] + 2;
  dimz = meshmax[2] - meshmin[2] + 2;

  recv_dimx = meshmax[0] - meshmin[0] + 6;
  recv_dimy = meshmax[1] - meshmin[1] + 6;
  recv_dimz = meshmax[2] - meshmin[2] + 6;


  for(dim = 0; dim < 3; dim++)	/* Calculate each component of the force. */
    {
      /* get the force component by finite differencing the potential */
      /* note: "workspace" now contains the potential for the local patch, plus a suffiently large buffer region */

      for(x = 0; x < meshmax[0] - meshmin[0] + 2; x++)
	for(y = 0; y < meshmax[1] - meshmin[1] + 2; y++)
	  for(z = 0; z < meshmax[2] - meshmin[2] + 2; z++)
	    {
	      xrr = xll = xr = xl = x;
	      yrr = yll = yr = yl = y;
	      zrr = zll = zr = zl = z;

	      switch (dim)
		{
		case 0:
		  xr = x + 1;
		  xrr = x + 2;
		  xl = x - 1;
		  xll = x - 2;
		  break;
		case 1:
		  yr = y + 1;
		  yl = y - 1;
		  yrr = y + 2;
		  yll = y - 2;
		  break;
		case 2:
		  zr = z + 1;
		  zl = z - 1;
		  zrr = z + 2;
		  zll = z - 2;
		  break;
		}

	      forcegrid[(x * dimy + y) * dimz + z]
		=
		fac * ((4.0 / 3) *
		       (workspace[((xl + 2) * recv_dimy + (yl + 2)) * recv_dimz + (zl + 2)]
			- workspace[((xr + 2) * recv_dimy + (yr + 2)) * recv_dimz + (zr + 2)]) -
		       (1.0 / 6) *
		       (workspace[((xll + 2) * recv_dimy + (yll + 2)) * recv_dimz + (zll + 2)] -
			workspace[((xrr + 2) * recv_dimy + (yrr + 2)) * recv_dimz + (zrr + 2)]));
	    }

      /* read out the forces */

      for(i = 0; i < NumPart; i++)
	{
	  slab_x = to_slab_fac * P[i].Pos[0];
	  if(slab_x >= PMGRID)
	    slab_x = PMGRID - 1;
	  dx = to_slab_fac * P[i].Pos[0] - slab_x;
	  slab_x -= meshmin[0];
	  slab_xx = slab_x + 1;

	  slab_y = to_slab_fac * P[i].Pos[1];
	  if(slab_y >= PMGRID)
	    slab_y = PMGRID - 1;
	  dy = to_slab_fac * P[i].Pos[1] - slab_y;
	  slab_y -= meshmin[1];
	  slab_yy = slab_y + 1;

	  slab_z = to_slab_fac * P[i].Pos[2];
	  if(slab_z >= PMGRID)
	    slab_z = PMGRID - 1;
	  dz = to_slab_fac * P[i].Pos[2] - slab_z;
	  slab_z -= meshmin[2];
	  slab_zz = slab_z + 1;

	  acc_dim =
	    forcegrid[(slab_x * dimy + slab_y) * dimz + slab_z] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
	  acc_dim += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_z] * (1.0 - dx) * dy * (1.0 - dz);
	  acc_dim += forcegrid[(slab_x * dimy + slab_y) * dimz + slab_zz] * (1.0 - dx) * (1.0 - dy) * dz;
	  acc_dim += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_zz] * (1.0 - dx) * dy * dz;

	  acc_dim += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_z] * (dx) * (1.0 - dy) * (1.0 - dz);
	  acc_dim += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_z] * (dx) * dy * (1.0 - dz);
	  acc_dim += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_zz] * (dx) * (1.0 - dy) * dz;
	  acc_dim += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_zz] * (dx) * dy * dz;

	  P[i].GravPM[dim] = acc_dim;
	}
    }

  pm_init_periodic_free();
  force_treeallocate(All.TreeAllocFactor * All.MaxPart, All.MaxPart);

  All.NumForcesSinceLastDomainDecomp = 1 + All.TotNumPart * All.TreeDomainUpdateFrequency;

  if(ThisTask == 0)
    {
      printf("done PM.\n");
      fflush(stdout);
    }
  
  #ifdef FFTW3
  if(fftw_plan_exists)
  {
	  /* macro defined in callgrind.h */
	  // CALLGRIND_STOP_INSTRUMENTATION;
  }
  #else
  // CALLGRIND_STOP_INSTRUMENTATION;
  #endif
}
コード例 #20
0
ファイル: FFTHandler.cpp プロジェクト: KlugerLab/Ritornello
void FFTHandler::reverse(double* inout){
	fftw_execute_dft_c2r(fftReversePlan,(fftw_complex*)inout, inout);
	//normalize the reverse transform
	for(long ii =0; ii < n; ++ii) inout[ii]/=n;
}