Exemplo n.º 1
0
void FFTConvolver::ConvolveSameSize(double* image, const double* kernel, size_t imgWidth, size_t imgHeight)
{
	const size_t imgSize = imgWidth * imgHeight;
	const size_t complexSize = (imgWidth/2+1) * imgHeight;
	double* tempData = reinterpret_cast<double*>(fftw_malloc(imgSize * sizeof(double)));
	fftw_complex* fftImageData = reinterpret_cast<fftw_complex*>(fftw_malloc(complexSize * sizeof(fftw_complex)));
	fftw_complex* fftKernelData = reinterpret_cast<fftw_complex*>(fftw_malloc(complexSize * sizeof(fftw_complex)));
	
	boost::mutex::scoped_lock lock(_mutex);
	fftw_plan inToFPlan = fftw_plan_dft_r2c_2d(imgHeight, imgWidth, tempData, fftImageData, FFTW_ESTIMATE);
	fftw_plan fToOutPlan = fftw_plan_dft_c2r_2d(imgHeight, imgWidth, fftImageData, tempData, FFTW_ESTIMATE);
	lock.unlock();
	
	memcpy(tempData, image, imgSize * sizeof(double));
	fftw_execute_dft_r2c(inToFPlan, tempData, fftImageData);
	
	memcpy(tempData, kernel, imgSize * sizeof(double));
	fftw_execute_dft_r2c(inToFPlan, tempData, fftKernelData);
	
	double fact = 1.0/imgSize;
	for(size_t i=0; i!=complexSize; ++i)
		reinterpret_cast<std::complex<double>*>(fftImageData)[i] *= fact * reinterpret_cast<std::complex<double>*>(fftKernelData)[i];
		
	fftw_execute_dft_c2r(fToOutPlan, reinterpret_cast<fftw_complex*>(fftImageData), tempData);
	memcpy(image, tempData, imgSize * sizeof(double));
		
	fftw_free(fftImageData);
	fftw_free(fftKernelData);
	fftw_free(tempData);
	
	lock.lock();
	fftw_destroy_plan(inToFPlan);
	fftw_destroy_plan(fToOutPlan);
	lock.unlock();
}
Exemplo n.º 2
0
/*
 * Multiplies every slab of wri by a j-dependent phase factor in Fourier
 * space: each of the NX/NPROC slabs is transformed with fft_1d_forward,
 * row j of the result is multiplied by exp(I*phase(i,j))/NY, and the slab is
 * transformed back in place with fft_1d_backward.
 *
 * wri : real field, modified in place. The slab stride is (NY+2) doubles when
 *       WITH_2D is defined and (NZ+2)*NY doubles otherwise.
 * t   : time passed to time_shift() and, unless TIME_DEPENDANT_SHEAR is
 *       defined, used to derive the velocity offset of the phase.
 */
void remap_output(	double wri[], 
					const double t) {
					
	int ix, iy, iz;
	double tvelocity;
	double tremap;
	complex double wexp;
	complex double phase;
	double complex		*w2d;
	double				*slab;
	
	DEBUG_START_FUNC;
	
	w2d = (double complex *) fftw_malloc( sizeof(double complex) * (NY/2+1) * NZ );
	if (w2d == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w2d allocation");
	
	/* Both build variants use the same remap time; only the velocity term differs. */
	tremap = time_shift(t);
#ifdef TIME_DEPENDANT_SHEAR
	tvelocity = 0.0;
#else	
	tvelocity = fmod(t, 2.0 * param.ly / (param.shear * param.lx));
#endif	
	
	for( ix = 0 ; ix < NX/NPROC ; ix++) {
		/* Start of the current slab inside wri. */
#ifdef WITH_2D
		slab = wri + ix*(NY+2);
#else
		slab = wri + ix*(NZ+2)*NY;
#endif
		fftw_execute_dft_r2c(fft_1d_forward, slab, w2d);

		for( iy = 0 ; iy < NY/2+1 ; iy++) {
			double complex *row = w2d + iy * NZ;

			/* The global x index is ix + rank*(NX/NPROC). */
			phase = (double complex) ((2.0 * M_PI) / param.ly *  ((double) iy )  * 
									( ((double) (ix + rank * (NX/NPROC)) / (double) NX ) * tremap - tvelocity / 2.0 ) * param.lx * param.shear);
			
			/* The 1/NY factor is applied together with the phase factor. */
			wexp = cexp( I * phase)/NY;

			for( iz = 0 ; iz < NZ; iz++) {
				row[ iz ] = wexp * row[ iz ];
			}
		}

		fftw_execute_dft_c2r(fft_1d_backward, w2d, slab);
	}
	
	fftw_free(w2d);
	
	DEBUG_END_FUNC;
}
Exemplo n.º 3
0
/*
 * Builds the frequency-domain weight for one angular sector.
 *
 * Steps:
 *   1. Rasterise a 0/1 sector mask: a pixel is 1 when it lies on the positive
 *      side of the line at angle (a - wd + pi/2) and on the non-positive side
 *      of the line at angle (a + wd + pi/2).
 *   2. Transform the mask, multiply it bin by bin with g2 and transform back.
 *   3. Circularly shift the result by (nx/2, ny/2), divide by nx*ny, weight
 *      it with g1 and normalise so that the samples sum to 1.
 *   4. Transform the normalised weights in place and return them.
 *
 * nx, ny : logical image size. Rows are stored with a padded stride of
 *          2*(nx/2+1) doubles, as the in-place transforms require.
 * f      : plans; f->forward and f->backward are executed in place.
 * a, wd  : sector centre angle and half-width (radians).
 * g1     : nx*ny real weights, indexed [i + j*nx].
 * g2     : ny*(nx/2+1) complex factors applied to the mask spectrum.
 *
 * Returns a buffer obtained from fftw_malloc (release with fftw_free), or
 * NULL when an allocation fails. No memory is leaked on failure.
 */
fftw_complex* gen_sec (const int nx, const int ny,
                       fft_plans_t* f, const double a, const double wd,
                       const double* g1, const fftw_complex* g2) {
    int i, j, x, y;
    double total = 0;
    double* out;
    const size_t bytes = sizeof(double)*ny*2*(nx/2+1);

    /* The sector edges do not depend on the pixel: evaluate the trig once. */
    const double cos_lo = cos(a-wd+M_PI/2.0);
    const double sin_lo = sin(a-wd+M_PI/2.0);
    const double cos_hi = cos(a+wd+M_PI/2.0);
    const double sin_hi = sin(a+wd+M_PI/2.0);

    double* sec_d = fftw_malloc (bytes);
    if (NULL == sec_d) {
      return NULL;
    }
    fftw_complex* sec = (fftw_complex*)sec_d;
    memset(sec_d, 0, bytes);
    for (j = 0; j < ny; j++) {
        for (i = 0; i < nx; i++) {
            const double x_val = -nx/2.0 + (i) * (double)nx/(double)(nx-1);
            const double y_val =  ny/2.0 - (j) * (double)ny/(double)(ny-1);

            sec_d[i+j*2*(nx/2+1)] = (x_val*cos_lo + y_val*sin_lo >  0 ? 1 : 0) *
                                    (x_val*cos_hi + y_val*sin_hi <= 0 ? 1 : 0);
        }
    }
    fftw_execute_dft_r2c (f->forward, sec_d, sec);

    for (i = 0; i < ny*(nx/2+1); i++) {
        sec[i] *= g2[i];
    }

    fftw_execute_dft_c2r (f->backward, sec, sec_d);

    if (NULL == (out = fftw_malloc (bytes))) {
      /* Do not leak the mask buffer when the second allocation fails. */
      fftw_free (sec_d);
      return NULL;
    }
    /* Zero the row padding so the normalisation loop below never reads
     * uninitialised memory. */
    memset(out, 0, bytes);

    x = nx/2;
    y = ny/2;
    for (j = 0; j < ny; j++) {
        /* Source row/column after the circular shift, wrapped into range. */
        int k = (j - y) % ny;
        k = k < 0 ? ny + k : k;
        for (i = 0; i < nx; i++) {
            int h = (i - x) % nx;
            h = h < 0 ? nx + h : h;
            out[i+j*2*(nx/2+1)] = sec_d[h+k*2*(nx/2+1)] / (nx*ny) * g1[i+j*nx];
            total += out[i+j*2*(nx/2+1)];
        }
    }

    for (i = 0; i < ny*2*(nx/2+1); i++) {
        out[i] /= total;
    }

    fftw_execute_dft_r2c (f->forward, out, (fftw_complex*)out);

    fftw_free (sec_d);
    return (fftw_complex*)out;
}
Exemplo n.º 4
0
static R_INLINE void hankelize_fft(double *F,
                                   const double *U, const double *V,
                                   const hankel_matrix *h) {
  R_len_t N = h->length, L = h->window;
  R_len_t K = N - L + 1;
  R_len_t i;
  double *iU, *iV;
  fftw_complex *cU, *cV;

  /* Allocate needed memory */
  iU = (double*) fftw_malloc(N * sizeof(double));
  iV = (double*) fftw_malloc(N * sizeof(double));
  cU = (fftw_complex*) fftw_malloc((N/2 + 1) * sizeof(fftw_complex));
  cV = (fftw_complex*) fftw_malloc((N/2 + 1) * sizeof(fftw_complex));

  /* Fill in buffers */
  memcpy(iU, U, L*sizeof(double));
  memset(iU+L, 0, (K - 1)*sizeof(double));

  memcpy(iV, V, K*sizeof(double));
  memset(iV+K, 0, (L - 1)*sizeof(double));

  /* Compute the FFTs */
  fftw_execute_dft_r2c(h->r2c_plan, iU, cU);
  fftw_execute_dft_r2c(h->r2c_plan, iV, cV);

  /* Dot-multiply */
  for (i = 0; i < N/2 + 1; ++i)
    cU[i] = cU[i] * cV[i];

  /* Compute the inverse FFT */
  fftw_execute_dft_c2r(h->c2r_plan, cU, iU);

  /* Form the result */
  for (i = 0; i < N; ++i) {
    R_len_t leftu, rightu, l;

    if (i < L) leftu = i; else leftu = L - 1;
    if (i < K) rightu = 0; else  rightu = i - K + 1;

    l = (leftu - rightu + 1);

    F[i] = iU[i] / l / N;
  }

  fftw_free(iU);
  fftw_free(iV);
  fftw_free(cU);
  fftw_free(cV);
}
Exemplo n.º 5
0
/*
 * Computes out = H * v for the Hankel matrix described by `matrix`
 * (a hankel_matrix with length N and window L, so K = N - L + 1).
 *
 * v (length K) is reversed and zero-padded to length N, transformed with
 * h->r2c_plan, multiplied by the precomputed spectrum h->circ_freq and
 * transformed back with h->c2r_plan. The first L entries of the result,
 * divided by N, are written to out.
 */
static void hankel_matmul(double* out,
                          const double* v,
                          const void* matrix) {
  const hankel_matrix *h = matrix;
  const R_len_t N = h->length, L = h->window;
  const R_len_t K = N - L + 1;
  const R_len_t nfreq = N/2 + 1;
  R_len_t idx;

  /* Work buffers: padded real vector and its spectrum */
  double *padded = (double*) fftw_malloc(N * sizeof(double));
  fftw_complex *spectrum = (fftw_complex*) fftw_malloc(nfreq * sizeof(fftw_complex));

  /* Reversed v in the first K slots, zeros in the remaining L - 1 */
  for (idx = 0; idx < K; ++idx)
    padded[idx] = v[K - idx - 1];
  memset(padded + K, 0, (L - 1)*sizeof(double));

  /* Forward transform of the reversed vector */
  fftw_execute_dft_r2c(h->r2c_plan, padded, spectrum);

  /* Pointwise product with the precomputed circulant spectrum */
  for (idx = 0; idx < nfreq; ++idx)
    spectrum[idx] *= h->circ_freq[idx];

  /* Inverse transform back into the real buffer */
  fftw_execute_dft_c2r(h->c2r_plan, spectrum, padded);

  /* Only the first L entries form the product; undo the FFT scaling */
  for (idx = 0; idx < L; ++idx)
    out[idx] = padded[idx] / N;

  fftw_free(padded);
  fftw_free(spectrum);
}
Exemplo n.º 6
0
/*
 * In-place real-to-complex transform of wrin using the r2cfft plan.
 * The time spent in the transform is accumulated into fft_timer.
 */
void gfft_r2c(double *wrin) {
	fft_timer -= get_c_time();
	/* The complex output is written over the real input. */
	fftw_execute_dft_r2c(r2cfft, wrin, (double complex *) wrin);
	fft_timer += get_c_time();
}
Exemplo n.º 7
0
// In-place real-to-complex transform with an intermediate transpose.
// Assumes wrin has the logical dimensions [NY/NPROC, NX, NZ] of real positions
// and the physical dimensions [NY/NPROC, NX, NZ+2].
// The elapsed time is accumulated into fft_timer.
void gfft_r2c_t(double *wrin) {
	int i;
	double complex *win = (double complex *) wrin;

	fft_timer -= get_c_time();

	// 2D pass, written back over the input buffer
	fftw_execute_dft_r2c(r2c_2d, wrin, win);

	// win now has logical dimensions [NY_COMPLEX/NPROC, NX_COMPLEX, NZ_COMPLEX];
	// swap the Y and X axes in place
	transpose_complex_YX(win, win);

	// After the transpose the logical dimensions are
	// [NX_COMPLEX/NPROC, NY_COMPLEX, NZ_COMPLEX]: run the remaining 1D pass slab by slab
#ifdef _OPENMP
	#pragma omp parallel for private(i) schedule(static)	
#endif
	for(i=0 ; i < NX_COMPLEX/NPROC ; i++) {
		double complex *slab = &win[i*NY_COMPLEX*NZ_COMPLEX];
		fftw_execute_dft(r2c_1d, slab, slab);
	}

	fft_timer += get_c_time();
}
Exemplo n.º 8
0
Arquivo: hbhankel.c Projeto: asl/rssa
/*
 * Convolves y in place with a signal whose spectrum ox is already known.
 *
 * y is transformed with r2c_plan, optionally conjugated, multiplied bin by
 * bin with ox / prod(rank, N), and transformed back into y with c2r_plan.
 *
 * ox        : precomputed spectrum, hprod(rank, N) complex values.
 * y         : real array, overwritten with the result.
 * rank, N   : number of dimensions and their sizes.
 * conjugate : non-zero to conjugate the spectrum of y before multiplying.
 */
static void convolveNd_half(const fftw_complex *ox,
                            double *y,
                            R_len_t rank,
                            const R_len_t *N,
                            int conjugate,
                            fftw_plan r2c_plan,
                            fftw_plan c2r_plan) {
  const R_len_t pN = prod(rank, N), phN = hprod(rank, N);
  R_len_t bin;

  /* Spectrum of y */
  fftw_complex *oy = (fftw_complex*) fftw_malloc(phN * sizeof(fftw_complex));

  fftw_execute_dft_r2c(r2c_plan, y, oy);

  /* Optional conjugation, then the scaled product with ox, one bin at a time */
  for (bin = 0; bin < phN; ++bin) {
    if (conjugate)
      oy[bin] = conj(oy[bin]);
    oy[bin] *= ox[bin] / pN;
  }

  /* Back to the real domain, overwriting y */
  fftw_execute_dft_c2r(c2r_plan, oy, y);

  fftw_free(oy);
}
/**
 * Windows (optionally) and zero-pads one input frame to m_nfft samples, then
 * computes its real-to-complex FFT into @p out.
 *
 * @param inPtr   N input samples.
 * @param N       number of valid samples; the code assumes N <= m_nfft and,
 *                when a window is set, that m_window has N entries.
 * @param out     destination for the interleaved (re, im) spectrum.
 * @param outSize number of doubles the caller wants copied into @p out
 *                (FFTW build). At most 2*(m_nfft/2+1) doubles are produced,
 *                so the copy is clamped to that amount; a negative value
 *                copies nothing.
 */
void FFT::processToken(double* inPtr, const int N, double* out, const int outSize)
{
#ifdef WITH_FFTW3
	double* inFFT = (double*) fftw_malloc(m_nfft*sizeof(double));
	complex<double>* outFFT = (complex<double>*) fftw_malloc((m_nfft/2+1)*sizeof(complex<double>));
	Map<VectorXd> infft(inFFT,m_nfft);
#else
	VectorXd infft(m_nfft);
#endif
	Map<VectorXd> inData(inPtr,N);
	if (m_window.size()>0)
		infft.segment(0,N) = m_window.array() * inData.array();
	else
		infft.segment(0,N) = inData;
	// Zero-pad the tail when the frame is shorter than the FFT length.
	if (N<m_nfft)
		infft.segment(N,m_nfft-N).setZero();
#ifdef WITH_FFTW3
	fftw_execute_dft_r2c(m_plan,inFFT,(fftw_complex*)outFFT);
	// outFFT holds m_nfft/2+1 complex values; never read past its end.
	const int spectrumDoubles = 2*(m_nfft/2+1);
	int copyCount = outSize < spectrumDoubles ? outSize : spectrumDoubles;
	if (copyCount < 0)
		copyCount = 0;
	memcpy(out,outFFT,copyCount*sizeof(double));
	fftw_free(inFFT);
	fftw_free(outFFT);
#else
	m_plan.fwd((std::complex<double>*) out,infft.data(),m_nfft);
#endif
}
Exemplo n.º 10
0
/**
 * Copies m_size * m_count input samples into the internal buffer, multiplies
 * each group of m_count consecutive samples by the matching window
 * coefficient, and runs the real-to-complex plan into @p outdata.
 *
 * @param indata  m_size * m_count doubles, laid out as [row * m_count + col].
 * @param outdata destination spectrum, written by the plan.
 * @return @p outdata, to allow chaining.
 */
complex * FFT::execute(double * indata, complex * outdata) {
	memcpy(m_indata, indata, sizeof(double) * m_size * m_count);

	// Row `row` (m_count entries) is scaled by m_window[row].
	for(int row = 0; row < m_size; row++) {
		for(int col = 0; col < m_count; col++) {
			m_indata[m_count * row + col] *= m_window[row];
		}
	}

	fftw_execute_dft_r2c(m_plan, m_indata, outdata);

	return outdata;
}
Exemplo n.º 11
0
/*
 * Transposed product for the 2D (Hankel-block-Hankel) matrix described by
 * `matrix` (an hbhankel_matrix with field size Nx x Ny and window Lx x Ly, so
 * Kx = Nx - Lx + 1 and Ky = Ny - Ly + 1).
 *
 * v (Lx*Ly values) is reversed and placed in the bottom-right Lx x Ly corner
 * of a zeroed Nx x Ny array. That array is transformed with h->r2c_plan,
 * multiplied by h->circ_freq and transformed back with h->c2r_plan. The
 * Kx x Ky window starting at (Lx-1, Ly-1), divided by Nx*Ny, is written to out.
 */
static void hbhankel_tmatmul(double* out,
                             const double* v,
                             const void* matrix) {
  const hbhankel_matrix *h = matrix;
  const R_len_t Nx = h->length.x, Ny = h->length.y;
  const R_len_t Lx = h->window.x, Ly = h->window.y;
  const R_len_t Kx = Nx - Lx + 1, Ky = Ny - Ly + 1;
  const R_len_t total = Nx * Ny;
  const R_len_t nfreq = Ny * (Nx/2 + 1);
  R_len_t row, col, bin;

  /* Work buffers: padded real field and its spectrum */
  double *field = (double*) fftw_malloc(total * sizeof(double));
  fftw_complex *spectrum = (fftw_complex*) fftw_malloc(nfreq * sizeof(fftw_complex));

  /* R reference implementation:
  revv <- matrix(c(rep(0, C$Lx*(C$Ky-1)), rev(v)), C$Lx, ncol(C$Cblock));
  revv <- rbind(matrix(0, (C$Kx-1), ncol(revv)), revv);

  mult <- fft(C$Cblock * fft(revv), inverse = TRUE);

  Re((mult/(prod(dim(C$Cblock))))[C$Lx:(C$Lx+C$Kx-1),C$Ly:(C$Ly+C$Ky-1)]); */

  /* Zero field with the reversed v in its bottom-right corner */
  memset(field, 0, total * sizeof(double));
  for (row = 0; row < Ly; ++row) {
    for (col = 0; col < Lx; ++col) {
      const R_len_t dst = (col + Kx - 1) + (row + Ky - 1)*Nx;
      const R_len_t src = Lx*Ly - col - row*Lx - 1;
      field[dst] = v[src];
    }
  }

  /* Forward transform of the padded, reversed vector */
  fftw_execute_dft_r2c(h->r2c_plan, field, spectrum);

  /* Pointwise product with the precomputed circulant spectrum */
  for (bin = 0; bin < nfreq; ++bin)
    spectrum[bin] *= h->circ_freq[bin];

  /* Inverse transform back into the real field */
  fftw_execute_dft_c2r(h->c2r_plan, spectrum, field);

  /* Extract the Kx x Ky result window and undo the FFT scaling */
  for (row = 0; row < Ky; ++row) {
    for (col = 0; col < Kx; ++col) {
      const R_len_t src = (col + Lx - 1) + (row + Ly - 1)*Nx;
      out[col + row*Kx] = field[src] / total;
    }
  }

  fftw_free(field);
  fftw_free(spectrum);
}
Exemplo n.º 12
0
Arquivo: hbhankel.c Projeto: asl/rssa
/*
 * Convolves y with x via the frequency domain; the result overwrites y.
 *
 * The spectrum of x is computed here with r2c_plan and handed to
 * convolveNd_half, which transforms y, applies the optional conjugation,
 * multiplies and transforms back.
 *
 * rank, N   : number of dimensions and their sizes.
 * conjugate : forwarded unchanged to convolveNd_half.
 */
static void convolveNd(double *x,
                       double *y,
                       R_len_t rank,
                       const R_len_t *N,
                       int conjugate,
                       fftw_plan r2c_plan,
                       fftw_plan c2r_plan) {
  const R_len_t phN = hprod(rank, N);

  /* Temporary storage for the spectrum of x */
  fftw_complex *ox = (fftw_complex*) fftw_malloc(phN * sizeof(fftw_complex));

  fftw_execute_dft_r2c(r2c_plan, x, ox);

  /* The helper takes care of y and of the inverse transform */
  convolveNd_half(ox, y, rank, N, conjugate, r2c_plan, c2r_plan);

  fftw_free(ox);
}
Exemplo n.º 13
0
/*
 * Runs one FFT-based scoring pass.
 *
 * After s_ncc_fft_scorewin_initialize(), the signal is transformed into
 * udp->sig_fft, multiplied bin by bin (complex product) with udp->pat_fft,
 * and transformed back into udp->padded_score. Intermediate results are
 * written to "sig_fft.txt", "sco_fft.txt" and "sco_ifftd.txt" through the
 * dump helpers.
 */
void
s_ncc_fft_run (FATM_Options* fopt)
{
    S_Ncc_Fft_Data* udp = (S_Ncc_Fft_Data*) fopt->alg_data;
    const int fft_nx = fopt->sig_rect_scan.dims[1];    /* In fftw3, nx is rows */
    const int fft_ny = fopt->sig_rect_scan.dims[0];    /* In fftw3, ny is cols */
    const int bin_count = fft_nx * (fft_ny/2+1);
    int bin;

    /* Make integral images, etc. */
    s_ncc_fft_scorewin_initialize (fopt);

    /* Forward transform of the signal */
    fftw_execute_dft_r2c (udp->sig_fftw3_plan,
        (double*) fopt->sig.data, udp->sig_fft);

    /* Debugging info */
    dump_fft (udp->sig_fft, fft_nx, fft_ny, "sig_fft.txt");

    /* sig_fft[bin] <- sig_fft[bin] * pat_fft[bin] (complex product) */
    for (bin = 0; bin < bin_count; bin++) {
        const double sig_re = udp->sig_fft[bin][0];
        const double sig_im = udp->sig_fft[bin][1];
        const double prod_re = sig_re * udp->pat_fft[bin][0]
                               - sig_im * udp->pat_fft[bin][1];
        const double prod_im = sig_re * udp->pat_fft[bin][1]
                               + sig_im * udp->pat_fft[bin][0];
        udp->sig_fft[bin][0] = prod_re;
        udp->sig_fft[bin][1] = prod_im;
    }

    /* Debugging info */
    dump_fft (udp->sig_fft, fft_nx, fft_ny, "sco_fft.txt");

    /* Inverse transform of the product into the padded score buffer */
    fftw_execute_dft_c2r (udp->sco_fftw3_plan,
        udp->sig_fft, udp->padded_score);

    dump_txt (udp->padded_score, fft_nx, fft_ny, "sco_ifftd.txt");
}
Exemplo n.º 14
0
/*
 * Thin wrapper around fftw_execute_dft_r2c that takes the plan by pointer.
 * Executes *plan on the real input dble_in, writing the spectrum to cplx_out.
 */
void kemo_fftw_execute_dft_r2c(fftw_plan *plan, double *dble_in, fftw_complex *cplx_out){
	fftw_plan active_plan = *plan;
	fftw_execute_dft_r2c(active_plan, dble_in, cplx_out);
}
Exemplo n.º 15
0
/* Real to complex forward transform.
 *
 * Converts the single-band, uncoded image `in` to double, runs a 2D
 * real-to-complex FFT on it and writes the full (not half) complex spectrum
 * to `out`, one line at a time, with every value divided by
 * in->Xsize * in->Ysize.
 *
 * dummy: image against which all temporaries are allocated.
 *
 * Returns 0 on success, -1 on any failure (allocation, checks, planning,
 * output setup or line write).
 */
static int 
rfwfft1( IMAGE *dummy, IMAGE *in, IMAGE *out )
{
	const int size = in->Xsize * in->Ysize;
	const int half_width = in->Xsize / 2 + 1;

	/* Pack to double real here.
	 */
	IMAGE *real = im_open_local( dummy, "fwfft1:1", "t" );

	/* Transform to halfcomplex here.
	 */
	double *half_complex = IM_ARRAY( dummy, 
		in->Ysize * half_width * 2, double );

	/* We have to have a separate real buffer for the planner to work on.
	 */
	double *planner_scratch = IM_ARRAY( dummy, 
		in->Xsize * in->Ysize, double );

	fftw_plan plan;
	double *buf, *q, *p;
	int x, y;

	/* planner_scratch is handed to the planner below, so it must be
	 * checked along with the other allocations.
	 */
	if( !real || !half_complex || !planner_scratch || 
		im_pincheck( in ) || im_outcheck( out ) )
		return( -1 );
	if( in->Coding != IM_CODING_NONE || in->Bands != 1 ) {
                im_error( "im_fwfft", _( "one band uncoded only" ) );
                return( -1 );
	}
	if( im_clip2d( in, real ) )
                return( -1 );

	/* Make the plan for the transform. Yes, they really do use nx for
	 * height and ny for width. Use a separate scratch buffer for the
	 * planner, we can't overwrite real->data
	 */
	if( !(plan = fftw_plan_dft_r2c_2d( in->Ysize, in->Xsize,
		planner_scratch, (fftw_complex *) half_complex,
		0 )) ) {
                im_error( "im_fwfft", _( "unable to create transform plan" ) );
		return( -1 );
	}

	/* Run the plan on the real pixel data instead of the scratch buffer.
	 */
	fftw_execute_dft_r2c( plan,
		(double *) real->data, (fftw_complex *) half_complex );

	fftw_destroy_plan( plan );

	/* WIO to out.
	 */
        if( im_cp_desc( out, in ) )
                return( -1 );
	out->Bbits = IM_BBITS_DPCOMPLEX;
	out->BandFmt = IM_BANDFMT_DPCOMPLEX;
        if( im_setupout( out ) )
                return( -1 );
	if( !(buf = (double *) IM_ARRAY( dummy, 
		IM_IMAGE_SIZEOF_LINE( out ), PEL )) )
		return( -1 );

	/* Copy to out and normalise. The right half is the up/down and 
	 * left/right flip of the left, but conjugated. Do the first 
	 * row separately, then mirror around the centre row.
	 */
	p = half_complex;
	q = buf;

	/* Row 0, left half: straight copy of the stored coefficients.
	 */
	for( x = 0; x < half_width; x++ ) {
		q[0] = p[0] / size;
		q[1] = p[1] / size;
		p += 2;
		q += 2;
	}

	/* Row 0, right half: walk the stored row backwards, conjugating.
	 */
	p = half_complex + ((in->Xsize + 1) / 2 - 1) * 2; 

	for( x = half_width; x < out->Xsize; x++ ) {
		q[0] = p[0] / size;
		q[1] = -1.0 * p[1] / size;
		p -= 2;
		q += 2;
	}

	if( im_writeline( 0, out, (PEL *) buf ) )
		return( -1 );

	for( y = 1; y < out->Ysize; y++ ) {
		/* Left half comes from stored row y.
		 */
		p = half_complex + y * half_width * 2; 
		q = buf;

		for( x = 0; x < half_width; x++ ) {
			q[0] = p[0] / size;
			q[1] = p[1] / size;
			p += 2;
			q += 2;
		}

		/* Good grief. 
		 *
		 * Right half comes from stored row (Ysize - y), read 
		 * backwards and conjugated.
		 */
		p = half_complex + 2 *
			((out->Ysize - y + 1) * half_width - 2 + 
				(in->Xsize & 1));

		for( x = half_width; x < out->Xsize; x++ ) {
			q[0] = p[0] / size;
			q[1] = -1.0 * p[1] / size;
			p -= 2;
			q += 2;
		}

		if( im_writeline( y, out, (PEL *) buf ) )
			return( -1 );
	}

	return( 0 );
}
Exemplo n.º 16
0
/*
 * Filters a 3-channel, 8-bit interleaved image from src into dst.
 *
 * For each channel the image and its square are transformed once. Then, for
 * each of the ns precomputed spectra in ab->g, multiply_6_c() combines them
 * with those transforms, the products are transformed back, and mesh_d()
 * accumulates them into ab->num / ab->den. After divide_mul_const_d() the
 * numerators are written to dst with a circular shift of (nx/2, ny/2).
 *
 * src, dst : images with `pitch` bytes per row and 3 bytes per pixel.
 * ns       : number of entries of ab->g to use.
 * q        : unused.
 * nx, ny   : image size; work rows have a padded stride of 2*(nx/2+1).
 * f        : plans executed in place on the work buffers.
 * ab       : per-thread work buffers (src1, src2, s, m, num, den, g).
 */
void artistic_smooth (uint8_t* src,
                      uint8_t* dst,
                      double ns,
                      double q,
                      int nx, int ny, int pitch,
                      fft_plans_t* f,
                      artistic_buf_t* ab)
{
    int row, col, ch, sector, half_x, half_y;
    const int stride = 2*(nx/2+1);
    const double nxny = nx*ny;
    const double nxny2 = nxny*nxny;
    const int n = ny*(nx/2+1);
    const int n2 = n*2;

    (void) q;

    fftw_complex** src1_c = ab->src1;
    fftw_complex** src2_c = ab->src2;
    fftw_complex** s_c = ab->s;
    fftw_complex** m_c = ab->m;

    double** num = ab->num;
    double* den = ab->den;

    /* Real-valued views of the complex work buffers. */
    double* src1_d[3];
    double* src2_d[3];
    double* s_d[3];
    double* m_d[3];

    for (ch = 0; ch < 3; ch++) {
        src1_d[ch]  = (double*) src1_c[ch];
        src2_d[ch]  = (double*) src2_c[ch];
        s_d[ch]     = (double*) s_c[ch];
        m_d[ch]     = (double*) m_c[ch];
    }

    /* De-interleave the pixels into per-channel planes of values and squares. */
    for (row = 0; row < ny; row++) {
        const uint8_t* pixel = src + row*pitch;
        const int base = row*stride;

        for (col = 0; col < nx; col++) {
            for (ch = 0; ch < 3; ch++) {
                const double value = *pixel++;
                src1_d[ch][base + col] = value;
                src2_d[ch][base + col] = value*value;
            }
        }
    }

    for (ch = 0; ch < 3; ch++) {
        fftw_execute_dft_r2c (f->forward, src1_d[ch], src1_c[ch]);
        fftw_execute_dft_r2c (f->forward, src2_d[ch], src2_c[ch]);
    }

    /* Reset the accumulators. */
    for (ch = 0; ch < 3; ch++) {
        memset (num[ch], 0, sizeof(double)*n2);
    }
    memset (den, 0, sizeof(double)*n2);

    for (sector = 0; sector < ns; sector++) {
        fftw_complex* g_fft = ab->g[sector];

        multiply_6_c(m_c[0], src1_c[0],
                     m_c[1], src1_c[1],
                     m_c[2], src1_c[2],
                     s_c[0], src2_c[0],
                     s_c[1], src2_c[1],
                     s_c[2], src2_c[2],
                     g_fft, n);

        for (ch = 0; ch < 3; ch++) {
            fftw_execute_dft_c2r (f->backward, m_c[ch], m_d[ch]);
            fftw_execute_dft_c2r (f->backward, s_c[ch], s_d[ch]);
        }

        mesh_d (s_d[0], s_d[1], s_d[2], m_d[0], m_d[1], m_d[2], nxny, nxny2,
                num[0], num[1], num[2], den, n2);
    }

    for (ch = 0; ch < 3; ch++) {
        divide_mul_const_d(num[ch], den, nxny, n2);
    }

    /* Write the result with a circular shift of half the image size. */
    half_x = nx/2;
    half_y = ny/2;
    for (row = 0; row < ny; row++) {
        int src_row = (row - half_y) % ny;
        if (src_row < 0) {
            src_row += ny;
        }
        const int row_base = src_row * stride;
        uint8_t* d = dst + row*pitch;

        for (col = 0; col < nx; col++) {
            int src_col = (col - half_x) % nx;
            if (src_col < 0) {
                src_col += nx;
            }
            const int idx = src_col + row_base;

            for (ch = 0; ch < 3; ch++) {
                *d++ = num[ch][idx];
            }
        }
    }
}
Exemplo n.º 17
0
/*
 * Allocates and fills the per-thread work buffers c->b[thread_id].
 *
 * Besides the plain work arrays (src1, src2, s, m, num, den) this builds:
 *   g1   : nx*ny Gaussian weights exp(-(x^2 + y^2) / (2*sgm^2)) on the same
 *          coordinate grid that gen_sec() uses;
 *   g2   : the in-place transform of exp(-0.5*(x^2 + y^2)) sampled on the
 *          padded grid with integer offsets from the image centre;
 *   f->g : ns sector spectra from gen_sec(), sector i centred at
 *          i*2*pi/ns with half-width pi/ns.
 * g1 and g2 are temporaries and are released before returning.
 *
 * Returns 0 on success and -1 when any allocation (including one inside
 * gen_sec) fails. Buffers already stored in the artistic_buf_t are left in
 * place on failure.
 */
int init_thread_bufs (plugin_context* ctx, int thread_id)
{
    artistic_proc_context* c = ctx->data;
    const int nx = c->width;
    const int ny = c->height;
    const int nxny = nx*ny;
    const double sgm = c->sgm;
    const int ns = c->ns;
    fft_plans_t* p = c->p;

    int i;
    double* g1;
    fftw_complex* g2;
    double* g2_d;

    artistic_buf_t* f = c->b[thread_id];
    f->g    = fftw_malloc (sizeof(fftw_complex*)*ns);
    f->src1 = fftw_malloc (sizeof(fftw_complex*)*3);
    f->src2 = fftw_malloc (sizeof(fftw_complex*)*3);
    f->s    = fftw_malloc (sizeof(fftw_complex*)*3);
    f->m    = fftw_malloc (sizeof(fftw_complex*)*3);
    f->num  = fftw_malloc (sizeof(double*)*3);
    f->den  = fftw_malloc (sizeof(double)*ny*2*(nx/2+1));
    if (NULL == f->g || NULL == f->src1 || NULL == f->src2 || NULL == f->s
     || NULL == f->m || NULL == f->num || NULL == f->den) {
      return -1;
    }

    for (i = 0; i < 3; i++) {
        f->src1[i]  = fftw_malloc (sizeof(fftw_complex)*ny*(nx/2+1));
        f->src2[i]  = fftw_malloc (sizeof(fftw_complex)*ny*(nx/2+1));
        f->s[i]     = fftw_malloc (sizeof(fftw_complex)*ny*(nx/2+1));
        f->m[i]     = fftw_malloc (sizeof(fftw_complex)*ny*(nx/2+1));
        f->num[i]   = fftw_malloc (sizeof(double)*ny*2*(nx/2+1));
        if (NULL == f->src1[i] || NULL == f->src2[i] || NULL == f->s[i]
          || NULL == f->m[i] || NULL == f->num[i]) {
          return -1;
        }
    }

    if (NULL == (g1 = malloc (sizeof(double)*nxny))) {
      return -1;
    }
    for (i = 0; i < nxny; i++) {
        /* The grid step must be computed in floating point, as gen_sec()
         * does: the integer form (i%nx)*nx/(nx-1) truncates and divides by
         * zero when nx == 1. */
        const double x_val = -nx/2.0 + (i%nx) * (double)nx/(double)(nx-1);
        const double y_val =  ny/2.0 - (i/nx) * (double)ny/(double)(ny-1);
        g1[i] = exp(-(x_val*x_val+y_val*y_val)/(2.0*sgm*sgm));
    }

    g2 = fftw_malloc (sizeof(fftw_complex)*ny*(nx/2+1));
    if (NULL == g2) {
      free (g1);
      return -1;
    }
    g2_d = (double*)g2;
    memset(g2, 0, sizeof(fftw_complex)*ny*(nx/2+1));
    for (i = 0; i < ny*2*(nx/2+1); i++) {
        /* Integer offsets from the centre (the half sizes are truncated). */
        const int y = (i / (2*(nx/2+1))) - ny/2.0;
        const int x = (i % (2*(nx/2+1))) - nx/2.0;
        g2_d[i] = exp(-0.5*(x*x+y*y));
    }
    fftw_execute_dft_r2c (p->forward, g2_d, g2);

    for (i = 0; i < ns; i++) {
        f->g[i] = gen_sec(nx, ny, p, i*2.0*M_PI/ns, M_PI/ns, g1, g2);
        if (NULL == f->g[i]) {
          /* gen_sec() reports allocation failure with NULL. */
          fftw_free (g2);
          free (g1);
          return -1;
        }
    }

    fftw_free (g2);
    free (g1);
    return 0;
}
Exemplo n.º 18
0
void Transformer_CPU::transform_forward_x(double *inout)
{
	fftw_execute_dft_r2c(plan_x_r2c, (double*)inout, (fftw_complex*)inout);
}
Exemplo n.º 19
0
void gravity_calculate_acceleration(void){
	// Setting up the grid
	if (gravity_fft_init_done==0){
		gravity_fft_init();
		gravity_fft_init_done=1;
	}
#pragma omp parallel for schedule(guided)
	for (int i=0; i<N; i++){
		particles[i].ax = 0; 
		particles[i].ay = 0; 
		particles[i].az = 0; 
	}
	if (integrator == SEI){
		struct ghostbox gb = boundaries_get_ghostbox(1,0,0);
		shift_shear = gb.shifty;
	}
	gravity_fft_p2grid();
	
	if (integrator == SEI){
		// Remap in fourier space to deal with shearing sheet boundary conditions.
		gravity_fft_remap(density_r, 1);
	}
	
	fftw_execute_dft_r2c(r2cfft, density_r, (fftw_complex*)density);
	
	
	// Inverse Poisson equation
	
	for(int i = 0 ; i < grid_NCOMPLEX ; i++) {
		if (integrator == SEI){
			// Compute time-dependent wave-vectors
			kxt[i] = kx[i] + shift_shear/boxsize_y * ky[i];
			k[i]  = sqrt( kxt[i]*kxt[i] + ky[i] * ky[i]);
			// we will use 1/k, that prevents singularity 
			// (the k=0 is set to zero by renormalization...)
			if ( k[i] == 0.0 ) k[i] = 1.0; 
		}
		double q0 = - 2.0 * M_PI * density[2*i] / (k[i] * root_nx * root_ny);
		double q1 = - 2.0 * M_PI * density[2*i+1] / (k[i] * root_nx * root_ny);
		double sinkxt = sin(kxt[i] * dx);
		double sinky  = sin(ky[i] * dy);
		fx[2*i]		=   q1 * sinkxt / dx;		// Real part of Fx
		fx[2*i+1] 	= - q0 * sinkxt / dx;		// Imaginary part of Fx
		fy[2*i]		=   q1 * sinky  / dy;	
		fy[2*i+1] 	= - q0 * sinky  / dy;
	}
	
	// Transform back the force field
	fftw_execute_dft_c2r(c2rfft, (fftw_complex*)fx, fx);
	fftw_execute_dft_c2r(c2rfft, (fftw_complex*)fy, fy);
	
	if (integrator == SEI){
		// Remap in fourier space to deal with shearing sheet boundary conditions.
		gravity_fft_remap(fx, -1);
		gravity_fft_remap(fy, -1);
	}

	for(int i=0;i<N;i++){
		gravity_fft_grid2p(&(particles[i]));
	}
}
Exemplo n.º 20
0
/*! Calculates the long-range periodic force given the particle positions
 *  using the PM method.  The force is Gaussian filtered with Asmth, given in
 *  mesh-cell units. We carry out a CIC charge assignment, and compute the
 *  potential by Fourier transform methods. The potential is finite differenced
 *  using a 4-point finite differencing formula, and the forces are
 *  interpolated tri-linearly to the particle positions. The CIC kernel is
 *  deconvolved. Note that the particle distribution is not in the slab
 *  decomposition that is used for the FFT. Instead, overlapping patches
 *  between local domains and FFT slabs are communicated as needed.
 *
 *  Steps, in the order they appear below:
 *   1. determine the local mesh patch (meshmin/meshmax) and share it with
 *      all tasks via MPI_Allgather;
 *   2. CIC-assign the particle masses to "workspace" (the local patch);
 *   3. exchange patches pairwise between tasks and accumulate them into the
 *      slab-decomposed "rhogrid";
 *   4. forward FFT, multiply by the smoothed/deconvolved Green's function,
 *      inverse FFT;
 *   5. send the potential back from the slabs into "workspace";
 *   6. for each dimension, finite-difference the potential into "forcegrid"
 *      and interpolate it to the particles, storing the result in
 *      P[i].GravPM[dim].
 *
 *  The function takes no arguments and returns nothing; it works on the
 *  global particle array P, the global All structure and the file-level PM
 *  buffers (rhogrid, workspace, forcegrid, ...).
 */
void pmforce_periodic(void)
{
  double k2, kx, ky, kz, smth;
  double dx, dy, dz;
  double fx, fy, fz, ff;
  double asmth2, fac, acc_dim;
  int i, j, slab, level, sendTask, recvTask;
  int x, y, z, xl, yl, zl, xr, yr, zr, xll, yll, zll, xrr, yrr, zrr, ip, dim;
  int slab_x, slab_y, slab_z;
  int slab_xx, slab_yy, slab_zz;
  int meshmin[3], meshmax[3], sendmin, sendmax, recvmin, recvmax;
  int rep, ncont, cont_sendmin[2], cont_sendmax[2], cont_recvmin[2], cont_recvmax[2];
  int dimx, dimy, dimz, recv_dimx, recv_dimy, recv_dimz;
  MPI_Status status;


  if(ThisTask == 0)
    {
      printf("Starting periodic PM calculation.\n");
      fflush(stdout);
    }

  /* Profiling hook: the callgrind macro is commented out in both branches,
     so this preprocessor block currently has no effect. */
  #ifdef FFTW3
  if(fftw_plan_exists)
  {
	  /* macro defined in callgrind.h */
	  // CALLGRIND_START_INSTRUMENTATION;
  }
  #else
  // CALLGRIND_START_INSTRUMENTATION;
  #endif

  force_treefree();


  /* asmth2 = (2*pi*Asmth/BoxSize)^2; it scales k2 in the exponential
     smoothing factor of the Green's function below */
  asmth2 = (2 * M_PI) * All.Asmth[0] / All.BoxSize;
  asmth2 *= asmth2;

  fac = All.G / (M_PI * All.BoxSize);	/* to get potential */
  fac *= 1 / (2 * All.BoxSize / PMGRID);	/* for finite differencing */

  /* first, establish the extension of the local patch in the PMGRID  */

  for(j = 0; j < 3; j++)
    {
      meshmin[j] = PMGRID;
      meshmax[j] = 0;
    }

  for(i = 0; i < NumPart; i++)
    {
      for(j = 0; j < 3; j++)
	{
	  slab = to_slab_fac * P[i].Pos[j];
	  if(slab >= PMGRID)
	    slab = PMGRID - 1;

	  if(slab < meshmin[j])
	    meshmin[j] = slab;

	  if(slab > meshmax[j])
	    meshmax[j] = slab;
	}
    }

  /* every task learns the patch extents of all other tasks */
  MPI_Allgather(meshmin, 3, MPI_INT, meshmin_list, 3, MPI_INT, MPI_COMM_WORLD);
  MPI_Allgather(meshmax, 3, MPI_INT, meshmax_list, 3, MPI_INT, MPI_COMM_WORLD);

  /* patch size for the density: the occupied cells plus one extra cell per
     dimension for the slab+1 neighbours written by the CIC assignment */
  dimx = meshmax[0] - meshmin[0] + 2;
  dimy = meshmax[1] - meshmin[1] + 2;
  dimz = meshmax[2] - meshmin[2] + 2;

  /* the extra +4 per dimension gives the extent+6 patch size that is used
     further down when the potential is stored in workspace */
  pm_init_periodic_allocate((dimx + 4) * (dimy + 4) * (dimz + 4));

  #ifdef FFTW3
  if(!fftw_plan_exists)
  {
  	/* Create plan for in-place r2c DFT */
  	fft_forward_plan = fftw_mpi_plan_dft_r2c_3d(PMGRID, PMGRID, PMGRID, rhogrid, fft_of_rhogrid,
  												MPI_COMM_WORLD, FFTW_PATIENT | FFTW_MPI_TRANSPOSED_OUT);
  	fft_inverse_plan = fftw_mpi_plan_dft_c2r_3d(PMGRID, PMGRID, PMGRID, fft_of_rhogrid, rhogrid,
  												MPI_COMM_WORLD, FFTW_PATIENT | FFTW_MPI_TRANSPOSED_IN);
  	fftw_plan_exists = true;	// use C99 bool type
  	
  	if(ThisTask == 0)
  		printf("Created new FFTW3 plan.\n");
  } else {
  	/* do nothing, the plan has already been created by previous call to this function */
  }
  #endif

	/* For FFTW3, there is a different convention for fftsize for real-to-complex transforms, i.e.
		fftsize is the size of the complex data (number of complex values), NOT the size of the real data!
		We attempt to take care of this by defining fftsize to be fftsize_real when using FFTW3.  */

  /* clear the local patch before the mass assignment */
  for(i = 0; i < dimx * dimy * dimz; i++)
    workspace[i] = 0;

  /* CIC mass assignment: each particle spreads its mass over the 8 cells
     surrounding it, weighted by the fractional offsets dx, dy, dz */
  for(i = 0; i < NumPart; i++)
    {
      slab_x = to_slab_fac * P[i].Pos[0];
      if(slab_x >= PMGRID)
	slab_x = PMGRID - 1;
      dx = to_slab_fac * P[i].Pos[0] - slab_x;
      slab_x -= meshmin[0];
      slab_xx = slab_x + 1;

      slab_y = to_slab_fac * P[i].Pos[1];
      if(slab_y >= PMGRID)
	slab_y = PMGRID - 1;
      dy = to_slab_fac * P[i].Pos[1] - slab_y;
      slab_y -= meshmin[1];
      slab_yy = slab_y + 1;

      slab_z = to_slab_fac * P[i].Pos[2];
      if(slab_z >= PMGRID)
	slab_z = PMGRID - 1;
      dz = to_slab_fac * P[i].Pos[2] - slab_z;
      slab_z -= meshmin[2];
      slab_zz = slab_z + 1;

      workspace[(slab_x * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
      workspace[(slab_x * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (1.0 - dx) * dy * (1.0 - dz);
      workspace[(slab_x * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * (1.0 - dy) * dz;
      workspace[(slab_x * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (1.0 - dx) * dy * dz;

      workspace[(slab_xx * dimy + slab_y) * dimz + slab_z] += P[i].Mass * (dx) * (1.0 - dy) * (1.0 - dz);
      workspace[(slab_xx * dimy + slab_yy) * dimz + slab_z] += P[i].Mass * (dx) * dy * (1.0 - dz);
      workspace[(slab_xx * dimy + slab_y) * dimz + slab_zz] += P[i].Mass * (dx) * (1.0 - dy) * dz;
      workspace[(slab_xx * dimy + slab_yy) * dimz + slab_zz] += P[i].Mass * (dx) * dy * dz;
    }


  for(i = 0; i < fftsize; i++)	/* clear local density field */
    rhogrid[i] = 0;

  /* Pairwise exchange: at each level this task is paired with
     recvTask = ThisTask ^ level; the x-slabs of the local patch that belong
     to the partner are sent, and the partner's contribution to our slabs
     is received into forcegrid and added to rhogrid. */
  for(level = 0; level < (1 << PTask); level++)	/* note: for level=0, target is the same task */
    {
      sendTask = ThisTask;
      recvTask = ThisTask ^ level;
      if(recvTask < NTask)
	{
	  /* check how much we have to send */
	  sendmin = 2 * PMGRID;
	  sendmax = -1;
	  for(slab_x = meshmin[0]; slab_x < meshmax[0] + 2; slab_x++)
	    if(slab_to_task[slab_x % PMGRID] == recvTask)
	      {
		if(slab_x < sendmin)
		  sendmin = slab_x;
		if(slab_x > sendmax)
		  sendmax = slab_x;
	      }
	  if(sendmax == -1)
	    sendmin = 0;

	  /* check how much we have to receive */
	  recvmin = 2 * PMGRID;
	  recvmax = -1;
	  for(slab_x = meshmin_list[3 * recvTask]; slab_x < meshmax_list[3 * recvTask] + 2; slab_x++)
	    if(slab_to_task[slab_x % PMGRID] == sendTask)
	      {
		if(slab_x < recvmin)
		  recvmin = slab_x;
		if(slab_x > recvmax)
		  recvmax = slab_x;
	      }
	  if(recvmax == -1)
	    recvmin = 0;


	  if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0)	/* ok, we have a contribution to the slab */
	    {
	      recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 2;
	      recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 2;
	      recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 2;

	      if(level > 0)
		{
		  MPI_Sendrecv(workspace + (sendmin - meshmin[0]) * dimy * dimz,
			       (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE, recvTask,
			       TAG_PERIODIC_A, forcegrid,
			       (recvmax - recvmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real), MPI_BYTE,
			       recvTask, TAG_PERIODIC_A, MPI_COMM_WORLD, &status);
		}
	      else
		{
		  /* level 0: partner is this task itself, so a plain copy suffices */
		  memcpy(forcegrid, workspace + (sendmin - meshmin[0]) * dimy * dimz,
			 (sendmax - sendmin + 1) * dimy * dimz * sizeof(fftw_real));
		}

	      /* add the received patch into the local slabs, wrapping the
	         y and z indices periodically */
	      for(slab_x = recvmin; slab_x <= recvmax; slab_x++)
		{
		  slab_xx = (slab_x % PMGRID) - first_slab_of_task[ThisTask];

		  if(slab_xx >= 0 && slab_xx < slabs_per_task[ThisTask])
		    {
		      for(slab_y = meshmin_list[3 * recvTask + 1];
			  slab_y <= meshmax_list[3 * recvTask + 1] + 1; slab_y++)
			{
			  slab_yy = slab_y;
			  if(slab_yy >= PMGRID)
			    slab_yy -= PMGRID;

			  for(slab_z = meshmin_list[3 * recvTask + 2];
			      slab_z <= meshmax_list[3 * recvTask + 2] + 1; slab_z++)
			    {
			      slab_zz = slab_z;
			      if(slab_zz >= PMGRID)
				slab_zz -= PMGRID;

			      rhogrid[PMGRID * PMGRID2 * slab_xx + PMGRID2 * slab_yy + slab_zz] +=
				forcegrid[((slab_x - recvmin) * recv_dimy +
					   (slab_y - meshmin_list[3 * recvTask + 1])) * recv_dimz +
					  (slab_z - meshmin_list[3 * recvTask + 2])];
			    }
			}
		    }
		}
	    }
	}
    }

  /* Debug output: sum of squares of the first fftsize entries of rhogrid
     (no square root is taken, despite the "L2-norm" label in the message).
     NOTE(review): "fp" is declared but never used in this block. */
  #ifdef DEBUG_FFT
  
  double norm_density = 0.;
  for(i = 0; i < fftsize; i++)
  {
  	norm_density += rhogrid[i]*rhogrid[i];
  }
  
  /* Write out rhogrid to a 'fft-snapshot' file */
  if (ThisTask == 0) {
 	 FILE *fp;
 	 
 	 /* Print the norm of the fft */
 	 printf("L2-norm of density: %f\n", norm_density);
 	 printf("First five values of density: %f, %f, %f, %f, %f\n", rhogrid[0],rhogrid[1],rhogrid[2],rhogrid[3], rhogrid[4]);
 	 
  }
  #endif

  /* Do the FFT of the density field */
  /* NOTE(review): in the FFTW3 branch the plan was created above with
     fftw_mpi_plan_dft_r2c_3d, but it is executed here with the serial
     new-array call fftw_execute_dft_r2c. The FFTW MPI documentation provides
     fftw_mpi_execute_dft_r2c for MPI plans - confirm this call is intended. */
  #ifdef FFTW3
  fftw_execute_dft_r2c(fft_forward_plan, rhogrid, fft_of_rhogrid);
  #else
  rfftwnd_mpi(fft_forward_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
  #endif

  /* Debug output after the forward transform.
     NOTE(review): the sum is taken over rhogrid, not fft_of_rhogrid; this
     only reflects the transform if the two buffers alias (the plan comment
     above says "in-place") - confirm. "fp" is unused here as well. */
  #ifdef DEBUG_FFT
  
  double norm_complex = 0.;
  for(i = 0; i < fftsize; i++)
  {
  	norm_complex += rhogrid[i]*rhogrid[i];
  }
  
  /* Write out rhogrid to a 'fft-snapshot' file */
  if (ThisTask == 0) {
 	 FILE *fp;
 	 
 	 /* Print the norm of the fft */
 	 printf("L2-norm of complex rhogrid: %f\n", norm_complex);
 	 printf("First two values of complex fft: %f + i*%f, %f + i*%f\n", rhogrid[0],rhogrid[1],rhogrid[2],rhogrid[3]);
 	 
  }
  #endif

  /* multiply with Green's function for the potential */
  /* The outer loop runs over the local y-slabs and (y - slabstart_y) is the
     slowest index of ip, i.e. the transformed data is addressed with y and x
     swapped relative to the real-space layout. */

  for(y = slabstart_y; y < slabstart_y + nslab_y; y++)
    for(x = 0; x < PMGRID; x++)
      for(z = 0; z < PMGRID / 2 + 1; z++)
	{
	  /* map grid indices above PMGRID/2 to negative wave numbers */
	  if(x > PMGRID / 2)
	    kx = x - PMGRID;
	  else
	    kx = x;
	  if(y > PMGRID / 2)
	    ky = y - PMGRID;
	  else
	    ky = y;
	  if(z > PMGRID / 2)
	    kz = z - PMGRID;
	  else
	    kz = z;

	  k2 = kx * kx + ky * ky + kz * kz;

	  if(k2 > 0)
	    {
	      /* -1/k^2 kernel with Gaussian damping exp(-k2 * asmth2) */
	      smth = -exp(-k2 * asmth2) / k2;

	      /* do deconvolution */

	      /* fx, fy, fz are sin(a)/a with a = pi*k/PMGRID per dimension;
	         the kernel is divided by their product to the fourth power */
	      fx = fy = fz = 1;
	      if(kx != 0)
		{
		  fx = (M_PI * kx) / PMGRID;
		  fx = sin(fx) / fx;
		}
	      if(ky != 0)
		{
		  fy = (M_PI * ky) / PMGRID;
		  fy = sin(fy) / fy;
		}
	      if(kz != 0)
		{
		  fz = (M_PI * kz) / PMGRID;
		  fz = sin(fz) / fz;
		}
	      ff = 1 / (fx * fy * fz);
	      smth *= ff * ff * ff * ff;

	      /* end deconvolution */

	      ip = PMGRID * (PMGRID / 2 + 1) * (y - slabstart_y) + (PMGRID / 2 + 1) * x + z;
	      c_re(fft_of_rhogrid[ip]) *= smth;
	      c_im(fft_of_rhogrid[ip]) *= smth;
	    }
	}

  /* the k = 0 mode is skipped by the loop above; set it to zero explicitly
     on the task that holds the first y-slab */
  if(slabstart_y == 0)
    c_re(fft_of_rhogrid[0]) = c_im(fft_of_rhogrid[0]) = 0.0;

  /* Do the FFT to get the potential */
  /* NOTE(review): as for the forward transform, an MPI plan is executed with
     the serial fftw_execute_dft_c2r call - confirm. The normalisation loop
     below is commented out, so no 1/PMGRID^3 factor is applied here. */
  #ifdef FFTW3
  fftw_execute_dft_c2r(fft_inverse_plan, fft_of_rhogrid, rhogrid);
  
  /* Now normalize the output 
  for(i = 0; i < fftsize; i++)
	rhogrid[i] = rhogrid[i] / (PMGRID*PMGRID*PMGRID); */
  #else
  rfftwnd_mpi(fft_inverse_plan, 1, rhogrid, workspace, FFTW_TRANSPOSED_ORDER);
  #endif
  
  /* Debug output after the inverse transform (sum of squares of rhogrid);
     the file dump is commented out and "fp" is unused. */
  #ifdef DEBUG_FFT
  
  double norm = 0.;
  for(i = 0; i < fftsize; i++)
  {
  	norm += rhogrid[i]*rhogrid[i];
  }
  
  /* Write out rhogrid to a 'fft-snapshot' file */
  if (ThisTask == 0) {
 	 FILE *fp;
 	 
 	 /* Print the norm of the fft */
 	 printf("L2-norm of rhogrid: %f\n", norm);
 	 printf("First five values of fft: %f, %f, %f, %f, %f\n\n", rhogrid[0],rhogrid[1],rhogrid[2],rhogrid[3],rhogrid[4]);
 	 
 	 /* fp = fopen("rhogrid.0", "wb"); /* add suffix to indicate which node this is */
 	 /* fwrite(rhogrid, sizeof(rhogrid[0]), fftsize, fp);*/
 	 /* fclose(fp); */
  }
  #endif

  /* Now rhogrid holds the potential */
  /* construct the potential for the local patch */


  /* the potential patch is larger than the density patch: it starts two
     cells below meshmin (see the "- 2" offsets below) so that the 4-point
     stencil (x-2 .. x+2) can be evaluated on every cell of the force patch */
  dimx = meshmax[0] - meshmin[0] + 6;
  dimy = meshmax[1] - meshmin[1] + 6;
  dimz = meshmax[2] - meshmin[2] + 6;

  /* Reverse exchange: each task packs the potential of its slabs that a
     partner's patch overlaps into forcegrid and sends it; the received
     pieces are placed into workspace. */
  for(level = 0; level < (1 << PTask); level++)	/* note: for level=0, target is the same task */
    {
      sendTask = ThisTask;
      recvTask = ThisTask ^ level;

      if(recvTask < NTask)
	{

	  /* check how much we have to send */
	  sendmin = 2 * PMGRID;
	  sendmax = -PMGRID;
	  for(slab_x = meshmin_list[3 * recvTask] - 2; slab_x < meshmax_list[3 * recvTask] + 4; slab_x++)
	    if(slab_to_task[(slab_x + PMGRID) % PMGRID] == sendTask)
	      {
		if(slab_x < sendmin)
		  sendmin = slab_x;
		if(slab_x > sendmax)
		  sendmax = slab_x;
	      }
	  if(sendmax == -PMGRID)
	    sendmin = sendmax + 1;


	  /* check how much we have to receive */
	  recvmin = 2 * PMGRID;
	  recvmax = -PMGRID;
	  for(slab_x = meshmin[0] - 2; slab_x < meshmax[0] + 4; slab_x++)
	    if(slab_to_task[(slab_x + PMGRID) % PMGRID] == recvTask)
	      {
		if(slab_x < recvmin)
		  recvmin = slab_x;
		if(slab_x > recvmax)
		  recvmax = slab_x;
	      }
	  if(recvmax == -PMGRID)
	    recvmin = recvmax + 1;

	  if((recvmax - recvmin) >= 0 || (sendmax - sendmin) >= 0)	/* ok, we have a contribution to the slab */
	    {
	      recv_dimx = meshmax_list[3 * recvTask + 0] - meshmin_list[3 * recvTask + 0] + 6;
	      recv_dimy = meshmax_list[3 * recvTask + 1] - meshmin_list[3 * recvTask + 1] + 6;
	      recv_dimz = meshmax_list[3 * recvTask + 2] - meshmin_list[3 * recvTask + 2] + 6;

	      /* The slab range may be split into at most two contiguous pieces
	         (index 0 and 1 of the cont_* arrays); piece 1 starts out empty
	         (min = max + 1) and is filled in when a gap is found. */
	      ncont = 1;
	      cont_sendmin[0] = sendmin;
	      cont_sendmax[0] = sendmax;
	      cont_sendmin[1] = sendmax + 1;
	      cont_sendmax[1] = sendmax;

	      cont_recvmin[0] = recvmin;
	      cont_recvmax[0] = recvmax;
	      cont_recvmin[1] = recvmax + 1;
	      cont_recvmax[1] = recvmax;

	      for(slab_x = sendmin; slab_x <= sendmax; slab_x++)
		{
		  if(slab_to_task[(slab_x + PMGRID) % PMGRID] != ThisTask)
		    {
		      /* non-contiguous */
		      cont_sendmax[0] = slab_x - 1;
		      while(slab_to_task[(slab_x + PMGRID) % PMGRID] != ThisTask)
			slab_x++;
		      cont_sendmin[1] = slab_x;
		      ncont++;
		    }
		}

	      for(slab_x = recvmin; slab_x <= recvmax; slab_x++)
		{
		  if(slab_to_task[(slab_x + PMGRID) % PMGRID] != recvTask)
		    {
		      /* non-contiguous */
		      cont_recvmax[0] = slab_x - 1;
		      while(slab_to_task[(slab_x + PMGRID) % PMGRID] != recvTask)
			slab_x++;
		      cont_recvmin[1] = slab_x;
		      if(ncont == 1)
			ncont++;
		    }
		}


	      /* one send/receive per contiguous piece */
	      for(rep = 0; rep < ncont; rep++)
		{
		  sendmin = cont_sendmin[rep];
		  sendmax = cont_sendmax[rep];
		  recvmin = cont_recvmin[rep];
		  recvmax = cont_recvmax[rep];

		  /* prepare what we want to send */
		  if(sendmax - sendmin >= 0)
		    {
		      for(slab_x = sendmin; slab_x <= sendmax; slab_x++)
			{
			  slab_xx = ((slab_x + PMGRID) % PMGRID) - first_slab_of_task[ThisTask];

			  for(slab_y = meshmin_list[3 * recvTask + 1] - 2;
			      slab_y < meshmax_list[3 * recvTask + 1] + 4; slab_y++)
			    {
			      slab_yy = (slab_y + PMGRID) % PMGRID;

			      for(slab_z = meshmin_list[3 * recvTask + 2] - 2;
				  slab_z < meshmax_list[3 * recvTask + 2] + 4; slab_z++)
				{
				  slab_zz = (slab_z + PMGRID) % PMGRID;

				  forcegrid[((slab_x - sendmin) * recv_dimy +
					     (slab_y - (meshmin_list[3 * recvTask + 1] - 2))) * recv_dimz +
					    slab_z - (meshmin_list[3 * recvTask + 2] - 2)] =
				    rhogrid[PMGRID * PMGRID2 * slab_xx + PMGRID2 * slab_yy + slab_zz];
				}
			    }
			}
		    }

		  if(level > 0)
		    {
		      MPI_Sendrecv(forcegrid,
				   (sendmax - sendmin + 1) * recv_dimy * recv_dimz * sizeof(fftw_real),
				   MPI_BYTE, recvTask, TAG_PERIODIC_B,
				   workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz,
				   (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real), MPI_BYTE,
				   recvTask, TAG_PERIODIC_B, MPI_COMM_WORLD, &status);
		    }
		  else
		    {
		      /* level 0: partner is this task itself, copy locally */
		      memcpy(workspace + (recvmin - (meshmin[0] - 2)) * dimy * dimz,
			     forcegrid, (recvmax - recvmin + 1) * dimy * dimz * sizeof(fftw_real));
		    }
		}
	    }
	}
    }


  /* from here on dim* describes the force patch (extent + 2) and recv_dim*
     the potential patch held in workspace (extent + 6) */
  dimx = meshmax[0] - meshmin[0] + 2;
  dimy = meshmax[1] - meshmin[1] + 2;
  dimz = meshmax[2] - meshmin[2] + 2;

  recv_dimx = meshmax[0] - meshmin[0] + 6;
  recv_dimy = meshmax[1] - meshmin[1] + 6;
  recv_dimz = meshmax[2] - meshmin[2] + 6;


  for(dim = 0; dim < 3; dim++)	/* Calculate each component of the force. */
    {
      /* get the force component by finite differencing the potential */
      /* note: "workspace" now contains the potential for the local patch, plus a sufficiently large buffer region */

      for(x = 0; x < meshmax[0] - meshmin[0] + 2; x++)
	for(y = 0; y < meshmax[1] - meshmin[1] + 2; y++)
	  for(z = 0; z < meshmax[2] - meshmin[2] + 2; z++)
	    {
	      /* start with all stencil points on the cell itself, then shift
	         the four points along the current dimension only */
	      xrr = xll = xr = xl = x;
	      yrr = yll = yr = yl = y;
	      zrr = zll = zr = zl = z;

	      switch (dim)
		{
		case 0:
		  xr = x + 1;
		  xrr = x + 2;
		  xl = x - 1;
		  xll = x - 2;
		  break;
		case 1:
		  yr = y + 1;
		  yl = y - 1;
		  yrr = y + 2;
		  yll = y - 2;
		  break;
		case 2:
		  zr = z + 1;
		  zl = z - 1;
		  zrr = z + 2;
		  zll = z - 2;
		  break;
		}

	      /* 4-point difference: (4/3)*(left - right) - (1/6)*(left2 - right2),
	         scaled by fac; the "+ 2" offsets account for the two buffer
	         cells at the low end of the potential patch */
	      forcegrid[(x * dimy + y) * dimz + z]
		=
		fac * ((4.0 / 3) *
		       (workspace[((xl + 2) * recv_dimy + (yl + 2)) * recv_dimz + (zl + 2)]
			- workspace[((xr + 2) * recv_dimy + (yr + 2)) * recv_dimz + (zr + 2)]) -
		       (1.0 / 6) *
		       (workspace[((xll + 2) * recv_dimy + (yll + 2)) * recv_dimz + (zll + 2)] -
			workspace[((xrr + 2) * recv_dimy + (yrr + 2)) * recv_dimz + (zrr + 2)]));
	    }

      /* read out the forces */
      /* same cell indices and weights as in the mass assignment above */

      for(i = 0; i < NumPart; i++)
	{
	  slab_x = to_slab_fac * P[i].Pos[0];
	  if(slab_x >= PMGRID)
	    slab_x = PMGRID - 1;
	  dx = to_slab_fac * P[i].Pos[0] - slab_x;
	  slab_x -= meshmin[0];
	  slab_xx = slab_x + 1;

	  slab_y = to_slab_fac * P[i].Pos[1];
	  if(slab_y >= PMGRID)
	    slab_y = PMGRID - 1;
	  dy = to_slab_fac * P[i].Pos[1] - slab_y;
	  slab_y -= meshmin[1];
	  slab_yy = slab_y + 1;

	  slab_z = to_slab_fac * P[i].Pos[2];
	  if(slab_z >= PMGRID)
	    slab_z = PMGRID - 1;
	  dz = to_slab_fac * P[i].Pos[2] - slab_z;
	  slab_z -= meshmin[2];
	  slab_zz = slab_z + 1;

	  acc_dim =
	    forcegrid[(slab_x * dimy + slab_y) * dimz + slab_z] * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
	  acc_dim += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_z] * (1.0 - dx) * dy * (1.0 - dz);
	  acc_dim += forcegrid[(slab_x * dimy + slab_y) * dimz + slab_zz] * (1.0 - dx) * (1.0 - dy) * dz;
	  acc_dim += forcegrid[(slab_x * dimy + slab_yy) * dimz + slab_zz] * (1.0 - dx) * dy * dz;

	  acc_dim += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_z] * (dx) * (1.0 - dy) * (1.0 - dz);
	  acc_dim += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_z] * (dx) * dy * (1.0 - dz);
	  acc_dim += forcegrid[(slab_xx * dimy + slab_y) * dimz + slab_zz] * (dx) * (1.0 - dy) * dz;
	  acc_dim += forcegrid[(slab_xx * dimy + slab_yy) * dimz + slab_zz] * (dx) * dy * dz;

	  P[i].GravPM[dim] = acc_dim;
	}
    }

  /* release the PM buffers and re-create the tree storage that was freed by
     force_treefree() at the top of this function */
  pm_init_periodic_free();
  force_treeallocate(All.TreeAllocFactor * All.MaxPart, All.MaxPart);

  All.NumForcesSinceLastDomainDecomp = 1 + All.TotNumPart * All.TreeDomainUpdateFrequency;

  if(ThisTask == 0)
    {
      printf("done PM.\n");
      fflush(stdout);
    }
  
  /* Profiling hook (stop): commented out in both branches, no effect. */
  #ifdef FFTW3
  if(fftw_plan_exists)
  {
	  /* macro defined in callgrind.h */
	  // CALLGRIND_STOP_INSTRUMENTATION;
  }
  #else
  // CALLGRIND_STOP_INSTRUMENTATION;
  #endif
}
Exemplo n.º 21
0
/* In-place real-to-complex transform of a 2D slice.
 *
 * Executes the r2cfft_2Dslice plan with wrin as both the real input and
 * (reinterpreted as double complex) the output, and adds the time spent in
 * the call, as reported by get_c_time(), to fft_timer. */
void gfft_r2c_2Dslice(double *wrin) {
	/* subtract the start time now and add the end time afterwards, so the
	   net change of fft_timer is the elapsed time */
	fft_timer -= get_c_time();
	fftw_execute_dft_r2c(r2cfft_2Dslice, wrin, (double complex *) wrin);
	fft_timer += get_c_time();
}
Exemplo n.º 22
0
Arquivo: hbhankel.c Projeto: asl/rssa
/* N-dimensional circular convolution of two real arrays via FFT.
 *
 * x, y        real R arrays; their "dim" attributes are read to place them
 *             into the working buffer.
 * input_dim   integer vector with the size of the circular working buffer in
 *             each dimension (its length is the rank).
 * output_dim  integer vector with the dimensions of the returned array; it
 *             is also set as the "dim" attribute of the result.
 * Conj        logical; when true the spectrum of y is conjugated before the
 *             multiplication.
 *
 * Returns a newly allocated REALSXP with prod(output_dim) elements.
 *
 * The FFTW plans and buffers created here are local to the call and are all
 * released before returning. */
SEXP convolveN(SEXP x, SEXP y,
               SEXP input_dim, SEXP output_dim,
               SEXP Conj) {
  SEXP x_dim = NILSXP, y_dim = NILSXP;
  R_len_t rank = length(input_dim);
  R_len_t *N = INTEGER(input_dim);
  R_len_t pN = prod(rank, N), phN = hprod(rank, N);
  int conjugate = LOGICAL(Conj)[0];

  fftw_complex *ox, *oy;
  fftw_plan r2c_plan, c2r_plan;
  double *circ;
  R_len_t *revN, r, i;

  /* Allocate needed memory */
  circ = (double*) fftw_malloc(pN * sizeof(double));
  ox = (fftw_complex*) fftw_malloc(phN * sizeof(fftw_complex));
  oy = (fftw_complex*) fftw_malloc(phN * sizeof(fftw_complex));

  /* Estimate the best plans for given input length, note, that input data is
     stored in column-major mode, that's why we're passing dimensions in
     *reverse* order */
  revN = Calloc(rank, R_len_t);
  for (r = 0; r < rank; ++r) revN[r] = N[rank - 1 - r];
  r2c_plan = fftw_plan_dft_r2c(rank, revN, circ, ox, FFTW_ESTIMATE);
  c2r_plan = fftw_plan_dft_c2r(rank, revN, ox, circ, FFTW_ESTIMATE);
  Free(revN);

  PROTECT(x_dim = getAttrib(x, R_DimSymbol));
  PROTECT(y_dim = getAttrib(y, R_DimSymbol));

  /* Fill input buffer by X values; the buffer is zeroed first so that any
     part of it not covered by x stays zero */
  memset(circ, 0, pN * sizeof(double));
  fill_subarray(circ, REAL(x), rank, N, INTEGER(x_dim), 1);

  /* Run the plan on X-input data */
  fftw_execute_dft_r2c(r2c_plan, circ, ox);

  /* Fill input buffer by Y values */
  memset(circ, 0, pN * sizeof(double));
  fill_subarray(circ, REAL(y), rank, N, INTEGER(y_dim), 1);

  /* Run the plan on Y-input data (same plan, different output buffer) */
  fftw_execute_dft_r2c(r2c_plan, circ, oy);

  /* Compute conjugation if needed */
  if (conjugate)
    for (i = 0; i < phN; ++i)
      oy[i] = conj(oy[i]);

  /* Dot-multiply ox and oy, and divide by Nx*...*Nz; the division supplies
     the normalisation, since FFTW transforms are unnormalised */
  for (i = 0; i < phN; ++i)
    oy[i] *= ox[i] / pN;

  /* Compute the reverse transform to obtain result */
  fftw_execute_dft_c2r(c2r_plan, oy, circ);

  SEXP res;
  PROTECT(res = allocVector(REALSXP, prod(rank, INTEGER(output_dim))));
  /* presumably the final 0 selects the buffer-to-array copy direction, as
     opposed to the 1 used when filling circ above - confirm against
     fill_subarray */
  fill_subarray(circ, REAL(res), rank, N, INTEGER(output_dim), 0);
  /* setAttrib(output_dim, R_NamesSymbol, R_NilValue); */
  setAttrib(res, R_DimSymbol, output_dim);
  /* setAttrib(res, R_DimNamesSymbol, R_NilValue); */

  /* Cleanup: the plans are created on every call, so they must be destroyed
     here as well, otherwise each call leaks two plans */
  fftw_destroy_plan(r2c_plan);
  fftw_destroy_plan(c2r_plan);
  fftw_free(ox);
  fftw_free(oy);
  fftw_free(circ);

  /* Return */
  UNPROTECT(3);
  return res;
}
Exemplo n.º 23
0
/* --------------------------------------------------------------------------- *
 * This overloaded function is an implementation of your tone mapping operator *
 *                                                                             *
 * Builds a grayscale image by mixing the spectra of the L, a and b channels   *
 * of the source image in the frequency domain, transforming the mix back and  *
 * rescaling the result to the range 0..100 in the first channel of pDst       *
 * (the other two channels are set to 0). pSrc and pDst are converted to Lab   *
 * on the way; pDst is converted back to RGB before returning.                 *
 *                                                                             *
 * Always returns 0.                                                           *
 * --------------------------------------------------------------------------- */
int TMOZhao10::Transform()
{
	double* data;
	int size = pSrc->GetHeight()*pSrc->GetWidth();
	// an r2c transform of a H x W image yields H x (W/2+1) complex values
	int spec_size = pSrc->GetHeight()*(pSrc->GetWidth()/2+1);
	double *rgb[] = {fftw_alloc_real(size),fftw_alloc_real(size),fftw_alloc_real(size)};
	double *lab[] = {fftw_alloc_real(size),fftw_alloc_real(size),fftw_alloc_real(size)};
	fftw_complex *spec_rgb[] = {fftw_alloc_complex(spec_size),fftw_alloc_complex(spec_size),fftw_alloc_complex(spec_size)};
	fftw_complex *spec_lab[] = {fftw_alloc_complex(spec_size),fftw_alloc_complex(spec_size),fftw_alloc_complex(spec_size)};
	
	double *theta = fftw_alloc_real(spec_size);
	double *phi = fftw_alloc_real(spec_size);
	
	fftw_complex *spec_gray = fftw_alloc_complex(spec_size);
	double *gray = fftw_alloc_real(size);
	
	// one forward plan is reused for all six channels via the new-array execute call
	fftw_plan p = fftw_plan_dft_r2c_2d(pSrc->GetHeight(), pSrc->GetWidth(),rgb[0], spec_rgb[0], FFTW_ESTIMATE);
	
	//copy data channels to r,g,b arrays
	data=pSrc->GetData();
	for(int i=0;i<size;++i){
		rgb[0][i] = *data++;
		rgb[1][i] = *data++;
		rgb[2][i] = *data++;
	}
	
	//transform to Lab space
	pSrc->Convert(TMO_LAB);
	pDst->Convert(TMO_LAB);
	
	
	//copy data channels to l,a,b array (each value scaled by 1/100)
	data=pSrc->GetData();
	for(int i=0;i<size;++i){
		lab[0][i] = *data++/100;
		lab[1][i] = *data++/100;
		lab[2][i] = *data++/100;
		
		//fprintf(stderr,"%f %f %f\n",lab[0][i],lab[1][i],lab[2][i]);
	}
	
	//compute fft of all channels
	fftw_execute_dft_r2c(p,rgb[0], spec_rgb[0]);
	fftw_execute_dft_r2c(p,rgb[1], spec_rgb[1]);
	fftw_execute_dft_r2c(p,rgb[2], spec_rgb[2]);
	fftw_execute_dft_r2c(p,lab[0], spec_lab[0]);
	fftw_execute_dft_r2c(p,lab[1], spec_lab[1]);
	fftw_execute_dft_r2c(p,lab[2], spec_lab[2]);
	
	fftw_destroy_plan(p);
	p = fftw_plan_dft_c2r_2d(pSrc->GetHeight(), pSrc->GetWidth(),spec_gray, gray, FFTW_ESTIMATE);
	
	//compute phi and theta coefficient
	// NOTE: the per-frequency values and their sums are currently not used for
	// the result; thetasum and phisum are overwritten with fixed values below
	// and the per-frequency mixing lines are commented out.
	double thetasum=0;
	double phisum=0;
	for(int i=0;i<spec_size;++i){
		double a2 = spec_lab[1][i][0]*spec_lab[1][i][0]+spec_lab[1][i][1]*spec_lab[1][i][1];
		double b2 = spec_lab[2][i][0]*spec_lab[2][i][0]+spec_lab[2][i][1]*spec_lab[2][i][1];
		phi[i] = a2/(a2+b2);
		phisum += phi[i];
		
		double rr2 = spec_rgb[0][i][0]*spec_rgb[0][i][0]+spec_rgb[0][i][1]*spec_rgb[0][i][1];
		double gg2 = spec_rgb[1][i][0]*spec_rgb[1][i][0]+spec_rgb[1][i][1]*spec_rgb[1][i][1];
		double bb2 = spec_rgb[2][i][0]*spec_rgb[2][i][0]+spec_rgb[2][i][1]*spec_rgb[2][i][1];
		double l2 = spec_lab[0][i][0]*spec_lab[0][i][0]+spec_lab[0][i][1]*spec_lab[0][i][1];
		double rgb2 = rr2+gg2+bb2;
		theta[i] = (rgb2-l2)/rgb2;
		thetasum += theta[i];
	}
	
	// fixed mixing weights: thetasum/spec_size = 0.6 and phisum/spec_size = 0.9
	thetasum=0.6*spec_size;
	phisum=0.9*spec_size;
	// gray = (1-theta)*L + theta*(phi*a + (1-phi)*b), divided by size because
	// the inverse FFTW transform is unnormalised
	for(int i=0;i<spec_size;++i){
		spec_gray[i][0] = ((1-thetasum/spec_size)*spec_lab[0][i][0]+thetasum/spec_size*(phisum/spec_size*spec_lab[1][i][0]+(1-phisum/spec_size)*spec_lab[2][i][0]))/size;
		//spec_gray[i][0] = ((1-theta[i])*spec_lab[0][i][0]+theta[i]*(phi[i]*spec_lab[1][i][0]+(1-phi[i])*spec_lab[2][i][0]))/size;
		spec_gray[i][1] = ((1-thetasum/spec_size)*spec_lab[0][i][1]+thetasum/spec_size*(phisum/spec_size*spec_lab[1][i][1]+(1-phisum/spec_size)*spec_lab[2][i][1]))/size; 
		//spec_gray[i][1] = ((1-theta[i])*spec_lab[0][i][1]+theta[i]*(phi[i]*spec_lab[1][i][1]+(1-phi[i])*spec_lab[2][i][1]))/size; 
	}
	
	fftw_execute(p);
	
	// first pass: find the value range and clear the a/b channels of pDst
	double minimum = 99999999999999999;
	double maximum = -99999999999999999;
	data=pDst->GetData();
	for(int i=0;i<size;++i){
		if(gray[i]>maximum) maximum=gray[i];
		if(gray[i]<minimum) minimum=gray[i];
		*data++ = gray[i]*100;
		*data++ = 0;//gray[i];
		*data++ = 0;//gray[i];
	}
	
	fprintf(stderr,"%f %f %d %d\n",minimum,maximum,size,spec_size);
	// second pass: rescale the first channel to 0..100. A constant image has
	// maximum == minimum; dividing by that zero range would fill the output
	// with NaN, so such an image is mapped to 0 instead.
	const double range = maximum-minimum;
	data=pDst->GetData();
	for(int i=0;i<size;++i){
		*data = (range > 0.0) ? 100*(gray[i]-minimum)/range : 0.0;
		data += 3;
	}
	
	fftw_destroy_plan(p);
	fftw_free(gray); fftw_free(spec_gray);
	fftw_free(phi); fftw_free(theta);
	fftw_free(rgb[0]);fftw_free(rgb[1]);fftw_free(rgb[2]);
	fftw_free(lab[0]);fftw_free(lab[1]);fftw_free(lab[2]);
	fftw_free(spec_rgb[0]);fftw_free(spec_rgb[1]);fftw_free(spec_rgb[2]);
	fftw_free(spec_lab[0]);fftw_free(spec_lab[1]);fftw_free(spec_lab[2]);
	
	pDst->Convert(TMO_RGB);
	return 0;
}
Exemplo n.º 24
0
void FFTRealForward::transform(int size, rsFloat *in, Complex *out)
{
  if (plan == 0)
    throw FFTException("[BUG] Can not transform on NULL plan.");
  fftw_execute_dft_r2c((fftw_plan_s *)plan, in, reinterpret_cast<fftw_complex *>(out));
}
Exemplo n.º 25
0
/*!
 * Compute the forward Fourier transform of a real signal.  A
 * real-to-complex FFT is used (for speed optimisation) and the complex
 * output is then expanded to its full size through conjugate symmetry.
 * 
 * \param[out] out (complex double*) Forward Fourier transform of input signal.
 * \param[in] in (double*) Real input signal.
 * \param[in] data 
 * - data[0] (fftw_plan*): The real-to-complex FFTW plan to use when
 *      computing the Fourier transform (passed as an input so that the
 *      FFTW can be FFTW_MEASUREd beforehand).
 * - data[1] (purify_image*): The image defining the size of the Fourier
 *      transform.
 *
 * \authors <a href="http://www.jasonmcewen.org">Jason McEwen</a>
 */
void purify_measurement_fft_real(void *out, void *in, 
				 void **data) {

  // Unpack the untyped arguments.
  double complex *y = (double complex*)out;
  fftw_plan *plan = (fftw_plan*)data[0];
  purify_image *img = (purify_image*)data[1];
  double complex *y_half;
  int iu, iv;

  // Temporary buffer for the half-plane output of the real-to-complex
  // FFT; it is sized for the full nx*ny plane.
  y_half = (complex double*)malloc(img->nx*img->ny*sizeof(complex double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(y_half);

  // Half-plane transform.
  fftw_execute_dft_r2c(*plan, 
		       (double*)in, 
		       y_half);

  // Copy every half-plane element into the full plane and write its
  // conjugate at the mirrored position.
  for (iu = 0; iu < img->nx; iu++) {
    for (iv = 0; iv < img->ny/2+1; iv++) {

      const double complex value = y_half[iu*(img->ny/2+1) + iv];
      int ind, ind_neg;

      purify_visibility_iuiv2ind(&ind, iu, iv, 
				 img->nx, img->ny);
      y[ind] = value;

      // The DC component has no mirror image.
      if (iu == 0 && iv == 0)
	continue;

      // Mirror each non-zero index about its own axis: iu -> nx - iu and
      // iv -> ny - iv.  An index equal to zero stays where it is, which
      // covers reflection along iu = 0, along iv = 0 and through the
      // origin in one expression.
      purify_visibility_iuiv2ind(&ind_neg, 
				 iu == 0 ? iu : img->nx - iu, 
				 iv == 0 ? iv : img->ny - iv, 
				 img->nx, img->ny);

      // Skip elements that map onto themselves.
      if (ind != ind_neg) y[ind_neg] = conj(value);
    }
  }
  
  // Release the temporary buffer.
  free(y_half);

}
Exemplo n.º 26
0
static R_INLINE void hbhankelize_fft(double *F,
                                     const double *U, const double *V,
                                     const hbhankel_matrix* h) {
  R_len_t Nx = h->length.x, Ny = h->length.y;
  R_len_t Lx = h->window.x, Ly = h->window.y;
  R_len_t Kx = Nx - Lx + 1, Ky = Ny - Ly + 1;
  R_len_t i, j;
  R_len_t wx, dwx, wy, dwy;

  double *iU, *iV;
  fftw_complex *cU, *cV;

  /* Allocate needed memory */
  iU = (double*) fftw_malloc(Nx * Ny * sizeof(double));
  iV = (double*) fftw_malloc(Nx * Ny * sizeof(double));
  cU = (fftw_complex*) fftw_malloc(Ny*(Nx / 2 + 1) * sizeof(fftw_complex));
  cV = (fftw_complex*) fftw_malloc(Ny*(Nx / 2 + 1) * sizeof(fftw_complex));

  /* Fill the arrays */
  memset(iU, 0, Nx * Ny * sizeof(double));
  for (j = 0; j < Ly; ++j)
    for (i = 0; i < Lx; ++i)
      iU[i + j*Nx] = U[i + j*Lx];

  memset(iV, 0, Nx * Ny * sizeof(double));
  for (j = 0; j < Ky; ++j)
    for (i = 0; i < Kx; ++i)
      iV[i + j*Nx] = V[i + j*Kx];

  /* Compute the FFTs */
  fftw_execute_dft_r2c(h->r2c_plan, iU, cU);
  fftw_execute_dft_r2c(h->r2c_plan, iV, cV);

   /* Dot-multiply */
  for (i = 0; i < Ny * (Nx/2 + 1); ++i)
    cU[i] = cU[i] * cV[i];

  /* Compute the inverse FFT */
  fftw_execute_dft_c2r(h->c2r_plan, cU, iU);

  /* Form the result */
  for (j = 0, wy = 1, dwy = 1; j < Ny; ++j, wy += dwy) {
    if (j == Ly - 1)
      dwy--;
    if (j == Ky - 1) /* Do not join two ifs! */
      dwy--;

    for (i = 0, wx = 1, dwx = 1; i < Nx; ++i, wx += dwx) {
      if (i == Lx - 1)
        dwx--;
      if (i == Kx - 1)
        dwx--;
      F[i+j*Nx] = iU[i+j*Nx] / wx / wy / Nx / Ny;
    }
  }

  fftw_free(iU);
  fftw_free(iV);
  fftw_free(cU);
  fftw_free(cV);
}
Exemplo n.º 27
0
void FFTHandler::forward(double* inout){
	fftw_execute_dft_r2c(fftForwardPlan,inout, (fftw_complex*)inout);
}