Exemplo n.º 1
0
/*
 * Release an FFT setup object.
 *
 * Every non-NULL plan held in the 2x2 "single" and "multi" tables is
 * destroyed and its slot cleared, then the object itself is freed.
 * A NULL handle is accepted and ignored.
 */
void
gmx_fft_destroy(gmx_fft_t    fft)
{
    int slot;

    if(fft == NULL)
    {
        return;
    }

    /* Visit the four table cells in row-major order: (0,0) (0,1) (1,0) (1,1). */
    for(slot=0;slot<4;slot++)
    {
        const int row = slot / 2;
        const int col = slot % 2;

        if(fft->single[row][col] != NULL)
        {
            rfftw_destroy_plan(fft->single[row][col]);
            fft->single[row][col] = NULL;
        }
        if(fft->multi[row][col] != NULL)
        {
            rfftwnd_destroy_plan(fft->multi[row][col]);
            fft->multi[row][col] = NULL;
        }
    }

    free(fft);
}
Exemplo n.º 2
0
/**
 * Releases a plan that was created earlier.
 *
 * Without USE_GPUS the plan goes to the fftw2 routine
 * rfftwnd_destroy_plan(), which returns nothing. With USE_GPUS it goes
 * to cufftDestroy(); that call does return a result code, which is
 * deliberately discarded for now.
 */
void sararfftnd_destroy_plan( sararfftnd_plan plan ) {
#ifndef USE_GPUS
  rfftwnd_destroy_plan( plan );
#else // #ifdef USE_GPUS
  (void) cufftDestroy( plan );
#endif
}
Exemplo n.º 3
0
/* Tear down a maxwell_data structure.

   For each of the d->nplans slots, the entries of d->plans and d->iplans
   are destroyed with whichever FFTW destroy routine the build
   configuration selects.  Afterwards the arrays hanging off the structure
   and the structure itself are freed.  A null pointer is ignored. */
void destroy_maxwell_data(maxwell_data *d)
{
     int p;

     if (!d)
	  return;

     for (p = 0; p < d->nplans; ++p) {
#if defined(HAVE_FFTW3)
	  FFTW(destroy_plan)((fftplan) (d->plans[p]));
	  FFTW(destroy_plan)((fftplan) (d->iplans[p]));
#elif defined(HAVE_FFTW)
#  if defined(HAVE_MPI) && defined(SCALAR_COMPLEX)
	  fftwnd_mpi_destroy_plan((fftplan) (d->plans[p]));
	  fftwnd_mpi_destroy_plan((fftplan) (d->iplans[p]));
#  elif defined(HAVE_MPI)
	  rfftwnd_mpi_destroy_plan((fftplan) (d->plans[p]));
	  rfftwnd_mpi_destroy_plan((fftplan) (d->iplans[p]));
#  elif defined(SCALAR_COMPLEX)
	  fftwnd_destroy_plan((fftplan) (d->plans[p]));
	  fftwnd_destroy_plan((fftplan) (d->iplans[p]));
#  else /* neither HAVE_MPI nor SCALAR_COMPLEX */
	  rfftwnd_destroy_plan((fftplan) (d->plans[p]));
	  rfftwnd_destroy_plan((fftplan) (d->iplans[p]));
#  endif
#endif /* HAVE_FFTW3 / HAVE_FFTW */
     }

     free(d->eps_inv);

#if defined(HAVE_FFTW3)
     FFTW(free)(d->fft_data);
     /* fft_data2 may alias fft_data; only release it when it is distinct */
     if (d->fft_data2 != d->fft_data)
	  FFTW(free)(d->fft_data2);
#else
     free(d->fft_data);
#endif

     free(d->k_plus_G);
     free(d->k_plus_G_normsqr);

     free(d);
}
Exemplo n.º 4
0
/*
 * Class:     jfftw_real_nd_Plan
 * Method:    destroyPlan
 * Signature: ()V
 *
 * Reads the native rfftwnd_plan handle stored in the object's byte[]
 * field "plan", destroys it, and then sets the field to null.
 *
 * The call is a no-op when the field cannot be looked up, when the field
 * is already null (for example after an earlier destroyPlan call), or
 * when the array contents cannot be obtained.  In the lookup and
 * out-of-memory cases JNI leaves a Java exception pending for the caller.
 */
JNIEXPORT void JNICALL Java_jfftw_real_nd_Plan_destroyPlan( JNIEnv* env, jobject obj )
{
	jclass clazz = (*env)->GetObjectClass( env, obj );
	jfieldID id = (*env)->GetFieldID( env, clazz, "plan", "[B" );
	jbyteArray arr;
	jbyte* carr;

	if( id == NULL )
		return;		/* field lookup failed */

	arr = (jbyteArray)(*env)->GetObjectField( env, obj, id );
	if( arr == NULL )
		return;		/* nothing to destroy: field was already cleared */

	carr = (*env)->GetByteArrayElements( env, arr, NULL );
	if( carr == NULL )
		return;		/* could not obtain the array contents */

	rfftwnd_destroy_plan( *(rfftwnd_plan*)carr );

	/* The bytes were only read, so release without copying them back. */
	(*env)->ReleaseByteArrayElements( env, arr, carr, JNI_ABORT );

	/* Clear the field so a repeated call cannot destroy the plan twice. */
	(*env)->SetObjectField( env, obj, id, NULL );
}
Exemplo n.º 5
0
/*
 * Destroy an MPI real n-dimensional plan.
 *
 * Each sub-plan that is present (p_fft_x, p_fft, p_transpose,
 * p_transpose_inv) is destroyed with its own destructor, the work buffer
 * is released if there is one, and finally the plan structure is freed.
 * A null plan is ignored.
 */
void rfftwnd_mpi_destroy_plan(rfftwnd_mpi_plan p)
{
    if (!p) {
	return;
    }

    if (p->p_fft_x) {
	fftw_destroy_plan(p->p_fft_x);
    }
    if (p->p_fft) {
	rfftwnd_destroy_plan(p->p_fft);
    }
    if (p->p_transpose) {
	transpose_mpi_destroy_plan(p->p_transpose);
    }
    if (p->p_transpose_inv) {
	transpose_mpi_destroy_plan(p->p_transpose_inv);
    }
    if (p->work) {
	fftw_free(p->work);
    }

    fftw_free(p);
}
Exemplo n.º 6
0
/* Call rfftw for a 1 band real image.
 *
 * The input is clipped into a double image, transformed with a 2D
 * real-to-complex FFTW plan into a half-complex buffer, and then written
 * to out one scanline at a time as double-precision complex pixels. Every
 * value is divided by the number of pixels (Xsize * Ysize). The right-hand
 * part of each output line is rebuilt from the half-complex data by
 * mirroring and conjugating.
 *
 * dummy: image passed to im_open_local() and IM_ARRAY() for the temporaries
 *        (presumably so that they are released together with it -- confirm
 *        against the VIPS allocation helpers).
 * in:    source image; must be uncoded with exactly one band.
 * out:   destination image.
 *
 * Returns 0 on success, -1 on any failure.
 */
static int 
rfwfft1( IMAGE *dummy, IMAGE *in, IMAGE *out )
{
	/* Total pixel count, used as the normalisation divisor below.
	 */
	const int size = in->Xsize * in->Ysize;

	/* Number of complex values per row of the half-complex buffer.
	 */
	const int half_width = in->Xsize / 2 + 1;

	/* Pack to double real here.
	 */
	IMAGE *real = im_open_local( dummy, "fwfft1:1", "t" );

	/* Transform to halfcomplex here: Ysize rows of half_width 
	 * (re, im) pairs of doubles.
	 */
	double *half_complex = IM_ARRAY( dummy, 
		in->Ysize * half_width * 2, double );

	rfftwnd_plan plan;
	double *buf, *q, *p;
	int x, y;

	if( !real || !half_complex || im_pincheck( in ) || im_outcheck( out ) )
		return( -1 );
	if( in->Coding != IM_CODING_NONE || in->Bands != 1 ) {
                im_error( "im_fwfft", _( "one band uncoded only" ) );
                return( -1 );
	}
	if( im_clip2d( in, real ) )
                return( -1 );

	/* Make the plan for the transform. Yes, they really do use nx for
	 * height and ny for width.
	 */
	if( !(plan = rfftw2d_create_plan( in->Ysize, in->Xsize,
		FFTW_FORWARD, FFTW_MEASURE | FFTW_USE_WISDOM )) ) {
                im_error( "im_fwfft", _( "unable to create transform plan" ) );
		return( -1 );
	}

	rfftwnd_one_real_to_complex( plan, 
		(fftw_real *) real->data, (fftw_complex *) half_complex );

	/* The plan is not needed after this single transform, so it is 
	 * destroyed before any of the error returns below.
	 */
	rfftwnd_destroy_plan( plan );

	/* WIO to out.
	 */
        if( im_cp_desc( out, in ) )
                return( -1 );
	out->Bbits = IM_BBITS_DPCOMPLEX;
	out->BandFmt = IM_BANDFMT_DPCOMPLEX;
        if( im_setupout( out ) )
                return( -1 );

	/* One output scanline of (re, im) double pairs.
	 */
	if( !(buf = (double *) IM_ARRAY( dummy, 
		IM_IMAGE_SIZEOF_LINE( out ), PEL )) )
		return( -1 );

	/* Copy to out and normalise. The right half is the up/down and 
	 * left/right flip of the left, but conjugated. Do the first 
	 * row separately, then mirror around the centre row.
	 */
	p = half_complex;
	q = buf;

	/* Row 0, left part: straight copy of the first half_width values.
	 */
	for( x = 0; x < half_width; x++ ) {
		q[0] = p[0] / size;
		q[1] = p[1] / size;
		p += 2;
		q += 2;
	}

	/* Row 0, right part: walk backwards through the same row, negating 
	 * the imaginary component.
	 */
	p = half_complex + ((in->Xsize + 1) / 2 - 1) * 2; 

	for( x = half_width; x < out->Xsize; x++ ) {
		q[0] = p[0] / size;
		q[1] = -1.0 * p[1] / size;
		p -= 2;
		q += 2;
	}

	if( im_writeline( 0, out, (PEL *) buf ) )
		return( -1 );

	for( y = 1; y < out->Ysize; y++ ) {
		/* Left part: straight copy of row y.
		 */
		p = half_complex + y * half_width * 2; 
		q = buf;

		for( x = 0; x < half_width; x++ ) {
			q[0] = p[0] / size;
			q[1] = p[1] / size;
			p += 2;
			q += 2;
		}

		/* Good grief. 
		 *
		 * Right part: start in row (Ysize - y) of the half-complex 
		 * buffer and walk backwards, conjugating. The (Xsize & 1) 
		 * term shifts the start by one for odd widths.
		 */
		p = half_complex + 2 *
			((out->Ysize - y + 1) * half_width - 2 + 
				(in->Xsize & 1));

		for( x = half_width; x < out->Xsize; x++ ) {
			q[0] = p[0] / size;
			q[1] = -1.0 * p[1] / size;
			p -= 2;
			q += 2;
		}

		if( im_writeline( y, out, (PEL *) buf ) )
			return( -1 );
	}

	return( 0 );
}
Exemplo n.º 7
0
/* Wrapper exported under the F77_FUNC_-mangled name: the plan handle is
   received by pointer, dereferenced, and passed to rfftwnd_destroy_plan. */
void F77_FUNC_(rfftwnd_f77_destroy_plan,RFFTWND_F77_DESTROY_PLAN)
(fftwnd_plan *p)
{
     fftwnd_plan plan = *p;

     rfftwnd_destroy_plan(plan);
}
Exemplo n.º 8
0
/*
 * Build a real n-dimensional plan, optionally tuned to specific arrays.
 *
 * The in/out arrays only influence planning when FFTW_MEASURE is set and
 * the needed array is non-NULL; otherwise the generic planner is used for
 * the leading rank-1 dimensions.  Having real arrays available lets the
 * planner take timing measurements without allocating extra scratch
 * space.
 *
 * The last dimension is planned with rfftw_create_plan and is stored in
 * the plan as n[rank-1]/2 + 1 complex elements; n_after[] is rescaled to
 * match.  The stride arguments are currently overridden to 1.
 *
 * Returns the new plan, or 0 if any step fails (the partially built plan
 * is destroyed before returning).
 */
fftwnd_plan rfftwnd_create_plan_specific(int rank, const int *n,
					 fftw_direction dir, int flags,
					 fftw_real *in, int istride,
					 fftw_real *out, int ostride)
{
     fftwnd_plan p;
     int d;
     const int last = rank - 1;
     /* the 1-d rfftw transforms are always done out-of-place in rexec2.c */
     int rflags = flags & ~FFTW_IN_PLACE;

     if (flags & FFTW_IN_PLACE) {
	  out = NULL;
	  ostride = istride;
     }

     /*
      * strides don't work yet, since it is not clear whether they
      * apply to real or complex data
      */
     istride = ostride = 1;

     p = fftwnd_create_plan_aux(rank, n, dir, flags);
     if (!p)
	  return 0;

     /* shrink the last dimension to its half-complex length */
     for (d = 0; d < last; ++d)
	  p->n_after[d] = (n[last]/2 + 1) * (p->n_after[d] / n[last]);
     if (rank > 0)
	  p->n[last] = n[last] / 2 + 1;

     p->plans = fftwnd_new_plan_array(rank);
     if (rank > 0 && !p->plans)
	  goto fail;

     if (rank > 0) {
	  p->plans[last] = rfftw_create_plan(n[last], dir, rflags);
	  if (!p->plans[last])
	       goto fail;
     }

     if (rank > 1) {
	  if (!(flags & FFTW_MEASURE) || in == 0
	      || (!p->is_in_place && out == 0)) {
	       /* no usable array to measure with: plan generically */
	       if (!fftwnd_create_plans_generic(p->plans, rank - 1, n,
						dir, flags | FFTW_IN_PLACE))
		    goto fail;
	  } else {
	       /* pick the array (and its stride) that holds complex data */
	       fftw_real *arr;
	       int stride;

	       if (dir == FFTW_COMPLEX_TO_REAL || (flags & FFTW_IN_PLACE)) {
		    arr = in;
		    stride = istride;
	       } else {
		    arr = out;
		    stride = ostride;
	       }

	       if (!fftwnd_create_plans_specific(p->plans, rank - 1, n,
						 p->n_after,
						 dir, flags | FFTW_IN_PLACE,
						 (fftw_complex *) arr,
						 stride,
						 0, 0))
		    goto fail;
	  }
     }

     p->nbuffers = 0;
     p->nwork = fftwnd_work_size(rank, p->n, flags | FFTW_IN_PLACE,
				 p->nbuffers + 1);
     if (p->nwork && !(flags & FFTW_THREADSAFE)) {
	  p->work = (fftw_complex *) fftw_malloc(p->nwork
						 * sizeof(fftw_complex));
	  if (!p->work)
	       goto fail;
     }

     return p;

fail:
     rfftwnd_destroy_plan(p);
     return 0;
}
Exemplo n.º 9
0
int main() {
    omp_set_num_threads(numCores); // Set the number of threads for OpenMP parallel sections
    fftw_threads_init(); // Initialize threaded FFTs
    rfftwnd_plan dp_c2r; // Inverse FFT plan
    rfftwnd_plan dp_r2c; // Forward FFT plan
    // Create the plans using FFTW_MEASURE to get fastest transforms, do this here so
    // that it is only done once and the plans reused.
    
    std::cout << "Creating FFTW plans...\n";
    dp_c2r = rfftw3d_create_plan(N, N, N, FFTW_COMPLEX_TO_REAL, FFTW_MEASURE);
    dp_r2c = rfftw3d_create_plan(N, N, N, FFTW_REAL_TO_COMPLEX, FFTW_MEASURE);
    
    double *kvec = new double[N];
    fftfreq(kvec);
    
    std::ofstream fout;
    std::ofstream tout;
    std::ifstream fin;
    
    fout.open("GalaxyNum.dat",std::ios::out);
    fout.close();
    
    std::vector< Pk > InputPower;
    int numKModes = 0;
    
    std::cout << "Reading input power file: " << CAMBfile << "\n";
    fin.open(CAMBfile.c_str(),std::ios::in);
    while (!fin.eof()) {
        Pk Input_temp;
        fin >> Input_temp.k >> Input_temp.P;
        
        if (!fin.eof()) {
            InputPower.push_back(Input_temp);
            ++numKModes;
        }
    }
    fin.close();
    
    double *kvals = new double[numKModes];
    double *InPow = new double[numKModes];
    
    for (int i = 0; i < numKModes; ++i) {
        kvals[i] = InputPower[i].k;
        InPow[i] = InputPower[i].P;
    }
    
    gsl_spline *Power = gsl_spline_alloc(gsl_interp_cspline, numKModes);
    gsl_interp_accel *acc = gsl_interp_accel_alloc();
    
    gsl_spline_init(Power, kvals, InPow, numKModes);
    
    fftw_complex *deltak3di = new fftw_complex[N_im];
    fftw_real *deltar3di = new fftw_real[N_tot];
    
#pragma omp parallel for
    for (int i = 0; i < N_tot; ++i) {
        deltar3di[i] = 0.0;
        if (i < N_im) {
            deltak3di[i].re = 0.0;
            deltak3di[i].im = 0.0;
        }
    }
    
    std::cout << "Distributing power over volume...\n";
    Gendk(kvec, Power, acc, deltak3di); // Call function to populate the power grid
    
    std::cout << "Performing initial one-time inverse FFT...\n";
    rfftwnd_threads_one_complex_to_real(numCores,dp_c2r,deltak3di,deltar3di); // FFT
    
    std::cout << "Taking the natural log...\n";
#pragma omp parallel for
    for (int i = 0; i < N_tot; ++i) {
        deltar3di[i] = log(1.0 + deltar3di[i]);
        if (i < N_im) {
            deltak3di[i].re = 0.0;
            deltak3di[i].im = 0.0;
        }
    }
    
    std::cout << "Performing initial one-time forward FFT...\n";
    rfftwnd_threads_one_real_to_complex(numCores,dp_r2c,deltar3di,deltak3di);
        
    std::cout << "Normalizing...\n";
#pragma omp parallel for
    for (int i = 0; i < N_im; ++i) {
        deltak3di[i].re /= N_tot;
        deltak3di[i].im /= N_tot;
    }
    
    delete[] deltar3di;
    
    tout.open("Timings.dat",std::ios::out);
    std::cout << "Starting to generate mocks...\n";
    for (int mock = startNum-1; mock < numMocks; ++mock) {
        double start_time = omp_get_wtime();
        std::string lrgfile = filename(base, mock+1, ext);
        std::cout << "Generating mock " << lrgfile << "\n";
        
        fftw_complex *deltak3d = new fftw_complex[N_im];
        fftw_real *deltar3d = new fftw_real[N_tot];
        
        // Initialize power array. Do it in parallel to speed things up.        
#pragma omp parallel for
        for (int i = 0; i < N_tot; ++i) {
            deltar3d[i] = 0.0;
            if (i < N_im) {
                deltak3d[i].re = 0.0;
                deltak3d[i].im = 0.0;
            }
        }
        
        std::cout << "    Setting up for the inverse FFT...\n";
        Sampdk(kvec, deltak3di, deltak3d);
        
        if (powOut) {
            std::cout << "    Outputting raw power array...\n";
            std::string powerfile = filename(powbase, mock+1, extbin);
            fout.open(powerfile.c_str(),std::ios::out|std::ios::binary);
            fout.write((char *) deltak3d, N_im*sizeof(fftw_complex));
            fout.close();
        }
        
        std::cout << "    Performing second inverse FFT...\n";
        rfftwnd_threads_one_complex_to_real(numCores,dp_c2r,deltak3d,deltar3d);
        
        if (matOut) {
            std::cout << "    Outputting matter field array...\n";
            std::string matterfile = filename(matbase, mock+1, extbin);
            fout.open(matterfile.c_str(),std::ios::out|std::ios::binary);
            fout.write((char *) deltar3d, N_tot*sizeof(fftw_real));
            fout.close();
        }
        
        double mean = 0.0;
        double variance = 0.0;
        double dr_max = 0.0;
        double dr_min = 0.0;
        
        for (int i = 0; i < N_tot; ++i) {
            mean += deltar3d[i]/N_tot;
            if (deltar3d[i] > dr_max) dr_max = deltar3d[i];
            if (deltar3d[i] < dr_min) dr_min = deltar3d[i];
        }
        std::cout << "    Max  = " << dr_max << "\n";
        std::cout << "    Min  = " << dr_min << "\n";
        std::cout << "    Mean = " << mean << "\n";
        
        std::cout << "    Calculating variance...\n";
        for (int i = 0; i < N_tot; ++i) {
            deltar3d[i] -= mean;
            variance += (deltar3d[i])*(deltar3d[i])/(N_tot-1);
        }
        
        std::cout << "    Poisson sampling...\n";
        Gendr(lrgfile, variance, deltar3d);
        
        delete[] deltak3d;
        delete[] deltar3d;
        
        double totaltime = omp_get_wtime()-start_time;
        std::cout << "    Time to generate mock: " << totaltime << " seconds\n";
        tout << lrgfile << " " << totaltime << "\n";
    }
    tout.close();
    
    delete[] kvec;
    delete[] deltak3di;
    delete[] kvals;
    delete[] InPow;
    
    rfftwnd_destroy_plan(dp_r2c);
    rfftwnd_destroy_plan(dp_c2r);
    
    gsl_spline_free(Power);
    gsl_interp_accel_free(acc);
    
    return 0;
}
Exemplo n.º 10
0
/*
 * OSCAR entry point: scans a search volume with a rotated template and
 * writes, per voxel, the best correlation value (Out.ccf), the index of
 * the angle that produced it (Out.ang) and a locally normalised
 * correlation (Out.ccf.norm).
 *
 * Command line (15 arguments after the program name):
 *   Volume Template Out Phi_min Phi_max Phi_step Psi_min Psi_max Psi_step
 *   The_min The_max The_step Pointspread-function mask-file dim_of_fft
 *
 * The search volume is processed in cubic sub-regions of edge dim_of_fft.
 * For each sub-region every MPI rank handles a contiguous slice of the
 * angle list; rank 0 then merges the per-rank maxima, writes the raw
 * results and performs the local normalisation.
 *
 * Exits with status 1 on a usage error or allocation failure; otherwise
 * returns 0 after MPI_Finalize().
 */
int
main (int argc, char *argv[])
{
  struct em_file inputdata1;
  struct em_file inputdata2;
  struct em_file inputdata3;
  struct em_file inputdata4;
  struct em_file outputdata;

  fftw_real *Vol_tmpl_sort, *Volume, *e3, *PointCorr, *sqconv;
  fftw_complex *C3, *PointVolume, *PointSq;
  rfftwnd_plan p3, pi3, r3, ri3;
  fftw_real scale;

  struct tm *zeit;
  struct tm start;
  char name[200];
  int Rx_max, Ry_max, Rz_max;
  int Rx_min, Ry_min, Rz_min;
  int Vx_min, Vy_min, Vz_min;
  int Vx_max, Vy_max, Vz_max;
  float Phi, Psi, Theta, winkel_lauf;
  float *Rot_tmpl, *Vol_tmpl;
  int i, j, k, tmpx, tmpy, tmpz,lauf_pe, ksub;
  int ijk;
  int lauf, n;
  float max, eps;
  time_t lt;
  float Ctmp, Ctmpim, Dtmp, Dtmpim;
  int dim_fft;
  int sub[3],range[3],range_sub[3],subc[3],offset[3],dimarray[3];
  int FullVolume_dims[3];
  int nr[3];
  int area[3];
  
/* MPI variables */
  int winkel_max, winkel_min;
  int winkel_max_pe, winkel_min_pe;
  int winkel_step_pe;
  int Phi_max, Psi_max, Theta_max;
  int Phi_min, Psi_min, Theta_min;
  int Phi_step, Psi_step, Theta_step;
  int Theta_winkel_start, Psi_winkel_start, Phi_winkel_start;
  int Theta_winkel_nr, Psi_winkel_nr, Phi_winkel_nr;
  int Theta_winkel_end, Psi_winkel_end, Phi_winkel_end;
  int Theta_steps, Psi_steps, Phi_steps;
  float Theta_winkel_rest_nr, Psi_winkel_rest_nr, Phi_winkel_rest_nr;
  int in_max;
  float rms_wedge, tempccf;
  float *Ergebnis, *conv;
  float cycles;
  int cycle;
  
/* end of MPI variables */

  /* argv[15] (dim_of_fft) is read below, so all 15 arguments are needed. */
  if (argc < 16)
    {
      printf ("\n\n");
      printf (" 'OSCAR' is an Optimized SCanning AlgoRithm for \n");
      printf (" local correlation.\n");
      printf (" All files in EM-V-int4 format !!!\n\n");
      printf (" Input: Volume to be searched, Template mask for local \n ");
      printf ("   correlation, pointspread function and angular search \n");
      printf ("   range. \n");
      printf (" Output: locally normalized X-Correlation Function Out.ccf.norm, \n");
      printf ("   non-normalized X-Correlation Function Out.ccf, and Out.ang \n");
      printf ("   with the corresponding angles.\n\n");
      printf (" usage: oscar Volume Template Out ...\n");
      printf ("         ... Phi_min Phi_max Phi_step Psi_min Psi_max Psi_step The_min The_max The_step\n");
      printf ("    ... Poinspread-function mask-file dim_of_fft\n\n");
      printf (" with Message Passing Interface (MPI)\n");
      printf (" the total number of angles must be modulo\n");
      printf (" of used processors!\n\n");
      printf (" Linux:   	1.'lamboot' to start MPI\n");
      printf ("		2.'mpirun -np 2 oscar Volume Templ Out 30 180 30 30 180 30 30 180 30 Poinspread-function mask-file 256'\n\n");
      printf (" In this version asymmetric masks can be used ! \n");
      printf (" last revision  ,  11.11.03, Friedrich Foerster");
      printf (" \n\n");
      exit (1);
    }

  MPI_Init (&argc, &argv);
  MPI_Comm_size (MPI_COMM_WORLD, &mysize);
  MPI_Comm_rank (MPI_COMM_WORLD, &myrank);
  /* read the dimensions */
  // Dimension of fft
  dim_fft = atoi (argv[15]);
  nr[0]=1;
  nr[1]=1;
  nr[2]=1;
  area[0]=dim_fft;
  area[1]=dim_fft;
  area[2]=dim_fft;
  read_em_header(argv[1], &inputdata1); /* Searchvolume */
  read_em (argv[2], &inputdata2); /* Template */
  FullVolume_dims[0]=inputdata1.dims[0];
  FullVolume_dims[1]=inputdata1.dims[1];
  FullVolume_dims[2]=inputdata1.dims[2];
  Rx_min = 1;
  Ry_min = 1;
  Rz_min = 1;
  Rx_max = (inputdata2.dims[0]);
  Ry_max = (inputdata2.dims[1]);
  Rz_max = (inputdata2.dims[2]);
  Vx_min = 1;
  Vy_min = 1;
  Vz_min = 1;
  Vx_max = dim_fft;
  Vy_max = dim_fft;
  Vz_max = dim_fft;
  p3 = rfftw3d_create_plan (Vx_max, Vy_max, Vz_max, FFTW_REAL_TO_COMPLEX,
			    FFTW_MEASURE | FFTW_IN_PLACE);	/*FFTW_ESTIMATE FFTW_MEASURE */
  pi3 = rfftw3d_create_plan (Vx_max, Vy_max, Vz_max, FFTW_COMPLEX_TO_REAL,
			     FFTW_MEASURE | FFTW_IN_PLACE);
  r3 = rfftw3d_create_plan (Rx_max, Rx_max, Rx_max, FFTW_REAL_TO_COMPLEX,
			    FFTW_MEASURE | FFTW_IN_PLACE);	/*FFTW_ESTIMATE FFTW_MEASURE */
  ri3 = rfftw3d_create_plan (Rx_max, Rx_max, Rx_max, FFTW_COMPLEX_TO_REAL,
			     FFTW_MEASURE | FFTW_IN_PLACE);
  if (myrank == 0)
    {
      printf("Plans for FFTW created \n");fflush(stdout);
    }
  Volume = (fftw_real *) calloc (Vx_max * Vx_max * 2 * (Vx_max / 2 + 1),sizeof (fftw_real) );
  Rot_tmpl = (float *) malloc (sizeof (float) * Rx_max * Ry_max * Rz_max);
  Vol_tmpl = (float *) malloc (sizeof (float) * Vx_max * Vy_max * Vz_max);
  conv = (float *) malloc (sizeof (float) * Vx_max * Vy_max * Vz_max);
  sqconv = (fftw_real *) calloc(Vz_max * Vy_max * 2 * (Vx_max / 2 + 1), sizeof (fftw_real));
  /* These buffers are dereferenced unconditionally below. */
  if (!Volume || !Rot_tmpl || !Vol_tmpl || !conv || !sqconv)
    {
      printf ("Memory allocation  failure in work buffers!!!");
      fflush (stdout);
      exit (1);
    }
  if (!
      (inputdata1.floatdata =
       (float *) malloc (sizeof (float) * Vx_max * Vy_max * Vz_max)))
    {
      printf ("Memory allocation  failure in inputdata1.floatdata!!!");
      fflush (stdout);
      exit (1);
    }
  if (!
      (outputdata.floatdata =
       (float *) malloc (sizeof (float) * Vx_max * Vy_max * Vz_max)))
    {
      printf ("Memory allocation  failure in outputdata.floatdata!!!");
      fflush (stdout);
      exit (1);
    }
  
  if (!
      (Vol_tmpl_sort = (fftw_real *) calloc (Vz_max*Vy_max*2*(Vx_max / 2 + 1), sizeof (fftw_real) )))
    {
      printf ("Memory allocation  failure in Volume_tmpl_sort!!!");
      /* sizeof yields size_t; cast so the argument matches %i */
      printf ("Nx = %i, Ny = %i, Nz = %i, bytes = %i \n",2 *(Vx_max / 2 + 1),Vy_max, Vz_max, (int) sizeof (fftw_real));
      fflush (stdout);
      exit (1);
    }
  if (!(Ergebnis = (float *) calloc (Vz_max * Vy_max * Vx_max, sizeof (float))))
    {
      printf ("Memory allocation  failure in Ergebnis!!!");
      fflush (stdout);
      exit (1);
    }
  /* angular search space */
  Phi_min = atof (argv[4]);
  Phi_max = atof (argv[5]);
  Phi_step = atof (argv[6]);
  Psi_min = atof (argv[7]);
  Psi_max = atof (argv[8]);
  Psi_step = atof (argv[9]);
  Theta_min = atof (argv[10]);
  Theta_max = atof (argv[11]);
  Theta_step = atof (argv[12]);
  /* Pointspread Function*/
  read_em (argv[13], &inputdata3);
  /* mask function */
  read_em (argv[14], &inputdata4);
  Phi_steps = (Phi_max - Phi_min) / Phi_step + 1;
  Psi_steps = (Psi_max - Psi_min) / Psi_step + 1;
  Theta_steps = (Theta_max - Theta_min) / Theta_step + 1;
  winkel_max = Phi_steps * Psi_steps * Theta_steps;
  winkel_min = 0;
  range[0]=dim_fft-1;
  range[1]=dim_fft-1;
  range[2]=dim_fft-1;
  range_sub[0]=range[0]-Rx_max;
  range_sub[1]=range[1]-Rx_max;
  range_sub[2]=range[2]-Rx_max;
  sub[0]=1;
  sub[1]=1;
  sub[2]=1;
  /* total number of sub-regions, used only for the progress display */
  cycles=(int)(FullVolume_dims[2]/(dim_fft-Rx_max)+0.5);
  cycles=(int)(FullVolume_dims[1]/(dim_fft-Rx_max)+0.5)*cycles;
  cycles=(int)(FullVolume_dims[0]/(dim_fft-Rx_max)+0.5)*cycles;
  cycle=0;
  if (myrank == 0)
    {
      printf ("\n oscar starts to run ... ");tack (&start);fflush (stdout);
      /* prepare Output; snprintf bounds the write to name[] */
      snprintf (name, sizeof name, "%s.ccf", argv[3]);
      printf ("\nCreate outputfile: %s ... \n", name);fflush(stdout);
      create_em (name, FullVolume_dims);
      snprintf (name, sizeof name, "%s.ang", argv[3]);
      printf ("Create outputfile: %s ... \n", name);fflush(stdout);
      create_em (name, FullVolume_dims);
      snprintf (name, sizeof name, "%s.ccf.norm", argv[3]);
      printf ("Create outputfile: %s ... \n", name);fflush(stdout);
      create_em (name, FullVolume_dims);
    }
  for (sub[2]=1; sub[2] < FullVolume_dims[2]-Rz_max;sub[2]=sub[2]+dim_fft-Rz_max)
    {
      if (myrank == 0)
	{
	  tack (&start);
	  printf ("%f%%..", (float) (cycle / cycles * 100));
	  fflush (stdout);
	}

      for (sub[1]=1; sub[1] < FullVolume_dims[1]-Ry_max;sub[1]=sub[1]+dim_fft-Ry_max)
	{
	  for (sub[0]=1; sub[0] < FullVolume_dims[0]-Rx_max;sub[0]=sub[0]+dim_fft-Rx_max)
	    {
	      cycle=cycle+1;
	      subc[0]=sub[0];
	      subc[1]=sub[1];
	      subc[2]=sub[2];
	      if (sub[2] + range[2] > FullVolume_dims[2]) subc[2]=FullVolume_dims[2]-range[2];  /* we are at the corner ?!*/
	      if (sub[1] + range[1] > FullVolume_dims[1]) subc[1]=FullVolume_dims[1]-range[1];  /* we are at the corner ?!*/
	      if (sub[0] + range[0] > FullVolume_dims[0]) subc[0]=FullVolume_dims[0]-range[0];  /* we are at the corner ?!*/
	      read_em_subregion (argv[1], &inputdata1,subc,range);
	      read_em_subregion (argv[1], &outputdata,subc,range);
	      /* re-sort the data into the padded in-place FFT layout */
	      lauf = 0;
	      for (k = 0; k < Vz_max; k++)
		{
		  for (j = 0; j < Vy_max; j++)
		    {
		      for (i = 0; i < Vx_max; i++)
			{
			  /* square - needed for normalization */
			  sqconv[i + 2 * (Vx_max / 2 + 1) * (j + Vy_max * k)] = inputdata1.floatdata[lauf]*inputdata1.floatdata[lauf];
			  Volume[i + 2 * (Vx_max / 2 + 1) * (j + Vy_max * k)] = inputdata1.floatdata[lauf];
			  inputdata1.floatdata[lauf] = -1.0; /* small start value for the max operation; the CCFs are stored here */
			  outputdata.floatdata[lauf] = -1.0; /* the angle indices are stored here */
			  lauf++;
			}
		    }
		}
	      rfftwnd_one_real_to_complex (p3, &Volume[0], NULL); /* one-time FFT of the search volume */
	      rfftwnd_one_real_to_complex (p3, &sqconv[0], NULL); /* FFT of square*/
	      /* Set here rather than inside the angle loop, so that the
		 normalization below has a valid pointer even when this
		 rank's angle slice is empty. */
	      PointVolume = (fftw_complex *) & Volume[0];
	      winkel_step_pe = (int) winkel_max / mysize;
	      winkel_min_pe = myrank * winkel_step_pe;
	      winkel_max_pe = winkel_min_pe + winkel_step_pe;
	      Theta_winkel_nr = (int) winkel_min_pe / (Psi_steps * Phi_steps);
	      Theta_winkel_rest_nr = winkel_min_pe - Theta_winkel_nr * (Psi_steps * Phi_steps);
	      Psi_winkel_nr = (int) Theta_winkel_rest_nr / (Phi_steps);
	      Psi_winkel_rest_nr = Theta_winkel_rest_nr - Psi_winkel_nr * (Phi_steps);
	      Phi_winkel_nr = (int) Psi_winkel_rest_nr;
	      Theta = Theta_winkel_nr * Theta_step + Theta_min;
	      /* Phi starts one step early; the loop below advances it first */
	      Phi = Phi_winkel_nr * Phi_step + Phi_min - Phi_step;
	      Psi = Psi_winkel_nr * Psi_step + Psi_min;
	      eps = 0.001;
	      n = 0;
	      // Friedrich -> count the voxels
	      n = countvoxel(inputdata4.dims[0], inputdata4.floatdata, eps);
	      eps = 0.001;
	      for (winkel_lauf = winkel_min_pe; winkel_lauf < winkel_max_pe;winkel_lauf++)
		{
		  if (Phi < Phi_max)
		    Phi = Phi + Phi_step;
		  else
		    {
		      Phi = Phi_min;
		      Psi = Psi + Psi_step;
		    }
		  if (Psi > Psi_max)
		    {
		      Psi = Psi_min;
		      Theta = Theta + Theta_step;
		    }
		  tom_rotate3d (&Rot_tmpl[0], &inputdata2.floatdata[0], Phi, Psi, Theta, Rx_max, Ry_max, Rz_max);
		  /*calculate Ref variance */
		  rms_wedge = energizer (Rx_min, Rx_max, n, &Rot_tmpl[0], &inputdata3.floatdata[0], &inputdata4.floatdata[0], r3, ri3);
		  pastes (&Rot_tmpl[0], &Vol_tmpl[0], 1, 1, 1, Rx_max, Ry_max, Rz_max, Vx_max);
		  scale = 1.0 / ((double)Vx_max * (double)Vy_max * (double)Vz_max * ((double) rms_wedge) );
		  //printf("hippo1: scale = %.10f \n",scale);
		  sort4fftw(&Vol_tmpl_sort[0],&Vol_tmpl[0],Vx_max, Vy_max, Vz_max);
		  rfftwnd_one_real_to_complex (p3, &Vol_tmpl_sort[0], NULL);
		  C3 = (fftw_complex *) & Vol_tmpl_sort[0];
		  /* Correlation */
		  correl(&PointVolume[0], &C3[0], Vx_max, Vy_max, Vz_max, scale);
		  /* back to real space */
		  rfftwnd_one_complex_to_real (pi3, &C3[0], NULL);
		  PointCorr = (fftw_real *) & C3[0];
		  /* re-sort the data */
		  sortback4fftw( &PointCorr[0], &Ergebnis[0], Vx_max, Vy_max, Vz_max);
		  // crossen
		  cross(&Ergebnis[0], Vx_max);
		  /* keep the per-voxel maximum and the angle index that produced it */
		  lauf = 0;
		  for (k = 0 ; k < Vz_max  ; k++)
		    {
		      for (j = 0; j < Vy_max; j++)
			{
			  for (i = 0; i < Vx_max; i++)
			    {
			      if (inputdata1.floatdata[lauf] < Ergebnis[lauf] )
				{
				  inputdata1.floatdata[lauf] = Ergebnis[lauf];
				  outputdata.floatdata[lauf] = (int) winkel_lauf;
				}
			      lauf++;
			    }
			}
		    }
		}				/* end of winkel_lauf loop */
	      //FF
	      MPI_Barrier (MPI_COMM_WORLD);
	      /* collect results (myrank 0) */
	      if (myrank == 0)
		{
		  for (lauf_pe = 1; lauf_pe < mysize; lauf_pe++)
		    {
		      MPI_Recv (&Ergebnis[0], Vx_max * Vy_max * Vz_max, MPI_FLOAT, lauf_pe,
				99, MPI_COMM_WORLD, &status);
		      MPI_Recv (&conv[0], Vx_max * Vy_max * Vz_max, MPI_FLOAT,
				lauf_pe, 98, MPI_COMM_WORLD, &status);
		      /* use conv as temporary memory for angles  */
		      for (lauf = 0; lauf < Vx_max * Vy_max * Vz_max; lauf++)
			{
			  if (inputdata1.floatdata[lauf] < Ergebnis[lauf])
			    {
			      inputdata1.floatdata[lauf] = Ergebnis[lauf];
			      outputdata.floatdata[lauf] = conv[lauf];
			    }
			}
		    }
		  /* results collected */
		}
	      // myrank > 0: send results
	      else
		{
		  MPI_Send (inputdata1.floatdata, Vx_max * Vy_max * Vz_max, MPI_FLOAT, 0,
			    99, MPI_COMM_WORLD);
		  MPI_Send (outputdata.floatdata, Vx_max * Vy_max * Vz_max, MPI_FLOAT, 0,
			    98, MPI_COMM_WORLD);
		}
	      MPI_Barrier (MPI_COMM_WORLD);
	      // write out the non-normalized volume and the angles
	      subc[0]=subc[0]+Rx_max/2;
	      subc[1]=subc[1]+Rx_max/2;
	      subc[2]=subc[2]+Rx_max/2;
	      if (myrank==0)
		{
		  offset[0]=Rx_max/2;
		  offset[1]=Rx_max/2;
		  offset[2]=Rx_max/2;
		  dimarray[0]=dim_fft;
		  dimarray[1]=dim_fft;
		  dimarray[2]=dim_fft;
		  snprintf (name, sizeof name, "%s.ccf", argv[3]);
		  write_em_subsubregion (name, &inputdata1,subc,range_sub,offset,dimarray);
		  snprintf (name, sizeof name, "%s.ang", argv[3]);
		  write_em_subsubregion (name, &outputdata,subc,range_sub,offset,dimarray);
		  /* ------------------- normalization - here only PE 0 ---------- */
		  pastes (&inputdata4.floatdata[0], &Vol_tmpl[0], 1, 1, 1, Rx_max, Ry_max, Rz_max, Vx_max); /* paste mask into zero volume*/
		  /* 1st local mean */
		  sort4fftw(&Vol_tmpl_sort[0], &Vol_tmpl[0], Vx_max, Vy_max, Vz_max);
		  rfftwnd_one_real_to_complex (p3, &Vol_tmpl_sort[0], NULL);
		  C3 = (fftw_complex *) & Vol_tmpl_sort[0];
		  /* Convolution of volume and mask */
		  scale = 1.0 / ((double)Vx_max * (double)Vy_max * (double)Vz_max );
		  convolve( &PointVolume[0], &C3[0], Vx_max, Vy_max, Vz_max, scale);
		  rfftwnd_one_complex_to_real (pi3, &C3[0], NULL);
		  PointCorr = (fftw_real *) & C3[0];
		  /* re-sort the data */
		  sortback4fftw( &PointCorr[0], &conv[0], Vx_max, Vy_max, Vz_max);
		  /* 2nd : convolution of square and resorting*/
		  pastes (&inputdata4.floatdata[0], &Vol_tmpl[0], 1, 1, 1, Rx_max, Ry_max, Rz_max, Vx_max); /* paste mask into zero volume*/
		  sort4fftw( &Vol_tmpl_sort[0], &Vol_tmpl[0], Vx_max, Vy_max, Vz_max);
		  rfftwnd_one_real_to_complex (p3, &Vol_tmpl_sort[0], NULL);
		  C3 = (fftw_complex *) & Vol_tmpl_sort[0];
		  PointSq = (fftw_complex *) & sqconv[0];// set pointer to FFT of square
		  convolve( &PointSq[0], &C3[0], Vx_max, Vy_max, Vz_max, scale);
		  rfftwnd_one_complex_to_real (pi3, &C3[0], NULL);
		  PointCorr = (fftw_real *) &C3[0];
		  //FF
		  lauf = 0;
		  for (k = 0; k < Vz_max; k++)
		    {
		      for (j = 0; j < Vy_max; j++)
			{
			  for (i = 0; i < Vx_max; i++)
			    {
			      conv[lauf] = sqrt(PointCorr[i + 2 * (Vx_max / 2 + 1) * (j + Vy_max * k)] - conv[lauf]*conv[lauf]/((float) n) ) ;/*local variance*/
			      lauf++;
			    }
			}
		    }
		  cross(&conv[0], Vx_max);
		  /* perform division: the loop must run on lauf over the whole
		     sub-volume, and floatdata (not the struct) is what gets indexed */
		  for (lauf = 0; lauf < Vx_max * Vy_max * Vz_max; lauf++)
		    {
		      if (conv[lauf] > eps)
			{
			  inputdata1.floatdata[lauf] = inputdata1.floatdata[lauf]/conv[lauf];
			}
		      else
			{
			  inputdata1.floatdata[lauf] = 0;
			}
		    }
		  snprintf (name, sizeof name, "%s.ccf.norm", argv[3]);
		  write_em_subsubregion (name, &inputdata1,subc,range_sub,offset,dimarray);
		}
	      MPI_Barrier (MPI_COMM_WORLD);
	    }
	} /* these are the new brackets from the subregion_read , SN */
    }
  free(Ergebnis);
  free(inputdata1.floatdata);
  free(inputdata2.floatdata);
  free(inputdata3.floatdata);
  free(inputdata4.floatdata);
  rfftwnd_destroy_plan(p3);
  rfftwnd_destroy_plan(pi3);
  rfftwnd_destroy_plan(r3);
  rfftwnd_destroy_plan(ri3);
  free(Volume);
  free(sqconv);
  free(conv);
  free(Rot_tmpl);
  free(Vol_tmpl);
  free(Vol_tmpl_sort);
  free(outputdata.floatdata);
  if (myrank==0)
    {
      printf ("oscar finished. ");
      tack (&start); fflush(stdout);
    }
  MPI_Finalize();

  return 0;
  /* end main */
}
/*
 * Time planning and execution of one in-place real n-d transform.
 *
 * sz       - rank and per-dimension sizes of the transform
 * dir      - FFTW_REAL_TO_COMPLEX or the reverse direction
 * flags    - extra planner flags; FFTW_IN_PLACE is always added
 * specific - non-zero to plan with rfftwnd_create_plan_specific on the
 *            benchmark array, zero to use rfftwnd_create_plan
 *
 * The buffer holds howmany_fields interleaved transforms, with the last
 * dimension padded by two reals.  Results are printed through
 * WHEN_VERBOSE.
 */
void test_speed_nd_aux(struct size sz,
		       fftw_direction dir, int flags, int specific)
{
     fftw_real *data;
     fftwnd_plan plan;
     double t;
     fftw_time t0, t1;
     int d, N, plan_flags;

     /* only bench in-place multi-dim transforms */
     flags |= FFTW_IN_PLACE;
     plan_flags = speed_flag | flags | wisdom_flag | no_vector_flag;

     /* product of the leading dimensions times the padded last one */
     N = 1;
     for (d = 0; d < sz.rank - 1; ++d)
	  N *= sz.narray[d];
     N *= (sz.narray[d] + 2);

     data = (fftw_real *) fftw_malloc(N * howmany_fields * sizeof(fftw_real));

     t0 = fftw_get_time();
     if (specific)
	  plan = rfftwnd_create_plan_specific(sz.rank, sz.narray, dir,
					      plan_flags,
					      data, howmany_fields, 0, 1);
     else
	  plan = rfftwnd_create_plan(sz.rank, sz.narray, dir, plan_flags);
     t1 = fftw_get_time();
     CHECK(plan != NULL, "can't create plan");

     t = fftw_time_to_sec(fftw_time_diff(t1, t0));
     WHEN_VERBOSE(2, printf("time for planner: %f s\n", t));

     WHEN_VERBOSE(2, printf("\n"));
     WHEN_VERBOSE(2, (rfftwnd_print_plan(plan)));
     WHEN_VERBOSE(2, printf("\n"));

     if (dir != FFTW_REAL_TO_COMPLEX) {
	  FFTW_TIME_FFT(rfftwnd_complex_to_real(plan, howmany_fields,
						(fftw_complex *) data,
						howmany_fields, 1,
						0, 0, 0),
			data, N * howmany_fields, t);
     } else {
	  FFTW_TIME_FFT(rfftwnd_real_to_complex(plan, howmany_fields,
						data, howmany_fields, 1,
						0, 0, 0),
			data, N * howmany_fields, t);
     }

     rfftwnd_destroy_plan(plan);

     WHEN_VERBOSE(1, printf("time for one fft: %s", smart_sprint_time(t)));
     WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
     WHEN_VERBOSE(1, printf("\"mflops\" = 5/2 (N log2 N) / (t in microseconds)"
			" = %f\n", 0.5 * howmany_fields * mflops(t, N)));

     fftw_free(data);

     WHEN_VERBOSE(1, printf("\n"));
}
/*
 * Stress the planner: create and destroy many plans at random, which
 * exercises the garbage-collecting allocator of twiddle factors.
 *
 * rank  number of dimensions for the multi-dimensional plans; values
 *       below 1 are treated as 1.  When rank is 1, one-dimensional rfftw
 *       plans are churned as well.
 *
 * chk_mem_leak is cleared and verbose is lowered by one for the duration
 * of the test; both are restored before returning.
 */
void test_planner(int rank)
{
     const int total_iters = PLANNER_TEST_SIZE * PLANNER_TEST_SIZE;
     fftw_plan plans_1d[PLANNER_TEST_SIZE];
     fftwnd_plan plans_nd[PLANNER_TEST_SIZE];
     int *sizes;
     int size_limit;
     int iter, slot, d, len;

     chk_mem_leak = 0;
     verbose--;

     please_wait();
     if (rank < 1) {
          rank = 1;
     }

     sizes = (int *) fftw_malloc(rank * sizeof(int));

     /* per-dimension size limit derived from 8192^(1/rank) */
     size_limit = (int) pow(8192.0, 1.0/rank);

     /* every slot starts out empty */
     for (slot = 0; slot < PLANNER_TEST_SIZE; ++slot) {
          plans_1d[slot] = (fftw_plan) 0;
          plans_nd[slot] = (fftwnd_plan) 0;
     }

     for (iter = 0; iter < total_iters; ++iter) {
          /* choose the slot whose plan gets replaced */
          slot = rand();
          if (slot < 0) {
               slot = -slot;
          }
          slot %= PLANNER_TEST_SIZE;

          /* draw a non-zero random length for each dimension */
          for (d = 0; d < rank; ++d) {
               for (;;) {
                    len = rand();
                    if (len < 0) {
                         len = -len;
                    }
                    len = len % size_limit + 1;
                    if (len != 0) {
                         break;
                    }
               }
               sizes[d] = len;
          }

          if (rank == 1) {
               if (plans_1d[slot]) {
                    rfftw_destroy_plan(plans_1d[slot]);
               }

               plans_1d[slot] = rfftw_create_plan(sizes[0], random_dir(),
                                                  measure_flag | wisdom_flag);
               if (paranoid && sizes[0] < 200) {
                    test_correctness(sizes[0]);
               }
          }

          if (plans_nd[slot]) {
               rfftwnd_destroy_plan(plans_nd[slot]);
          }

          plans_nd[slot] = rfftwnd_create_plan(rank, sizes, random_dir(),
                                               measure_flag | wisdom_flag);

          /* progress report every 1/20th of the run */
          if (iter % (total_iters / 20) == 0) {
               WHEN_VERBOSE(0, printf("test planner: so far so good\n"));
               WHEN_VERBOSE(0, printf("test planner: iteration %d out of %d\n",
                                      iter, total_iters));
          }
     }

     /* release whatever plans are still alive */
     for (slot = 0; slot < PLANNER_TEST_SIZE; ++slot) {
          if (plans_1d[slot]) {
               rfftw_destroy_plan(plans_1d[slot]);
          }
          if (plans_nd[slot]) {
               rfftwnd_destroy_plan(plans_nd[slot]);
          }
     }

     fftw_free(sizes);
     verbose++;
     chk_mem_leak = 1;
}
/*
 * Check in-place multi-dimensional real transforms against a reference
 * complex transform.
 *
 * rank            number of dimensions
 * n               the rank dimension lengths
 * validated_plan  complex fftwnd plan used to compute the reference output
 * alternate_api   non-zero: for rank 2 or 3 use the rfftw2d and rfftw3d
 *                 plan constructors instead of the rfftwnd ones
 * specific        non-zero: use the *_create_plan_specific constructors
 *
 * For every stride from 1 to MAX_STRIDE the real data is transformed
 * forward in place, compared with the reference, transformed back in place,
 * rescaled by 1/N and compared with the original input.
 */
void testnd_in_place(int rank, int *n, fftwnd_plan validated_plan,
                     int alternate_api, int specific)
{
     const int use_alt = alternate_api && (rank == 2 || rank == 3);
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;
     int stride;                /* serves as istride, ostride and howmany */
     int total, d, row, col, rep, idx;
     int nrows, nhalf, nlast, row_reals;
     fftw_real *buf_r;          /* the in-place buffer, viewed as reals */
     fftw_complex *buf_c;       /* the same buffer, viewed as complex */
     fftw_complex *ref_in, *ref_out;
     fftwnd_plan fwd, inv;

     if (coinflip()) {
          flags |= FFTW_THREADSAFE;
     }

     total = nrows = nlast = nhalf = 1;
     for (d = 0; d < rank; ++d) {
          total *= n[d];
     }
     if (rank > 0) {
          nlast = n[rank - 1];          /* length of the last dimension */
          nrows = total / nlast;        /* product of the other dimensions */
          nhalf = nlast / 2 + 1;        /* complex outputs per row */
     }
     row_reals = nhalf * 2;             /* reals per row of the buffer */

     buf_r = (fftw_real *) fftw_malloc(2 * nhalf * nrows * MAX_STRIDE
                                       * sizeof(fftw_real));
     buf_c = (fftw_complex *) buf_r;
     ref_in = (fftw_complex *) fftw_malloc(total * sizeof(fftw_complex));
     ref_out = (fftw_complex *) fftw_malloc(total * sizeof(fftw_complex));

     /* forward plan first, inverse plan second, in every variant */
     if (specific) {
          if (use_alt && rank == 2) {
               fwd = rfftw2d_create_plan_specific(n[0], n[1],
                                                  FFTW_REAL_TO_COMPLEX, flags,
                                                  buf_r, MAX_STRIDE, 0, 0);
               inv = rfftw2d_create_plan_specific(n[0], n[1],
                                                  FFTW_COMPLEX_TO_REAL, flags,
                                                  buf_r, MAX_STRIDE, 0, 0);
          } else if (use_alt) {
               fwd = rfftw3d_create_plan_specific(n[0], n[1], n[2],
                                                  FFTW_REAL_TO_COMPLEX, flags,
                                                  buf_r, MAX_STRIDE, 0, 0);
               inv = rfftw3d_create_plan_specific(n[0], n[1], n[2],
                                                  FFTW_COMPLEX_TO_REAL, flags,
                                                  buf_r, MAX_STRIDE, 0, 0);
          } else {
               fwd = rfftwnd_create_plan_specific(rank, n,
                                                  FFTW_REAL_TO_COMPLEX, flags,
                                                  buf_r, MAX_STRIDE,
                                                  buf_r, MAX_STRIDE);
               inv = rfftwnd_create_plan_specific(rank, n,
                                                  FFTW_COMPLEX_TO_REAL, flags,
                                                  buf_r, MAX_STRIDE,
                                                  buf_r, MAX_STRIDE);
          }
     } else if (use_alt) {
          if (rank == 2) {
               fwd = rfftw2d_create_plan(n[0], n[1],
                                         FFTW_REAL_TO_COMPLEX, flags);
               inv = rfftw2d_create_plan(n[0], n[1],
                                         FFTW_COMPLEX_TO_REAL, flags);
          } else {
               fwd = rfftw3d_create_plan(n[0], n[1], n[2],
                                         FFTW_REAL_TO_COMPLEX, flags);
               inv = rfftw3d_create_plan(n[0], n[1], n[2],
                                         FFTW_COMPLEX_TO_REAL, flags);
          }
     } else {
          fwd = rfftwnd_create_plan(rank, n, FFTW_REAL_TO_COMPLEX, flags);
          inv = rfftwnd_create_plan(rank, n, FFTW_COMPLEX_TO_REAL, flags);
     }

     CHECK(fwd != NULL && inv != NULL, "can't create plan");

     /* clear the whole in-place buffer once */
     for (idx = 0; idx < nrows * nhalf * 2 * MAX_STRIDE; ++idx) {
          buf_r[idx] = 0;
     }

     for (stride = 1; stride <= MAX_STRIDE; ++stride) {
          /* generate random inputs: one copy for the reference transform,
             `stride` interleaved copies in the padded in-place buffer */
          for (row = 0; row < nrows; ++row) {
               for (col = 0; col < nlast; ++col) {
                    idx = row * nlast + col;
                    c_re(ref_in[idx]) = DRAND();
                    c_im(ref_in[idx]) = 0.0;
                    for (rep = 0; rep < stride; ++rep) {
                         buf_r[(row * row_reals + col) * stride + rep]
                             = c_re(ref_in[idx]);
                    }
               }
          }

          fftwnd(validated_plan, 1, ref_in, 1, 1, ref_out, 1, 1);

          WHEN_VERBOSE(2, printf("\n    testing in-place stride %d...",
                                 stride));

          /* the single-transform entry point is only tried at stride 1,
             and then only when the coin says so */
          if (stride != 1 || coinflip()) {
               rfftwnd_real_to_complex(fwd, stride, buf_r, stride, 1,
                                       buf_c, stride, 1);
          } else {
               rfftwnd_one_real_to_complex(fwd, buf_r, NULL);
          }

          for (row = 0; row < nrows; ++row) {
               for (rep = 0; rep < stride; ++rep) {
                    CHECK(compute_error_complex(buf_c + row * nhalf * stride
                                                + rep,
                                                stride,
                                                ref_out + row * nlast, 1,
                                                nhalf) < TOLERANCE,
                          "in-place (r2c): wrong answer");
               }
          }

          if (stride != 1 || coinflip()) {
               rfftwnd_complex_to_real(inv, stride, buf_c, stride, 1,
                                       buf_r, stride, 1);
          } else {
               rfftwnd_one_complex_to_real(inv, buf_c, NULL);
          }

          /* rescale the round-trip result by 1/N */
          for (idx = 0; idx < nrows * nhalf * 2 * stride; ++idx) {
               buf_r[idx] *= 1.0 / total;
          }

          for (row = 0; row < nrows; ++row) {
               for (rep = 0; rep < stride; ++rep) {
                    CHECK(compute_error(buf_r + row * nhalf * 2 * stride
                                        + rep,
                                        stride,
                                        (fftw_real *) (ref_in + row * nlast),
                                        2,
                                        nlast) < TOLERANCE,
                          "in-place (c2r): wrong answer (check 2)");
               }
          }
     }

     rfftwnd_destroy_plan(fwd);
     rfftwnd_destroy_plan(inv);

     fftw_free(ref_out);
     fftw_free(ref_in);
     fftw_free(buf_r);
}
/*
 * Check out-of-place multi-dimensional real transforms against a reference
 * complex transform.
 *
 * rank            number of dimensions
 * n               the rank dimension lengths
 * validated_plan  complex fftwnd plan used to compute the reference output
 *
 * Every combination of input and output stride from 1 to MAX_STRIDE is
 * exercised: real-to-complex output is compared with the reference, then
 * the complex-to-real round trip (rescaled by 1/N) is compared with the
 * original input.
 */
void testnd_out_of_place(int rank, int *n, fftwnd_plan validated_plan)
{
     int flags = measure_flag | wisdom_flag;
     int in_stride, out_stride;
     int total, d, row, col, rep, idx;
     int nrows, nhalf, nlast;
     fftw_real *real_in, *real_back;
     fftw_complex *ref_in, *cplx_out, *ref_out;
     fftwnd_plan fwd, inv;

     if (coinflip()) {
          flags |= FFTW_THREADSAFE;
     }

     total = nrows = nlast = nhalf = 1;
     for (d = 0; d < rank; ++d) {
          total *= n[d];
     }
     if (rank > 0) {
          nlast = n[rank - 1];          /* length of the last dimension */
          nrows = total / nlast;        /* product of the other dimensions */
          nhalf = nlast / 2 + 1;        /* complex outputs per row */
     }

     real_in = (fftw_real *) fftw_malloc(total * MAX_STRIDE
                                         * sizeof(fftw_real));
     real_back = (fftw_real *) fftw_malloc(total * MAX_STRIDE
                                           * sizeof(fftw_real));
     cplx_out = (fftw_complex *) fftw_malloc(nhalf * nrows * MAX_STRIDE
                                             * sizeof(fftw_complex));
     ref_in = (fftw_complex *) fftw_malloc(total * sizeof(fftw_complex));
     ref_out = (fftw_complex *) fftw_malloc(total * sizeof(fftw_complex));

     fwd = rfftwnd_create_plan(rank, n, FFTW_REAL_TO_COMPLEX, flags);
     inv = rfftwnd_create_plan(rank, n, FFTW_COMPLEX_TO_REAL, flags);
     CHECK(fwd != NULL && inv != NULL, "can't create plan");

     for (in_stride = 1; in_stride <= MAX_STRIDE; ++in_stride) {
          /* generate random inputs: one copy for the reference transform,
             `in_stride` interleaved copies for the transform under test */
          for (row = 0; row < nrows; ++row) {
               for (col = 0; col < nlast; ++col) {
                    idx = row * nlast + col;
                    c_re(ref_in[idx]) = DRAND();
                    c_im(ref_in[idx]) = 0.0;
                    for (rep = 0; rep < in_stride; ++rep) {
                         real_in[idx * in_stride + rep] = c_re(ref_in[idx]);
                    }
               }
          }
          for (idx = 0; idx < total * in_stride; ++idx) {
               real_back[idx] = 0.0;
          }

          fftwnd(validated_plan, 1, ref_in, 1, 1, ref_out, 1, 1);

          for (out_stride = 1; out_stride <= MAX_STRIDE; ++out_stride) {
               /* number of interleaved transforms that fit both layouts */
               int howmany = (in_stride < out_stride) ? in_stride
                                                       : out_stride;

               WHEN_VERBOSE(2, printf("\n    testing stride %d/%d...",
                                      in_stride, out_stride));

               /* the single-transform entry point is only tried for the
                  unit-stride case, and then only when the coin says so */
               if (howmany != 1 || in_stride != 1 || out_stride != 1
                   || coinflip()) {
                    rfftwnd_real_to_complex(fwd, howmany,
                                            real_in, in_stride, 1,
                                            cplx_out, out_stride, 1);
               } else {
                    rfftwnd_one_real_to_complex(fwd, real_in, cplx_out);
               }

               for (row = 0; row < nrows; ++row) {
                    for (rep = 0; rep < howmany; ++rep) {
                         CHECK(compute_error_complex(cplx_out
                                                     + row * nhalf * out_stride
                                                     + rep,
                                                     out_stride,
                                                     ref_out + row * nlast, 1,
                                                     nhalf) < TOLERANCE,
                               "out-of-place (r2c): wrong answer");
                    }
               }

               if (howmany != 1 || in_stride != 1 || out_stride != 1
                   || coinflip()) {
                    rfftwnd_complex_to_real(inv, howmany,
                                            cplx_out, out_stride, 1,
                                            real_back, in_stride, 1);
               } else {
                    rfftwnd_one_complex_to_real(inv, cplx_out, real_back);
               }

               /* rescale the round-trip result by 1/N */
               for (idx = 0; idx < total * in_stride; ++idx) {
                    real_back[idx] *= 1.0 / total;
               }

               /* when every interleaved copy was transformed, the whole
                  buffer must match the input */
               if (in_stride == howmany) {
                    CHECK(compute_error(real_back, 1, real_in, 1,
                                        total * in_stride)
                          < TOLERANCE, "out-of-place (c2r): wrong answer");
               }
               for (row = 0; row < nrows; ++row) {
                    for (rep = 0; rep < howmany; ++rep) {
                         CHECK(compute_error(real_back
                                             + row * nlast * in_stride + rep,
                                             in_stride,
                                             (fftw_real *) (ref_in
                                                            + row * nlast),
                                             2,
                                             nlast) < TOLERANCE,
                               "out-of-place (c2r): wrong answer (check 2)");
                    }
               }
          }
     }

     rfftwnd_destroy_plan(fwd);
     rfftwnd_destroy_plan(inv);

     fftw_free(real_back);
     fftw_free(ref_out);
     fftw_free(ref_in);
     fftw_free(cplx_out);
     fftw_free(real_in);
}
Exemplo n.º 15
0
/* Inverse FFT of a one-band, uncoded image to a real (double) image, using
 * the fftw2 real-transform API.
 *
 * dummy: image that the complex temporary and the half-complex buffer are
 *	attached to
 * in: input image
 * out: output image; the real temporary is attached to it
 *
 * Returns 0 on success, -1 on failure.
 */
static int 
invfft1( IMAGE *dummy, IMAGE *in, IMAGE *out )
{
	IMAGE *cmplx = im_open_local( dummy, "invfft1-1", "t" );
	IMAGE *real = im_open_local( out, "invfft1-2", "t" );
	const int half_width = in->Xsize / 2 + 1;

	/* Buffer for the half-complex form: half_width complex values 
	 * (two doubles each) per line.
	 */
	double *half_complex = IM_ARRAY( dummy, 
		in->Ysize * half_width * 2, double );

	rfftwnd_plan plan;
	int x, y;

	/* Guards, tested in this order.
	 */
	if( !cmplx || !real || !half_complex )
		return( -1 );
	if( im_pincheck( in ) )
		return( -1 );
	if( im_poutcheck( out ) )
		return( -1 );
	if( in->Coding != IM_CODING_NONE || in->Bands != 1 ) {
                im_error( "im_invfft", _( "one band uncoded only" ) );
                return( -1 );
	}

	/* Convert the input to a double-precision complex image.
	 */
	if( im_clip2fmt( in, cmplx, IM_BANDFMT_DPCOMPLEX ) )
		return( -1 );

	/* Set up the real output image: same description as the input, 
	 * but in double format.
	 */
	if( im_cp_desc( real, in ) )
		return( -1 );
	real->BandFmt = IM_BANDFMT_DOUBLE;
	if( im_setupout( real ) )
		return( -1 );

	/* Build the half-complex image: keep only the first half_width 
	 * complex values of every line.
	 */
	for( y = 0; y < cmplx->Ysize; y++ ) {
		const double *line = 
			((double *) cmplx->data) + y * in->Xsize * 2; 
		double *dest = half_complex + y * half_width * 2;

		for( x = 0; x < half_width; x++ ) {
			dest[2 * x] = line[2 * x];
			dest[2 * x + 1] = line[2 * x + 1];
		}
	}

	/* Plan the transform. Note the argument order: the height is
	 * passed first (as nx) and the width second (as ny).
	 */
	plan = rfftw2d_create_plan( in->Ysize, in->Xsize,
		FFTW_BACKWARD, FFTW_MEASURE | FFTW_USE_WISDOM );
	if( !plan ) {
                im_error( "im_invfft", _( "unable to create transform plan" ) );
		return( -1 );
	}

	rfftwnd_one_complex_to_real( plan, 
		(fftw_complex *) half_complex, (fftw_real *) real->data );

	rfftwnd_destroy_plan( plan );

	/* Copy the result to out.
	 */
	if( im_copy( real, out ) )
		return( -1 );

	return( 0 );
}
Exemplo n.º 16
0
/*
 * Release the global resources selected by `status`.
 *
 * status is a bit mask; each DESTROY_STATUS_* bit that is set releases one
 * group of globals:
 *   DESTROY_STATUS_FFTW      destroys the FFTW plans
 *   DESTROY_STATUS_GETMEM    frees the solution arrays and work buffers
 *   DESTROY_STATUS_WAVENUMS  frees Kx, Kz and K2
 *   DESTROY_STATUS_LEGENDRE  frees the Q and R matrix families and their
 *                            companion vectors
 *
 * Nothing in this function resets the globals afterwards.
 * NOTE(review): a second call with the same bit would hand the same
 * pointers to the free helpers again -- confirm callers never do that.
 */
void destroy(int status)
{
    /* External Variables.  All external variables are defined in main.h */
    extern double *Kx, *Kz, **K2, *cfl2;
    extern double **Q, **Qp, **Qpp, **R, **Rp, **Qw, **Qpw, **Rw, **Qs,
        **Qps, **Qpps, **Rs, **Rps, *Rp0, **Rpw, **Qppw, *Rpp0;

    extern double *Uadd, *Vadd, *Vpadd;
    extern double *Qy;
    extern double *W;

    extern mcomplex ****U, ****C;    /* state variables */
    extern mcomplex **Fa, **Fb, **TM;
    extern mcomplex *fa, *fb, *tm;
    extern double **MZ;
    extern double ***M;

    extern mcomplex ****IU, ****IC;    /* incremental state variables */
    extern mcomplex **IFa, **IFb, **ITM;
    extern mcomplex *Ifa, *Ifb, *Itm;

    extern mcomplex ****AU, ****AC;    /* adjoint variables and will use
                       the same other variables
                       used in state equations */

    extern mcomplex ****IAU, ****IAC;    /* incremental adjoint variables */

    /* Uzbt is freed below; this line used to declare Uzb a second time
       instead of Uzbt. */
    extern mcomplex **Uxbt, **Uzbt;    /* variables used to store dux duz
                       evaluated at y=-1 used for
                       computing boundary conditions for
                       incremental state equations */
    extern mcomplex **Uxb, **Uzb;    /* variables used to store dux duz
                       evaluated at y=-1 from previous 
                       state used for boundary conditions
                       for incremental state equations */
    extern mcomplex **IUxb, **IUzb;
    extern mcomplex **IAUxb, **IAUzb;
    extern mcomplex **AUxb, **AUzb;    /* variables used to store dux duz
                       evaluated at y=-1 used for
                       computing boundary conditions
                       for incremental state equations */
    extern fftw_complex ***CT, ***ICT;    /* variables used in fft */
    extern fftw_plan pf1, pf2;
    extern fftw_plan Ipf1, Ipf2;
    extern rfftwnd_plan pr1, pr2;

    extern mcomplex *****MC, *****MIC;    /* variables used to store state and
                           incremental state solutions
                           between two check points. */

    extern mcomplex ****MU, ****MIU;    /* variables used to store
                           manufacture solutions */
    extern mcomplex ****LU, ****LIU;

    /* NOTE(review): grad, hess and the GUxb..HAUzb matrices freed below
       have no extern declaration in this function; presumably they are
       declared at file scope -- confirm. */

    /* FFTW plans */
    if (status & DESTROY_STATUS_FFTW) {
        fftw_destroy_plan(pf1);
        fftw_destroy_plan(pf2);
        rfftwnd_destroy_plan(pr1);
        rfftwnd_destroy_plan(pr2);
        fftw_destroy_plan(Ipf1);
        fftw_destroy_plan(Ipf2);
    }

    /* solution arrays and work buffers */
    if (status & DESTROY_STATUS_GETMEM) {
        freec4Darray(U);
        freec4Darray(C);
        freec3Darray(CT);
        freecMatrix(Fa);
        freecMatrix(Fb);
        freed3Darray(M);
        freecMatrix(TM);
        freedMatrix(MZ);
        freecVector(fa);
        freecVector(fb);
        freecVector(tm);
        freedVector(cfl2);
        freec4Darray(IU);
        freec4Darray(IC);
        freecMatrix(IFa);
        freecMatrix(IFb);
        freecMatrix(ITM);
        freecVector(Ifa);
        freecVector(Ifb);
        freecVector(Itm);
        freec3Darray(ICT);
        freecMatrix(Uxbt);
        freecMatrix(Uzbt);
        freecMatrix(Uxb);
        freecMatrix(Uzb);
        freec4Darray(AU);
        freec4Darray(IAU);
        freec4Darray(AC);
        freec4Darray(IAC);
        freec5Darray(MC);
        freec5Darray(MIC);
        freec4Darray(MU);
        freec4Darray(MIU);
        freec4Darray(LU);
        freec4Darray(LIU);
        freecMatrix(AUxb);
        freecMatrix(AUzb);
        freecMatrix(IUzb);
        freecMatrix(IUxb);
        freecMatrix(IAUxb);
        freecMatrix(IAUzb);
        freecMatrix(grad);
        freecMatrix(GUxb);
        freecMatrix(GUzb);
        freecMatrix(GIUxb);
        freecMatrix(GIUzb);
        freecMatrix(HUxb);
        freecMatrix(HUzb);
        freecMatrix(HAUxb);
        freecMatrix(HAUzb);
        freecMatrix(hess);
    }

    /* Kx, Kz and K2 */
    if (status & DESTROY_STATUS_WAVENUMS) {
        freedVector(Kx);
        freedVector(Kz);
        freedMatrix(K2);
    }

    /* Q and R matrix families plus their companion vectors */
    if (status & DESTROY_STATUS_LEGENDRE) {
        freedMatrix(Q);
        freedMatrix(Qp);
        freedMatrix(Qpp);
        freedMatrix(R);
        freedMatrix(Rp);
        freedMatrix(Qw);
        freedMatrix(Qpw);
        freedMatrix(Rw);
        freedMatrix(Qs);
        freedMatrix(Qps);
        freedMatrix(Qpps);
        freedMatrix(Rs);
        freedMatrix(Rps);
        freedVector(Rp0);
        freedVector(W);
        freedVector(Vadd);
        freedVector(Vpadd);
        freedVector(Uadd);
        freedVector(Qy);
        freedVector(Rpp0);
        freedMatrix(Qppw);
        freedMatrix(Rpw);
    }
}