/*
 * Tear down an FFT handle.
 *
 * Every non-NULL entry of the 2x2 `single` table is destroyed with
 * rfftw_destroy_plan() and every non-NULL entry of the 2x2 `multi` table
 * with rfftwnd_destroy_plan(); each slot is reset to NULL afterwards.
 * Finally the handle itself is released with free().
 *
 * Passing NULL is allowed and does nothing.
 */
void gmx_fft_destroy(gmx_fft_t fft)
{
    int slot;

    if (fft == NULL)
    {
        return;
    }

    /* Walk the four (row, col) slots in the order [0][0], [0][1], [1][0], [1][1]. */
    for (slot = 0; slot < 4; slot++)
    {
        const int row = slot / 2;
        const int col = slot % 2;

        if (fft->single[row][col] != NULL)
        {
            rfftw_destroy_plan(fft->single[row][col]);
            fft->single[row][col] = NULL;
        }

        if (fft->multi[row][col] != NULL)
        {
            rfftwnd_destroy_plan(fft->multi[row][col]);
            fft->multi[row][col] = NULL;
        }
    }

    free(fft);
}
/**
 * Releases a plan that was created earlier.
 *
 * Without USE_GPUS the plan is handed to rfftwnd_destroy_plan(), which
 * returns nothing.  With USE_GPUS it is handed to cufftDestroy(); that call
 * does return a result code, which is deliberately discarded for now.
 */
void sararfftnd_destroy_plan( sararfftnd_plan plan )
{
#ifndef USE_GPUS
    rfftwnd_destroy_plan( plan );
#else /* USE_GPUS */
    (void) cufftDestroy( plan );
#endif
}
/*
 * MAXWELL_DESTROY_PLAN_(p): destroy one FFT plan using whichever FFTW
 * flavour this build is configured for (FFTW3, or FFTW2 with/without MPI and
 * with complex/real scalars).  Expands to a no-op when neither HAVE_FFTW3
 * nor HAVE_FFTW is defined.
 */
#if defined(HAVE_FFTW3)
#  define MAXWELL_DESTROY_PLAN_(p) FFTW(destroy_plan)((fftplan) (p))
#elif defined(HAVE_FFTW)
#  ifdef HAVE_MPI
#    ifdef SCALAR_COMPLEX
#      define MAXWELL_DESTROY_PLAN_(p) fftwnd_mpi_destroy_plan((fftplan) (p))
#    else /* not SCALAR_COMPLEX */
#      define MAXWELL_DESTROY_PLAN_(p) rfftwnd_mpi_destroy_plan((fftplan) (p))
#    endif /* not SCALAR_COMPLEX */
#  else /* not HAVE_MPI */
#    ifdef SCALAR_COMPLEX
#      define MAXWELL_DESTROY_PLAN_(p) fftwnd_destroy_plan((fftplan) (p))
#    else /* not SCALAR_COMPLEX */
#      define MAXWELL_DESTROY_PLAN_(p) rfftwnd_destroy_plan((fftplan) (p))
#    endif /* not SCALAR_COMPLEX */
#  endif /* not HAVE_MPI */
#else /* no FFTW at all */
#  define MAXWELL_DESTROY_PLAN_(p) ((void) 0)
#endif

/*
 * Free a maxwell_data object and everything it owns: the forward and
 * inverse FFT plans (d->plans[i], d->iplans[i] for i < d->nplans), the
 * eps_inv, fft_data, k_plus_G and k_plus_G_normsqr arrays, and finally the
 * structure itself.  With FFTW3, fft_data is released through the FFTW
 * allocator and fft_data2 is released as well when it is a distinct buffer.
 *
 * A NULL argument is ignored.
 */
void destroy_maxwell_data(maxwell_data *d)
{
    int plan_idx;

    if (!d)
        return;

    for (plan_idx = 0; plan_idx < d->nplans; ++plan_idx) {
        MAXWELL_DESTROY_PLAN_(d->plans[plan_idx]);
        MAXWELL_DESTROY_PLAN_(d->iplans[plan_idx]);
    }

    free(d->eps_inv);

#if defined(HAVE_FFTW3)
    FFTW(free)(d->fft_data);
    /* fft_data2 may alias fft_data; only free it when it is separate. */
    if (d->fft_data2 != d->fft_data)
        FFTW(free)(d->fft_data2);
#else
    free(d->fft_data);
#endif

    free(d->k_plus_G);
    free(d->k_plus_G_normsqr);
    free(d);
}

#undef MAXWELL_DESTROY_PLAN_
/*
 * Class:     jfftw_real_nd_Plan
 * Method:    destroyPlan
 * Signature: ()V
 *
 * Destroys the native rfftwnd plan whose handle is stored, as raw bytes, in
 * the Java object's byte[] field "plan", then sets that field to null.
 *
 * The call is a no-op when the field cannot be resolved, when the field is
 * already null (so calling destroyPlan twice is harmless), or when the JVM
 * cannot expose the array contents; in the first and last case the JVM has
 * already raised a pending exception that surfaces when this function
 * returns to Java.
 */
JNIEXPORT void JNICALL Java_jfftw_real_nd_Plan_destroyPlan( JNIEnv* env, jobject obj )
{
	jclass clazz = (*env)->GetObjectClass( env, obj );
	jfieldID id = (*env)->GetFieldID( env, clazz, "plan", "[B" );
	jbyteArray arr;
	jbyte* carr;

	if( id == NULL )
		return; /* lookup failed; an exception is pending */

	arr = (jbyteArray)(*env)->GetObjectField( env, obj, id );
	if( arr == NULL )
		return; /* plan was already destroyed */

	carr = (*env)->GetByteArrayElements( env, arr, NULL );
	if( carr == NULL )
		return; /* out of memory; an exception is pending */

	/* The array bytes hold the plan handle itself. */
	rfftwnd_destroy_plan( *(rfftwnd_plan*)carr );

	/* Nothing was modified, so release without copying back. */
	(*env)->ReleaseByteArrayElements( env, arr, carr, JNI_ABORT );
	(*env)->SetObjectField( env, obj, id, NULL );
}
/*
 * Destroy an MPI real n-dimensional plan.
 *
 * Each sub-object that is present is released with its matching destructor:
 * p_fft_x (fftw_destroy_plan), p_fft (rfftwnd_destroy_plan), p_transpose and
 * p_transpose_inv (transpose_mpi_destroy_plan) and the work buffer
 * (fftw_free).  The plan structure itself is then freed with fftw_free().
 *
 * A NULL plan is ignored.
 */
void rfftwnd_mpi_destroy_plan(rfftwnd_mpi_plan p)
{
     if (!p) {
          return;
     }

     if (p->p_fft_x) {
          fftw_destroy_plan(p->p_fft_x);
     }
     if (p->p_fft) {
          rfftwnd_destroy_plan(p->p_fft);
     }
     if (p->p_transpose) {
          transpose_mpi_destroy_plan(p->p_transpose);
     }
     if (p->p_transpose_inv) {
          transpose_mpi_destroy_plan(p->p_transpose_inv);
     }
     if (p->work) {
          fftw_free(p->work);
     }

     fftw_free(p);
}
/* Build one full-width, normalised complex output row from the half-complex
 * transform.
 *
 * left:       first complex value of the stored (left) part of this row
 * mirror:     complex value that supplies output column half_width; the
 *             following columns are read walking backwards from there
 * row:        output buffer, (re, im) pairs, at least width pairs long
 * half_width: number of complex values stored per row
 * width:      number of complex values in an output row
 * size:       normalisation divisor
 *
 * Columns [0, half_width) are copied and divided by size. Columns
 * [half_width, width) are taken from the mirror position with the imaginary
 * part negated (complex conjugate) and divided by size.
 */
static void
rfwfft1_expand_row( const double *left, const double *mirror,
	double *row, int half_width, int width, int size )
{
	int x;

	for( x = 0; x < half_width; x++ ) {
		row[2 * x] = left[2 * x] / size;
		row[2 * x + 1] = left[2 * x + 1] / size;
	}

	for( x = half_width; x < width; x++ ) {
		const double *src = mirror - 2 * (x - half_width);

		row[2 * x] = src[0] / size;
		row[2 * x + 1] = -1.0 * src[1] / size;
	}
}

/* Forward FFT of a one-band real image using rfftw.
 *
 * The input is converted with im_clip2d() into a local image, transformed
 * to half-complex form with a 2D rfftw plan, and then written to out one
 * line at a time as double-precision complex pixels, normalised by the
 * pixel count.
 *
 * Returns 0 on success, -1 on error.
 */
static int
rfwfft1( IMAGE *dummy, IMAGE *in, IMAGE *out )
{
	const int size = in->Xsize * in->Ysize;
	const int half_width = in->Xsize / 2 + 1;

	/* Pack to double real here.
	 */
	IMAGE *real = im_open_local( dummy, "fwfft1:1", "t" );

	/* Transform to halfcomplex here.
	 */
	double *half_complex = IM_ARRAY( dummy,
		in->Ysize * half_width * 2, double );

	rfftwnd_plan plan;
	double *buf;
	int y;

	if( !real || !half_complex )
		return( -1 );
	if( im_pincheck( in ) || im_outcheck( out ) )
		return( -1 );

	if( in->Coding != IM_CODING_NONE || in->Bands != 1 ) {
		im_error( "im_fwfft", _( "one band uncoded only" ) );
		return( -1 );
	}

	if( im_clip2d( in, real ) )
		return( -1 );

	/* Make the plan for the transform. Yes, they really do use nx for
	 * height and ny for width.
	 */
	plan = rfftw2d_create_plan( in->Ysize, in->Xsize,
		FFTW_FORWARD, FFTW_MEASURE | FFTW_USE_WISDOM );
	if( !plan ) {
		im_error( "im_fwfft", _( "unable to create transform plan" ) );
		return( -1 );
	}

	rfftwnd_one_real_to_complex( plan,
		(fftw_real *) real->data, (fftw_complex *) half_complex );

	rfftwnd_destroy_plan( plan );

	/* WIO to out.
	 */
	if( im_cp_desc( out, in ) )
		return( -1 );
	out->Bbits = IM_BBITS_DPCOMPLEX;
	out->BandFmt = IM_BANDFMT_DPCOMPLEX;
	if( im_setupout( out ) )
		return( -1 );
	if( !(buf = (double *) IM_ARRAY( dummy,
		IM_IMAGE_SIZEOF_LINE( out ), PEL )) )
		return( -1 );

	/* Copy to out and normalise. The right half is the up/down and
	 * left/right flip of the left, but conjugated. The first row mirrors
	 * onto itself, so it is handled separately; the remaining rows mirror
	 * around the centre row.
	 */
	rfwfft1_expand_row( half_complex,
		half_complex + ((in->Xsize + 1) / 2 - 1) * 2,
		buf, half_width, out->Xsize, size );
	if( im_writeline( 0, out, (PEL *) buf ) )
		return( -1 );

	for( y = 1; y < out->Ysize; y++ ) {
		const double *left = half_complex + y * half_width * 2;

		/* Last usable complex value of the vertically mirrored row.
		 */
		const double *mirror = half_complex +
			2 * ((out->Ysize - y + 1) * half_width - 2 +
				(in->Xsize & 1));

		rfwfft1_expand_row( left, mirror,
			buf, half_width, out->Xsize, size );
		if( im_writeline( y, out, (PEL *) buf ) )
			return( -1 );
	}

	return( 0 );
}
/*
 * Fortran-callable wrapper around rfftwnd_destroy_plan().
 *
 * The plan is received by reference (pointer to the plan handle); the
 * handle it points to is destroyed.
 */
void F77_FUNC_(rfftwnd_f77_destroy_plan,RFFTWND_F77_DESTROY_PLAN)
(fftwnd_plan *p)
{
     fftwnd_plan plan = *p;

     rfftwnd_destroy_plan(plan);
}
/*
 * Build a real n-dimensional plan tuned for particular arrays.
 *
 * The `in`/`out` arrays are only used for planning measurements when they
 * are non-NULL and FFTW_MEASURE is set; otherwise generic sub-plans are
 * created.  Being given the arrays lets the planner time its options at run
 * time without allocating extra scratch space.
 *
 * Note that the caller's strides are currently ignored: both are forced to
 * 1 below because it is not yet defined whether they refer to real or
 * complex elements.
 *
 * Returns the new plan, or 0 if any step fails (in which case everything
 * built so far is destroyed).
 */
fftwnd_plan rfftwnd_create_plan_specific(int rank, const int *n,
                                         fftw_direction dir, int flags,
                                         fftw_real *in, int istride,
                                         fftw_real *out, int ostride)
{
     fftwnd_plan plan;
     int d;
     int ok;
     const int last = rank - 1;

     /* the last-dimension rfftw transform is always done out-of-place
        (see rexec2.c) */
     const int rflags = flags & ~FFTW_IN_PLACE;

     /* the remaining dimensions are always transformed in place */
     const int sub_flags = flags | FFTW_IN_PLACE;

     if (flags & FFTW_IN_PLACE) {
          out = NULL;
          ostride = istride;
     }

     /* strides don't work yet, since it is not clear whether they apply
        to real or complex data */
     istride = ostride = 1;

     plan = fftwnd_create_plan_aux(rank, n, dir, flags);
     if (!plan)
          return 0;

     /* shrink the last dimension to its half-complex length n/2 + 1 */
     for (d = 0; d < last; ++d)
          plan->n_after[d] = (n[last] / 2 + 1) * (plan->n_after[d] / n[last]);
     if (rank > 0)
          plan->n[last] = n[last] / 2 + 1;

     plan->plans = fftwnd_new_plan_array(rank);
     if (rank > 0 && !plan->plans)
          goto fail;

     if (rank > 0) {
          plan->plans[last] = rfftw_create_plan(n[last], dir, rflags);
          if (!plan->plans[last])
               goto fail;
     }

     if (rank > 1) {
          if (!(flags & FFTW_MEASURE) || in == 0
              || (!plan->is_in_place && out == 0)) {
               /* nothing to measure against: generic sub-plans */
               ok = fftwnd_create_plans_generic(plan->plans, last, n,
                                                dir, sub_flags) != 0;
          } else if (dir == FFTW_COMPLEX_TO_REAL
                     || (flags & FFTW_IN_PLACE)) {
               /* the complex data lives in `in` */
               ok = fftwnd_create_plans_specific(plan->plans, last, n,
                                                 plan->n_after, dir,
                                                 sub_flags,
                                                 (fftw_complex *) in,
                                                 istride, 0, 0) != 0;
          } else {
               /* the complex data lives in `out` */
               ok = fftwnd_create_plans_specific(plan->plans, last, n,
                                                 plan->n_after, dir,
                                                 sub_flags,
                                                 (fftw_complex *) out,
                                                 ostride, 0, 0) != 0;
          }
          if (!ok)
               goto fail;
     }

     plan->nbuffers = 0;
     plan->nwork = fftwnd_work_size(rank, plan->n, sub_flags,
                                    plan->nbuffers + 1);

     /* thread-safe plans do not get a work array allocated here */
     if (plan->nwork && !(flags & FFTW_THREADSAFE)) {
          plan->work = (fftw_complex *)
               fftw_malloc(plan->nwork * sizeof(fftw_complex));
          if (!plan->work)
               goto fail;
     }

     return plan;

fail:
     rfftwnd_destroy_plan(plan);
     return 0;
}
int main() { omp_set_num_threads(numCores); // Set the number of threads for OpenMP parallel sections fftw_threads_init(); // Initialize threaded FFTs rfftwnd_plan dp_c2r; // Inverse FFT plan rfftwnd_plan dp_r2c; // Forward FFT plan // Create the plans using FFTW_MEASURE to get fastest transforms, do this here so // that it is only done once and the plans reused. std::cout << "Creating FFTW plans...\n"; dp_c2r = rfftw3d_create_plan(N, N, N, FFTW_COMPLEX_TO_REAL, FFTW_MEASURE); dp_r2c = rfftw3d_create_plan(N, N, N, FFTW_REAL_TO_COMPLEX, FFTW_MEASURE); double *kvec = new double[N]; fftfreq(kvec); std::ofstream fout; std::ofstream tout; std::ifstream fin; fout.open("GalaxyNum.dat",std::ios::out); fout.close(); std::vector< Pk > InputPower; int numKModes = 0; std::cout << "Reading input power file: " << CAMBfile << "\n"; fin.open(CAMBfile.c_str(),std::ios::in); while (!fin.eof()) { Pk Input_temp; fin >> Input_temp.k >> Input_temp.P; if (!fin.eof()) { InputPower.push_back(Input_temp); ++numKModes; } } fin.close(); double *kvals = new double[numKModes]; double *InPow = new double[numKModes]; for (int i = 0; i < numKModes; ++i) { kvals[i] = InputPower[i].k; InPow[i] = InputPower[i].P; } gsl_spline *Power = gsl_spline_alloc(gsl_interp_cspline, numKModes); gsl_interp_accel *acc = gsl_interp_accel_alloc(); gsl_spline_init(Power, kvals, InPow, numKModes); fftw_complex *deltak3di = new fftw_complex[N_im]; fftw_real *deltar3di = new fftw_real[N_tot]; #pragma omp parallel for for (int i = 0; i < N_tot; ++i) { deltar3di[i] = 0.0; if (i < N_im) { deltak3di[i].re = 0.0; deltak3di[i].im = 0.0; } } std::cout << "Distributing power over volume...\n"; Gendk(kvec, Power, acc, deltak3di); // Call function to populate the power grid std::cout << "Performing initial one-time inverse FFT...\n"; rfftwnd_threads_one_complex_to_real(numCores,dp_c2r,deltak3di,deltar3di); // FFT std::cout << "Taking the natural log...\n"; #pragma omp parallel for for (int i = 0; i < N_tot; ++i) { deltar3di[i] = 
log(1.0 + deltar3di[i]); if (i < N_im) { deltak3di[i].re = 0.0; deltak3di[i].im = 0.0; } } std::cout << "Performing initial one-time forward FFT...\n"; rfftwnd_threads_one_real_to_complex(numCores,dp_r2c,deltar3di,deltak3di); std::cout << "Normalizing...\n"; #pragma omp parallel for for (int i = 0; i < N_im; ++i) { deltak3di[i].re /= N_tot; deltak3di[i].im /= N_tot; } delete[] deltar3di; tout.open("Timings.dat",std::ios::out); std::cout << "Starting to generate mocks...\n"; for (int mock = startNum-1; mock < numMocks; ++mock) { double start_time = omp_get_wtime(); std::string lrgfile = filename(base, mock+1, ext); std::cout << "Generating mock " << lrgfile << "\n"; fftw_complex *deltak3d = new fftw_complex[N_im]; fftw_real *deltar3d = new fftw_real[N_tot]; // Initialize power array. Do it in parallel to speed things up. #pragma omp parallel for for (int i = 0; i < N_tot; ++i) { deltar3d[i] = 0.0; if (i < N_im) { deltak3d[i].re = 0.0; deltak3d[i].im = 0.0; } } std::cout << " Setting up for the inverse FFT...\n"; Sampdk(kvec, deltak3di, deltak3d); if (powOut) { std::cout << " Outputting raw power array...\n"; std::string powerfile = filename(powbase, mock+1, extbin); fout.open(powerfile.c_str(),std::ios::out|std::ios::binary); fout.write((char *) deltak3d, N_im*sizeof(fftw_complex)); fout.close(); } std::cout << " Performing second inverse FFT...\n"; rfftwnd_threads_one_complex_to_real(numCores,dp_c2r,deltak3d,deltar3d); if (matOut) { std::cout << " Outputting matter field array...\n"; std::string matterfile = filename(matbase, mock+1, extbin); fout.open(matterfile.c_str(),std::ios::out|std::ios::binary); fout.write((char *) deltar3d, N_tot*sizeof(fftw_real)); fout.close(); } double mean = 0.0; double variance = 0.0; double dr_max = 0.0; double dr_min = 0.0; for (int i = 0; i < N_tot; ++i) { mean += deltar3d[i]/N_tot; if (deltar3d[i] > dr_max) dr_max = deltar3d[i]; if (deltar3d[i] < dr_min) dr_min = deltar3d[i]; } std::cout << " Max = " << dr_max << "\n"; std::cout 
<< " Min = " << dr_min << "\n"; std::cout << " Mean = " << mean << "\n"; std::cout << " Calculating variance...\n"; for (int i = 0; i < N_tot; ++i) { deltar3d[i] -= mean; variance += (deltar3d[i])*(deltar3d[i])/(N_tot-1); } std::cout << " Poisson sampling...\n"; Gendr(lrgfile, variance, deltar3d); delete[] deltak3d; delete[] deltar3d; double totaltime = omp_get_wtime()-start_time; std::cout << " Time to generate mock: " << totaltime << " seconds\n"; tout << lrgfile << " " << totaltime << "\n"; } tout.close(); delete[] kvec; delete[] deltak3di; delete[] kvals; delete[] InPow; rfftwnd_destroy_plan(dp_r2c); rfftwnd_destroy_plan(dp_c2r); gsl_spline_free(Power); gsl_interp_accel_free(acc); return 0; }
main (int argc, char *argv[]) { struct em_file inputdata1; struct em_file inputdata2; struct em_file inputdata3; struct em_file inputdata4; struct em_file outputdata; fftw_real *Vol_tmpl_sort, *Volume, *e3, *PointCorr, *sqconv; fftw_complex *C3, *PointVolume, *PointSq; rfftwnd_plan p3, pi3, r3, ri3; fftw_real scale; struct tm *zeit; struct tm start; char name[200]; int Rx_max, Ry_max, Rz_max; int Rx_min, Ry_min, Rz_min; int Vx_min, Vy_min, Vz_min; int Vx_max, Vy_max, Vz_max; float Phi, Psi, Theta, winkel_lauf; float *Rot_tmpl, *Vol_tmpl; int i, j, k, tmpx, tmpy, tmpz,lauf_pe, ksub; int ijk; int lauf, n; float max, eps; time_t lt; float Ctmp, Ctmpim, Dtmp, Dtmpim; int dim_fft; int sub[3],range[3],range_sub[3],subc[3],offset[3],dimarray[3]; int FullVolume_dims[3]; int nr[3]; int area[3]; /* MPI Variablen */ int winkel_max, winkel_min; int winkel_max_pe, winkel_min_pe; int winkel_step_pe; int Phi_max, Psi_max, Theta_max; int Phi_min, Psi_min, Theta_min; int Phi_step, Psi_step, Theta_step; int Theta_winkel_start, Psi_winkel_start, Phi_winkel_start; int Theta_winkel_nr, Psi_winkel_nr, Phi_winkel_nr; int Theta_winkel_end, Psi_winkel_end, Phi_winkel_end; int Theta_steps, Psi_steps, Phi_steps; float Theta_winkel_rest_nr, Psi_winkel_rest_nr, Phi_winkel_rest_nr; int in_max; float rms_wedge, tempccf; float *Ergebnis, *conv; float cycles; int cycle; /* MPI Variablen Ende*/ if (argc < 15) { printf ("\n\n"); printf (" 'OSCAR' is an Optimized SCanning AlgoRithm for \n"); printf (" local correlation.\n"); printf (" All files in EM-V-int4 format !!!\n\n"); printf (" Input: Volume to be searched, Template mask for local \n "); printf (" correlation, pointspread function and angular search \n"); printf (" range. \n"); printf (" Output: locally normalized X-Correlation Function Out.ccf.norm, \n"); printf (" non-normalized X-Correlation Function Out.ccf, and Out.ang \n"); printf (" with the corresponding angles.\n\n"); printf (" usage: oscar Volume Template Out ...\n"); printf (" ... 
Phi_min Phi_max Phi_step Psi_min Psi_max Psi_step The_min The_max The_step\n"); printf (" ... Poinspread-function mask-file dim_of_fft\n\n"); printf (" with Message Passing Interface (MPI)\n"); printf (" the total number of angles must be modulo\n"); printf (" of used processors!\n\n"); printf (" Linux: 1.'lamboot' to start MPI\n"); printf (" 2.'mpirun -np 2 oscar Volume Templ Out 30 180 30 30 180 30 30 180 30 Poinspread-function mask-file 256'\n\n"); printf (" In this version asymmetric masks can be used ! \n"); printf (" last revision , 11.11.03, Friedrich Foerster"); printf (" \n\n"); exit (1); } MPI_Init (&argc, &argv); MPI_Comm_size (MPI_COMM_WORLD, &mysize); MPI_Comm_rank (MPI_COMM_WORLD, &myrank); /* Dimensionen auslesen */ // Dimension of fft dim_fft = atoi (argv[15]); nr[0]=1; nr[1]=1; nr[2]=1; area[0]=dim_fft; area[1]=dim_fft; area[2]=dim_fft; read_em_header(argv[1], &inputdata1); /* Searchvolume */ read_em (argv[2], &inputdata2); /* Template */ FullVolume_dims[0]=inputdata1.dims[0]; FullVolume_dims[1]=inputdata1.dims[1]; FullVolume_dims[2]=inputdata1.dims[2]; Rx_min = 1; Ry_min = 1; Rz_min = 1; Rx_max = (inputdata2.dims[0]); Ry_max = (inputdata2.dims[1]); Rz_max = (inputdata2.dims[2]); Vx_min = 1; Vy_min = 1; Vz_min = 1; Vx_max = dim_fft; Vy_max = dim_fft; Vz_max = dim_fft; p3 = rfftw3d_create_plan (Vx_max, Vy_max, Vz_max, FFTW_REAL_TO_COMPLEX, FFTW_MEASURE | FFTW_IN_PLACE); /*FFTW_ESTIMATE FFTW_MEASURE */ pi3 = rfftw3d_create_plan (Vx_max, Vy_max, Vz_max, FFTW_COMPLEX_TO_REAL, FFTW_MEASURE | FFTW_IN_PLACE); r3 = rfftw3d_create_plan (Rx_max, Rx_max, Rx_max, FFTW_REAL_TO_COMPLEX, FFTW_MEASURE | FFTW_IN_PLACE); /*FFTW_ESTIMATE FFTW_MEASURE */ ri3 = rfftw3d_create_plan (Rx_max, Rx_max, Rx_max, FFTW_COMPLEX_TO_REAL, FFTW_MEASURE | FFTW_IN_PLACE); if (myrank == 0) { printf("Plans for FFTW created \n");fflush(stdout); } Volume = (fftw_real *) calloc (Vx_max * Vx_max * 2 * (Vx_max / 2 + 1),sizeof (fftw_real) ); Rot_tmpl = (float *) malloc (sizeof (float) * 
Rx_max * Ry_max * Rz_max); Vol_tmpl = (float *) malloc (sizeof (float) * Vx_max * Vy_max * Vz_max); conv = (float *) malloc (sizeof (float) * Vx_max * Vy_max * Vz_max); sqconv = (fftw_real *) calloc(Vz_max * Vy_max * 2 * (Vx_max / 2 + 1), sizeof (fftw_real)); if (! (inputdata1.floatdata = (float *) malloc (sizeof (float) * Vx_max * Vy_max * Vz_max))) { printf ("Memory allocation failure in inputdata1.floatdata!!!"); fflush (stdout); exit (1); } if (! (outputdata.floatdata = (float *) malloc (sizeof (float) * Vx_max * Vy_max * Vz_max))) { printf ("Memory allocation failure in outputdata.floatdata!!!"); fflush (stdout); exit (1); } if (! (Vol_tmpl_sort = (fftw_real *) calloc (Vz_max*Vy_max*2*(Vx_max / 2 + 1), sizeof (fftw_real) ))) { printf ("Memory allocation failure in Volume_tmpl_sort!!!"); printf ("Nx = %i, Ny = %i, Nz = %i, bytes = %i \n",2 *(Vx_max / 2 + 1),Vy_max, Vz_max, sizeof (fftw_real)); fflush (stdout); exit (1); } Ergebnis = (float *) calloc (Vz_max * Vy_max * Vx_max, sizeof (float)); /* Winkelraum */ Phi_min = atof (argv[4]); Phi_max = atof (argv[5]); Phi_step = atof (argv[6]); Psi_min = atof (argv[7]); Psi_max = atof (argv[8]); Psi_step = atof (argv[9]); Theta_min = atof (argv[10]); Theta_max = atof (argv[11]); Theta_step = atof (argv[12]); /* Pointspread Function*/ read_em (argv[13], &inputdata3); /* mask function */ read_em (argv[14], &inputdata4); Phi_steps = (Phi_max - Phi_min) / Phi_step + 1; Psi_steps = (Psi_max - Psi_min) / Psi_step + 1; Theta_steps = (Theta_max - Theta_min) / Theta_step + 1; winkel_max = Phi_steps * Psi_steps * Theta_steps; winkel_min = 0; range[0]=dim_fft-1; range[1]=dim_fft-1; range[2]=dim_fft-1; range_sub[0]=range[0]-Rx_max; range_sub[1]=range[1]-Rx_max; range_sub[2]=range[2]-Rx_max; sub[0]=1; sub[1]=1; sub[2]=1; cycles=(int)(FullVolume_dims[2]/(dim_fft-Rx_max)+0.5); cycles=(int)(FullVolume_dims[1]/(dim_fft-Rx_max)+0.5)*cycles; cycles=(int)(FullVolume_dims[0]/(dim_fft-Rx_max)+0.5)*cycles; cycle=0; if (myrank == 0) { printf 
("\n oscar starts to run ... ");tack (&start);fflush (stdout); /* prepare Output */ strcpy (name, argv[3]); strcat (name, ".ccf"); printf ("\nCreate outputfile: %s ... \n", name);fflush(stdout); create_em (name, FullVolume_dims); strcpy (name, argv[3]); strcat (name, ".ang"); printf ("Create outputfile: %s ... \n", name);fflush(stdout); create_em (name, FullVolume_dims); strcpy (name, argv[3]); strcat (name, ".ccf.norm"); printf ("Create outputfile: %s ... \n", name);fflush(stdout); create_em (name, FullVolume_dims); } for (sub[2]=1; sub[2] < FullVolume_dims[2]-Rz_max;sub[2]=sub[2]+dim_fft-Rz_max) { if (myrank == 0) { tack (&start); printf ("%f%%..", (float) (cycle / cycles * 100)); fflush (stdout); } for (sub[1]=1; sub[1] < FullVolume_dims[1]-Ry_max;sub[1]=sub[1]+dim_fft-Ry_max) { for (sub[0]=1; sub[0] < FullVolume_dims[0]-Rx_max;sub[0]=sub[0]+dim_fft-Rx_max) { cycle=cycle+1; subc[0]=sub[0]; subc[1]=sub[1]; subc[2]=sub[2]; if (sub[2] + range[2] > FullVolume_dims[2]) subc[2]=FullVolume_dims[2]-range[2]; /* we are at the corner ?!*/ if (sub[1] + range[1] > FullVolume_dims[1]) subc[1]=FullVolume_dims[1]-range[1]; /* we are at the corner ?!*/ if (sub[0] + range[0] > FullVolume_dims[0]) subc[0]=FullVolume_dims[0]-range[0]; /* we are at the corner ?!*/ read_em_subregion (argv[1], &inputdata1,subc,range); read_em_subregion (argv[1], &outputdata,subc,range); /* Umsortieren der Daten */ lauf = 0; for (k = 0; k < Vz_max; k++) { for (j = 0; j < Vy_max; j++) { for (i = 0; i < Vx_max; i++) { /* square - needed for normalization */ sqconv[i + 2 * (Vx_max / 2 + 1) * (j + Vy_max * k)] = inputdata1.floatdata[lauf]*inputdata1.floatdata[lauf]; Volume[i + 2 * (Vx_max / 2 + 1) * (j + Vy_max * k)] = inputdata1.floatdata[lauf]; inputdata1.floatdata[lauf] = -1.0; /* kleine Zahl wg Max-Op , hier kommen die CCFs rein*/ outputdata.floatdata[lauf] = -1.0; /* hier kommen die Winkel rein*/ lauf++; } } } rfftwnd_one_real_to_complex (p3, &Volume[0], NULL); /* einmalige fft von Suchvolumen */ 
rfftwnd_one_real_to_complex (p3, &sqconv[0], NULL); /* FFT of square*/ winkel_step_pe = (int) winkel_max / mysize; winkel_min_pe = myrank * winkel_step_pe; winkel_max_pe = winkel_min_pe + winkel_step_pe; Theta_winkel_nr = (int) winkel_min_pe / (Psi_steps * Phi_steps); Theta_winkel_rest_nr = winkel_min_pe - Theta_winkel_nr * (Psi_steps * Phi_steps); Psi_winkel_nr = (int) Theta_winkel_rest_nr / (Phi_steps); Psi_winkel_rest_nr = Theta_winkel_rest_nr - Psi_winkel_nr * (Phi_steps); Phi_winkel_nr = (int) Psi_winkel_rest_nr; Theta = Theta_winkel_nr * Theta_step + Theta_min; Phi = Phi_winkel_nr * Phi_step + Phi_min - Phi_step; Psi = Psi_winkel_nr * Psi_step + Psi_min; eps = 0.001; n = 0; //Friedrich -> Zaehlung der voxels n = countvoxel(inputdata4.dims[0], inputdata4.floatdata, eps); eps = 0.001; for (winkel_lauf = winkel_min_pe; winkel_lauf < winkel_max_pe;winkel_lauf++) { if (Phi < Phi_max) Phi = Phi + Phi_step; else { Phi = Phi_min; Psi = Psi + Psi_step; } if (Psi > Psi_max) { Psi = Psi_min; Theta = Theta + Theta_step; } tom_rotate3d (&Rot_tmpl[0], &inputdata2.floatdata[0], Phi, Psi, Theta, Rx_max, Ry_max, Rz_max); /*calculate Ref variance */ rms_wedge = energizer (Rx_min, Rx_max, n, &Rot_tmpl[0], &inputdata3.floatdata[0], &inputdata4.floatdata[0], r3, ri3); pastes (&Rot_tmpl[0], &Vol_tmpl[0], 1, 1, 1, Rx_max, Ry_max, Rz_max, Vx_max); scale = 1.0 / ((double)Vx_max * (double)Vy_max * (double)Vz_max * ((double) rms_wedge) ); //printf("hippo1: scale = %.10f \n",scale); sort4fftw(&Vol_tmpl_sort[0],&Vol_tmpl[0],Vx_max, Vy_max, Vz_max); rfftwnd_one_real_to_complex (p3, &Vol_tmpl_sort[0], NULL); PointVolume = (fftw_complex *) & Volume[0]; C3 = (fftw_complex *) & Vol_tmpl_sort[0]; /* Correlation */ correl(&PointVolume[0], &C3[0], Vx_max, Vy_max, Vz_max, scale); /* back to real space */ rfftwnd_one_complex_to_real (pi3, &C3[0], NULL); PointCorr = (fftw_real *) & C3[0]; /* Umsortieren der Daten */ sortback4fftw( &PointCorr[0], &Ergebnis[0], Vx_max, Vy_max, Vz_max); // crossen 
cross(&Ergebnis[0], Vx_max); /* 3rd: divide */ lauf = 0; for (k = 0 ; k < Vz_max ; k++) { for (j = 0; j < Vy_max; j++) { for (i = 0; i < Vx_max; i++) { if (inputdata1.floatdata[lauf] < Ergebnis[lauf] ) { inputdata1.floatdata[lauf] = Ergebnis[lauf]; outputdata.floatdata[lauf] = (int) winkel_lauf; } lauf++; } } } } /* Ende winkel_lauf */ //FF MPI_Barrier (MPI_COMM_WORLD); /* Ergebnisse einsammeln (myrank 0)*/ if (myrank == 0) { for (lauf_pe = 1; lauf_pe < mysize; lauf_pe++) { MPI_Recv (&Ergebnis[0], Vx_max * Vy_max * Vz_max, MPI_FLOAT, lauf_pe, 99, MPI_COMM_WORLD, &status); MPI_Recv (&conv[0], Vx_max * Vy_max * Vz_max, MPI_FLOAT, lauf_pe, 98, MPI_COMM_WORLD, &status); /* use conv as temporary memory for angles */ for (lauf = 0; lauf < Vx_max * Vy_max * Vz_max; lauf++) { if (inputdata1.floatdata[lauf] < Ergebnis[lauf]) { inputdata1.floatdata[lauf] = Ergebnis[lauf]; outputdata.floatdata[lauf] = conv[lauf]; } } } /*Ergebnisse eingesammelt */ } // myrank > 0: Ergebnisse senden else { MPI_Send (inputdata1.floatdata, Vx_max * Vy_max * Vz_max, MPI_FLOAT, 0, 99, MPI_COMM_WORLD); MPI_Send (outputdata.floatdata, Vx_max * Vy_max * Vz_max, MPI_FLOAT, 0, 98, MPI_COMM_WORLD); } MPI_Barrier (MPI_COMM_WORLD); // nicht normalisiertes Volumen und Winkel rausschreiben subc[0]=subc[0]+Rx_max/2; subc[1]=subc[1]+Rx_max/2; subc[2]=subc[2]+Rx_max/2; if (myrank==0) { offset[0]=Rx_max/2; offset[1]=Rx_max/2; offset[2]=Rx_max/2; dimarray[0]=dim_fft; dimarray[1]=dim_fft; dimarray[2]=dim_fft; strcpy (name, argv[3]); strcat (name, ".ccf"); write_em_subsubregion (name, &inputdata1,subc,range_sub,offset,dimarray); strcpy (name, argv[3]); strcat (name, ".ang"); write_em_subsubregion (name, &outputdata,subc,range_sub,offset,dimarray); /* ------------------- normalization - here only PE 0 ---------- */ pastes (&inputdata4.floatdata[0], &Vol_tmpl[0], 1, 1, 1, Rx_max, Ry_max, Rz_max, Vx_max); /* paste mask into zero volume*/ /* 1st local mean */ sort4fftw(&Vol_tmpl_sort[0], &Vol_tmpl[0], Vx_max, Vy_max, 
Vz_max); rfftwnd_one_real_to_complex (p3, &Vol_tmpl_sort[0], NULL); C3 = (fftw_complex *) & Vol_tmpl_sort[0]; /* Convolution of volume and mask */ scale = 1.0 / ((double)Vx_max * (double)Vy_max * (double)Vz_max ); convolve( &PointVolume[0], &C3[0], Vx_max, Vy_max, Vz_max, scale); rfftwnd_one_complex_to_real (pi3, &C3[0], NULL); PointCorr = (fftw_real *) & C3[0]; /* Umsortieren der Daten */ sortback4fftw( &PointCorr[0], &conv[0], Vx_max, Vy_max, Vz_max); /* 2nd : convolution of square and resorting*/ pastes (&inputdata4.floatdata[0], &Vol_tmpl[0], 1, 1, 1, Rx_max, Ry_max, Rz_max, Vx_max); /* paste mask into zero volume*/ sort4fftw( &Vol_tmpl_sort[0], &Vol_tmpl[0], Vx_max, Vy_max, Vz_max); rfftwnd_one_real_to_complex (p3, &Vol_tmpl_sort[0], NULL); C3 = (fftw_complex *) & Vol_tmpl_sort[0]; PointSq = (fftw_complex *) & sqconv[0];// set pointer to FFT of square convolve( &PointSq[0], &C3[0], Vx_max, Vy_max, Vz_max, scale); rfftwnd_one_complex_to_real (pi3, &C3[0], NULL); PointCorr = (fftw_real *) &C3[0]; //FF lauf = 0; for (k = 0; k < Vz_max; k++) { for (j = 0; j < Vy_max; j++) { for (i = 0; i < Vx_max; i++) { conv[lauf] = sqrt(PointCorr[i + 2 * (Vx_max / 2 + 1) * (j + Vy_max * k)] - conv[lauf]*conv[lauf]/((float) n) ) ;/*local variance*/ lauf++; } } } cross(&conv[0], Vx_max); /* perform division */ for (lauf = 0; k < Vz_max*Vy_max*Vz_max; lauf++) { if (conv[lauf] > eps) { inputdata1[lauf].floatdata = inputdata1[lauf].floatdata/conv[lauf]; } else { inputdata1[lauf].floatdata = 0; } } strcpy (name, argv[3]); strcat (name, ".ccf.norm"); write_em_subsubregion (name, &inputdata1,subc,range_sub,offset,dimarray); } MPI_Barrier (MPI_COMM_WORLD); } } /* these are the new brackets from the subregion_read , SN */ } free(Ergebnis); free(inputdata1.floatdata); free(inputdata2.floatdata); free(inputdata3.floatdata); free(inputdata4.floatdata); rfftwnd_destroy_plan(p3); rfftwnd_destroy_plan(pi3); rfftwnd_destroy_plan(r3); rfftwnd_destroy_plan(ri3); free(Volume); free(sqconv); 
free(conv); free(Rot_tmpl); free(Vol_tmpl_sort); free(outputdata.floatdata); if (myrank==0) { printf ("oscar finished. "); tack (&start); fflush(stdout); } MPI_Finalize(); /* end main */ }
/*
 * Time one in-place real n-dimensional transform of the given size.
 *
 * sz:       rank and per-dimension lengths of the transform
 * dir:      FFTW_REAL_TO_COMPLEX or the complex-to-real direction
 * flags:    extra planner flags (FFTW_IN_PLACE is always added)
 * specific: non-zero to plan with rfftwnd_create_plan_specific() on the
 *           benchmark buffer, zero to use rfftwnd_create_plan()
 *
 * Planner time is reported at verbosity 2, transform time at verbosity 1.
 */
void test_speed_nd_aux(struct size sz,
                       fftw_direction dir, int flags, int specific)
{
     fftw_real *in;
     fftwnd_plan plan;
     double t;
     fftw_time begin, end;
     int d, N;
     int plan_flags;

     /* only bench in-place multi-dim transforms */
     flags |= FFTW_IN_PLACE;
     plan_flags = speed_flag | flags | wisdom_flag | no_vector_flag;

     /* product of all leading dimensions times the last one plus two */
     N = 1;
     for (d = 0; d < sz.rank - 1; ++d)
          N *= sz.narray[d];
     N *= (sz.narray[d] + 2);

     in = (fftw_real *) fftw_malloc(N * howmany_fields * sizeof(fftw_real));

     begin = fftw_get_time();
     if (specific)
          plan = rfftwnd_create_plan_specific(sz.rank, sz.narray, dir,
                                              plan_flags,
                                              in, howmany_fields, 0, 1);
     else
          plan = rfftwnd_create_plan(sz.rank, sz.narray, dir, plan_flags);
     end = fftw_get_time();

     CHECK(plan != NULL, "can't create plan");

     t = fftw_time_to_sec(fftw_time_diff(end, begin));
     WHEN_VERBOSE(2, printf("time for planner: %f s\n", t));

     WHEN_VERBOSE(2, printf("\n"));
     WHEN_VERBOSE(2, (rfftwnd_print_plan(plan)));
     WHEN_VERBOSE(2, printf("\n"));

     if (dir == FFTW_REAL_TO_COMPLEX) {
          FFTW_TIME_FFT(rfftwnd_real_to_complex(plan, howmany_fields,
                                                in, howmany_fields, 1,
                                                0, 0, 0),
                        in, N * howmany_fields, t);
     } else {
          FFTW_TIME_FFT(rfftwnd_complex_to_real(plan, howmany_fields,
                                                (fftw_complex *) in,
                                                howmany_fields, 1,
                                                0, 0, 0),
                        in, N * howmany_fields, t);
     }

     rfftwnd_destroy_plan(plan);

     WHEN_VERBOSE(1, printf("time for one fft: %s", smart_sprint_time(t)));
     WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
     WHEN_VERBOSE(1, printf("\"mflops\" = 5/2 (N log2 N) / (t in microseconds)"
                            " = %f\n", 0.5 * howmany_fields * mflops(t, N)));

     fftw_free(in);

     WHEN_VERBOSE(1, printf("\n"));
}
void test_planner(int rank) { /* * create and destroy many plans, at random. Check the * garbage-collecting allocator of twiddle factors */ int i, dim; int r, s; fftw_plan p[PLANNER_TEST_SIZE]; fftwnd_plan pnd[PLANNER_TEST_SIZE]; int *narr, maxdim; chk_mem_leak = 0; verbose--; please_wait(); if (rank < 1) rank = 1; narr = (int *) fftw_malloc(rank * sizeof(int)); maxdim = (int) pow(8192.0, 1.0/rank); for (i = 0; i < PLANNER_TEST_SIZE; ++i) { p[i] = (fftw_plan) 0; pnd[i] = (fftwnd_plan) 0; } for (i = 0; i < PLANNER_TEST_SIZE * PLANNER_TEST_SIZE; ++i) { r = rand(); if (r < 0) r = -r; r = r % PLANNER_TEST_SIZE; for (dim = 0; dim < rank; ++dim) { do { s = rand(); if (s < 0) s = -s; s = s % maxdim + 1; } while (s == 0); narr[dim] = s; } if (rank == 1) { if (p[r]) rfftw_destroy_plan(p[r]); p[r] = rfftw_create_plan(narr[0], random_dir(), measure_flag | wisdom_flag); if (paranoid && narr[0] < 200) test_correctness(narr[0]); } if (pnd[r]) rfftwnd_destroy_plan(pnd[r]); pnd[r] = rfftwnd_create_plan(rank, narr, random_dir(), measure_flag | wisdom_flag); if (i % (PLANNER_TEST_SIZE * PLANNER_TEST_SIZE / 20) == 0) { WHEN_VERBOSE(0, printf("test planner: so far so good\n")); WHEN_VERBOSE(0, printf("test planner: iteration %d out of %d\n", i, PLANNER_TEST_SIZE * PLANNER_TEST_SIZE)); } } for (i = 0; i < PLANNER_TEST_SIZE; ++i) { if (p[i]) rfftw_destroy_plan(p[i]); if (pnd[i]) rfftwnd_destroy_plan(pnd[i]); } fftw_free(narr); verbose++; chk_mem_leak = 1; }
/*
 * Correctness test for in-place real n-dimensional transforms.
 *
 * rank, n:         dimensionality and per-dimension sizes of the transform
 * validated_plan:  complex fftwnd plan used to compute the reference result
 * alternate_api:   non-zero to create the plans through the rfftw2d/rfftw3d
 *                  entry points when rank is 2 or 3
 * specific:        non-zero to create the plans with the *_specific variants
 *
 * For every stride from 1 to MAX_STRIDE a random real input is transformed
 * real->complex in place and compared against the reference, then
 * transformed back complex->real, scaled by 1/N and compared against the
 * original input.  Failures are reported through CHECK.
 */
void testnd_in_place(int rank, int *n, fftwnd_plan validated_plan,
                     int alternate_api, int specific)
{
     int istride, ostride, howmany;
     int N, dim, i, j, k;
     int nc, nhc, nr;
     fftw_real *in1, *out3;
     fftw_complex *in2, *out1, *out2;
     fftwnd_plan p, ip;
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

     /* randomly exercise the thread-safe flag as well */
     if (coinflip())
          flags |= FFTW_THREADSAFE;

     /* N = total number of points; nr = last dimension; nc = product of the
        other dimensions; nhc = nr/2 + 1, the half-complex row length */
     N = nc = nr = nhc = 1;
     for (dim = 0; dim < rank; ++dim)
          N *= n[dim];
     if (rank > 0) {
          nr = n[rank - 1];
          nc = N / nr;
          nhc = nr / 2 + 1;
     }
     /* one buffer serves as real input, complex output and real result, so
        each row is allocated with 2 * nhc reals */
     in1 = (fftw_real *) fftw_malloc(2 * nhc * nc * MAX_STRIDE * sizeof(fftw_real));
     out3 = in1;
     out1 = (fftw_complex *) in1;
     in2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));
     out2 = (fftw_complex *) fftw_malloc(N * sizeof(fftw_complex));

     /* create the forward (p) and inverse (ip) plans through the API
        variant selected by alternate_api / specific */
     if (alternate_api && specific && (rank == 2 || rank == 3)) {
          if (rank == 2) {
               p = rfftw2d_create_plan_specific(n[0], n[1],
                                                FFTW_REAL_TO_COMPLEX, flags,
                                                in1, MAX_STRIDE, 0, 0);
               ip = rfftw2d_create_plan_specific(n[0], n[1],
                                                 FFTW_COMPLEX_TO_REAL, flags,
                                                 in1, MAX_STRIDE, 0, 0);
          } else {
               p = rfftw3d_create_plan_specific(n[0], n[1], n[2],
                                                FFTW_REAL_TO_COMPLEX, flags,
                                                in1, MAX_STRIDE, 0, 0);
               ip = rfftw3d_create_plan_specific(n[0], n[1], n[2],
                                                 FFTW_COMPLEX_TO_REAL, flags,
                                                 in1, MAX_STRIDE, 0, 0);
          }
     } else if (specific) {
          p = rfftwnd_create_plan_specific(rank, n, FFTW_REAL_TO_COMPLEX,
                                           flags,
                                           in1, MAX_STRIDE, in1, MAX_STRIDE);
          ip = rfftwnd_create_plan_specific(rank, n, FFTW_COMPLEX_TO_REAL,
                                            flags,
                                            in1, MAX_STRIDE, in1, MAX_STRIDE);
     } else if (alternate_api && (rank == 2 || rank == 3)) {
          if (rank == 2) {
               p = rfftw2d_create_plan(n[0], n[1], FFTW_REAL_TO_COMPLEX,
                                       flags);
               ip = rfftw2d_create_plan(n[0], n[1], FFTW_COMPLEX_TO_REAL,
                                        flags);
          } else {
               p = rfftw3d_create_plan(n[0], n[1], n[2], FFTW_REAL_TO_COMPLEX,
                                       flags);
               ip = rfftw3d_create_plan(n[0], n[1], n[2], FFTW_COMPLEX_TO_REAL,
                                        flags);
          }
     } else {
          p = rfftwnd_create_plan(rank, n, FFTW_REAL_TO_COMPLEX, flags);
          ip = rfftwnd_create_plan(rank, n, FFTW_COMPLEX_TO_REAL, flags);
     }

     CHECK(p != NULL && ip != NULL, "can't create plan");

     /* clear the whole shared buffer, including the per-row padding */
     for (i = 0; i < nc * nhc * 2 * MAX_STRIDE; ++i)
          out3[i] = 0;

     for (istride = 1; istride <= MAX_STRIDE; ++istride) {
          /* generate random inputs */
          for (i = 0; i < nc; ++i)
               for (j = 0; j < nr; ++j) {
                    c_re(in2[i * nr + j]) = DRAND();
                    c_im(in2[i * nr + j]) = 0.0;
                    /* same value into each of the istride interleaved
                       copies, using the padded row length 2 * nhc */
                    for (k = 0; k < istride; ++k)
                         in1[(i * nhc * 2 + j) * istride + k]
                             = c_re(in2[i * nr + j]);
               }

          /* reference result from the validated complex transform */
          fftwnd(validated_plan, 1, in2, 1, 1, out2, 1, 1);

          howmany = ostride = istride;

          WHEN_VERBOSE(2, printf("\n    testing in-place stride %d...",
                                 istride));

          /* for the plain unit-stride case, randomly pick between the
             general and the "one" API */
          if (howmany != 1 || istride != 1 || ostride != 1 || coinflip())
               rfftwnd_real_to_complex(p, howmany, in1, istride, 1,
                                       out1, ostride, 1);
          else
               rfftwnd_one_real_to_complex(p, in1, NULL);

          /* compare each half-complex row of each interleaved transform
             with the matching row of the reference */
          for (i = 0; i < nc; ++i)
               for (k = 0; k < howmany; ++k)
                    CHECK(compute_error_complex(out1 + i * nhc * ostride + k,
                                                ostride,
                                                out2 + i * nr, 1,
                                                nhc) < TOLERANCE,
                          "in-place (r2c): wrong answer");

          if (howmany != 1 || istride != 1 || ostride != 1 || coinflip())
               rfftwnd_complex_to_real(ip, howmany, out1, ostride, 1,
                                       out3, istride, 1);
          else
               rfftwnd_one_complex_to_real(ip, out1, NULL);

          /* scale the round trip by 1/N */
          for (i = 0; i < nc * nhc * 2 * istride; ++i)
               out3[i] *= 1.0 / N;

          /* the round trip must reproduce the real parts stored in in2 */
          for (i = 0; i < nc; ++i)
               for (k = 0; k < howmany; ++k)
                    CHECK(compute_error(out3 + i * nhc * 2 * istride + k,
                                        istride,
                                        (fftw_real *) (in2 + i * nr), 2,
                                        nr) < TOLERANCE,
                          "in-place (c2r): wrong answer (check 2)");
     }

     rfftwnd_destroy_plan(p);
     rfftwnd_destroy_plan(ip);

     /* out1 and out3 alias in1, so only three buffers are freed */
     fftw_free(out2);
     fftw_free(in2);
     fftw_free(in1);
}
/*
 * Check the out-of-place real<->complex N-dimensional transforms for
 * every input/output stride combination from 1 to MAX_STRIDE.
 *
 * rank, n          - number of dimensions and the size of each one
 * validated_plan   - complex plan whose output (via fftwnd) is used as
 *                    the reference the real transform is compared with
 *
 * Any mismatch above TOLERANCE (or a failed plan creation) is reported
 * through CHECK.
 */
void testnd_out_of_place(int rank, int *n, fftwnd_plan validated_plan)
{
     fftwnd_plan forward, backward;
     fftw_real *real_in, *real_back;
     fftw_complex *half_out, *ref_in, *ref_out;
     int total = 1, rows = 1, row_len = 1, half_len = 1;
     int stride_in, stride_out;
     int d, r, c, s, idx;
     int flags = measure_flag | wisdom_flag;

     if (coinflip()) {
          flags |= FFTW_THREADSAFE;
     }

     /* total element count, plus the geometry of the last dimension */
     for (d = 0; d < rank; ++d) {
          total *= n[d];
     }
     if (rank > 0) {
          row_len = n[rank - 1];
          rows = total / row_len;
          half_len = row_len / 2 + 1;
     }

     real_in = (fftw_real *)
          fftw_malloc(total * MAX_STRIDE * sizeof(fftw_real));
     real_back = (fftw_real *)
          fftw_malloc(total * MAX_STRIDE * sizeof(fftw_real));
     half_out = (fftw_complex *)
          fftw_malloc(half_len * rows * MAX_STRIDE * sizeof(fftw_complex));
     ref_in = (fftw_complex *) fftw_malloc(total * sizeof(fftw_complex));
     ref_out = (fftw_complex *) fftw_malloc(total * sizeof(fftw_complex));

     forward = rfftwnd_create_plan(rank, n, FFTW_REAL_TO_COMPLEX, flags);
     backward = rfftwnd_create_plan(rank, n, FFTW_COMPLEX_TO_REAL, flags);
     CHECK(forward != NULL && backward != NULL, "can't create plan");

     for (stride_in = 1; stride_in <= MAX_STRIDE; ++stride_in) {
          /* generate random inputs */
          for (r = 0; r < rows; ++r) {
               for (c = 0; c < row_len; ++c) {
                    const int pos = r * row_len + c;

                    c_re(ref_in[pos]) = DRAND();
                    c_im(ref_in[pos]) = 0.0;

                    /* replicate the sample into each interleaved copy */
                    for (s = 0; s < stride_in; ++s) {
                         real_in[pos * stride_in + s] = c_re(ref_in[pos]);
                    }
               }
          }

          for (idx = 0; idx < total * stride_in; ++idx) {
               real_back[idx] = 0.0;
          }

          /* reference result from the validated complex transform */
          fftwnd(validated_plan, 1, ref_in, 1, 1, ref_out, 1, 1);

          for (stride_out = 1; stride_out <= MAX_STRIDE; ++stride_out) {
               /* number of interleaved transforms: the smaller stride */
               int batch = stride_in;

               if (stride_out < stride_in) {
                    batch = stride_out;
               }

               WHEN_VERBOSE(2, printf("\n testing stride %d/%d...", stride_in, stride_out));

               /* in the unit-stride single-transform case, randomly use the "one" API */
               if (batch == 1 && stride_in == 1 && stride_out == 1 && !coinflip()) {
                    rfftwnd_one_real_to_complex(forward, real_in, half_out);
               } else {
                    rfftwnd_real_to_complex(forward, batch,
                                            real_in, stride_in, 1,
                                            half_out, stride_out, 1);
               }

               for (r = 0; r < rows; ++r) {
                    for (s = 0; s < batch; ++s) {
                         CHECK(compute_error_complex(half_out + r * half_len * stride_out + s,
                                                     stride_out,
                                                     ref_out + r * row_len, 1,
                                                     half_len) < TOLERANCE,
                               "out-of-place (r2c): wrong answer");
                    }
               }

               if (batch == 1 && stride_in == 1 && stride_out == 1 && !coinflip()) {
                    rfftwnd_one_complex_to_real(backward, half_out, real_back);
               } else {
                    rfftwnd_complex_to_real(backward, batch,
                                            half_out, stride_out, 1,
                                            real_back, stride_in, 1);
               }

               /* scale by 1/N before comparing with the original input */
               for (idx = 0; idx < total * stride_in; ++idx) {
                    real_back[idx] *= 1.0 / total;
               }

               /* whole-buffer comparison only when every interleaved copy was transformed */
               if (stride_in == batch) {
                    CHECK(compute_error(real_back, 1, real_in, 1,
                                        total * stride_in) < TOLERANCE,
                          "out-of-place (c2r): wrong answer");
               }

               for (r = 0; r < rows; ++r) {
                    for (s = 0; s < batch; ++s) {
                         CHECK(compute_error(real_back + r * row_len * stride_in + s,
                                             stride_in,
                                             (fftw_real *) (ref_in + r * row_len), 2,
                                             row_len) < TOLERANCE,
                               "out-of-place (c2r): wrong answer (check 2)");
                    }
               }
          }
     }

     rfftwnd_destroy_plan(forward);
     rfftwnd_destroy_plan(backward);

     fftw_free(real_back);
     fftw_free(ref_out);
     fftw_free(ref_in);
     fftw_free(half_out);
     fftw_free(real_in);
}
/* Use fftw2.
 *
 * Inverse-transform the single-band image `in` to a real (double) image
 * written to `out`. `dummy` owns the temporary complex image and the
 * half-complex scratch buffer. Returns 0 on success, -1 on error.
 */
static int
invfft1( IMAGE *dummy, IMAGE *in, IMAGE *out )
{
	IMAGE *complex_im = im_open_local( dummy, "invfft1-1", "t" );
	IMAGE *real_im = im_open_local( out, "invfft1-2", "t" );

	/* Number of complex values kept per row in half-complex form.
	 */
	const int half_width = in->Xsize / 2 + 1;

	/* Transform to halfcomplex here.
	 */
	double *half_complex = IM_ARRAY( dummy,
		in->Ysize * half_width * 2, double );

	rfftwnd_plan plan;
	int row, col;
	double *dst, *src;

	if( !complex_im || !real_im || !half_complex )
		return( -1 );
	if( im_pincheck( in ) || im_poutcheck( out ) )
		return( -1 );

	if( in->Coding != IM_CODING_NONE || in->Bands != 1 ) {
		im_error( "im_invfft", _( "one band uncoded only" ) );
		return( -1 );
	}

	/* Make dp complex image for input.
	 */
	if( im_clip2fmt( in, complex_im, IM_BANDFMT_DPCOMPLEX ) )
		return( -1 );

	/* Make mem buffer real image for output.
	 */
	if( im_cp_desc( real_im, in ) )
		return( -1 );
	real_im->BandFmt = IM_BANDFMT_DOUBLE;
	if( im_setupout( real_im ) )
		return( -1 );

	/* Build half-complex image: keep only the first half_width complex
	 * values of every row.
	 */
	dst = half_complex;
	for( row = 0; row < complex_im->Ysize; row++ ) {
		src = ((double *) complex_im->data) + row * in->Xsize * 2;

		for( col = 0; col < half_width; col++ ) {
			dst[2 * col] = src[2 * col];
			dst[2 * col + 1] = src[2 * col + 1];
		}

		dst += 2 * half_width;
	}

	/* Make the plan for the transform. Yes, they really do use nx for
	 * height and ny for width.
	 */
	plan = rfftw2d_create_plan( in->Ysize, in->Xsize,
		FFTW_BACKWARD, FFTW_MEASURE | FFTW_USE_WISDOM );
	if( !plan ) {
		im_error( "im_invfft",
			_( "unable to create transform plan" ) );
		return( -1 );
	}

	rfftwnd_one_complex_to_real( plan,
		(fftw_complex *) half_complex, (fftw_real *) real_im->data );

	rfftwnd_destroy_plan( plan );

	/* Copy to out.
	 */
	if( im_copy( real_im, out ) )
		return( -1 );

	return( 0 );
}
void destroy(int status) { /* External Variables. All external variables are defined in main.h */ extern double *Kx, *Kz, **K2, *cfl2; extern double **Q, **Qp, **Qpp, **R, **Rp, **Qw, **Qpw, **Rw, **Qs, **Qps, **Qpps, **Rs, **Rps, *Rp0, **Rpw, **Qppw, *Rpp0; extern double *Uadd, *Vadd, *Vpadd; extern double *Qy; extern double *W; extern mcomplex ****U, ****C; /* state variables */ extern mcomplex **Fa, **Fb, **TM; extern mcomplex *fa, *fb, *tm; extern double **MZ; extern double ***M; extern mcomplex ****IU, ****IC; /* incremental state variables */ extern mcomplex **IFa, **IFb, **ITM; extern mcomplex *Ifa, *Ifb, *Itm; extern mcomplex ****AU, ****AC; /* adjoint variables and will use the same other variables used in state equations */ extern mcomplex ****IAU, ****IAC; /* incremental adjoint variables */ extern mcomplex **Uxbt, **Uzb; /* variables used to store dux duz evaluated at y=-1 used for computing boundary conditions for incremental state equations */ extern mcomplex **Uxb, **Uzb; /* variables used to store dux duz evaluated at y=-1 from previous state used for boundary conditions for incremental state equations */ extern mcomplex **IUxb, **IUzb; extern mcomplex **IAUxb, **IAUzb; extern mcomplex **AUxb, **AUzb; /* variables used to store dux duz evaluated at y=-1 used for computing boundary conditions for incremental state equations */ extern fftw_complex ***CT, ***ICT; /* variables used in fft */ extern fftw_plan pf1, pf2; extern fftw_plan Ipf1, Ipf2; extern rfftwnd_plan pr1, pr2; extern mcomplex *****MC, *****MIC; /* variables used to store state and incremental state solutions between two check points. 
*/ extern mcomplex ****MU, ****MIU; /* variables used to store manufacture solutions */ extern mcomplex ****LU, ****LIU; if (status & DESTROY_STATUS_FFTW) { fftw_destroy_plan(pf1); fftw_destroy_plan(pf2); rfftwnd_destroy_plan(pr1); rfftwnd_destroy_plan(pr2); fftw_destroy_plan(Ipf1); fftw_destroy_plan(Ipf2); } if (status & DESTROY_STATUS_GETMEM) { freec4Darray(U); freec4Darray(C); freec3Darray(CT); freecMatrix(Fa); freecMatrix(Fb); freed3Darray(M); freecMatrix(TM); freedMatrix(MZ); freecVector(fa); freecVector(fb); freecVector(tm); freedVector(cfl2); freec4Darray(IU); freec4Darray(IC); freecMatrix(IFa); freecMatrix(IFb); freecMatrix(ITM); freecVector(Ifa); freecVector(Ifb); freecVector(Itm); freec3Darray(ICT); freecMatrix(Uxbt); freecMatrix(Uzbt); freecMatrix(Uxb); freecMatrix(Uzb); freec4Darray(AU); freec4Darray(IAU); freec4Darray(AC); freec4Darray(IAC); freec5Darray(MC); freec5Darray(MIC); freec4Darray(MU); freec4Darray(MIU); freec4Darray(LU); freec4Darray(LIU); freecMatrix(AUxb); freecMatrix(AUzb); freecMatrix(IUzb); freecMatrix(IUxb); freecMatrix(IAUxb); freecMatrix(IAUzb); freecMatrix(grad); freecMatrix(GUxb); freecMatrix(GUzb); freecMatrix(GIUxb); freecMatrix(GIUzb); freecMatrix(HUxb); freecMatrix(HUzb); freecMatrix(HAUxb); freecMatrix(HAUzb); freecMatrix(hess); } if (status & DESTROY_STATUS_WAVENUMS) { freedVector(Kx); freedVector(Kz); freedMatrix(K2); } if (status & DESTROY_STATUS_LEGENDRE) { freedMatrix(Q); freedMatrix(Qp); freedMatrix(Qpp); freedMatrix(R); freedMatrix(Rp); freedMatrix(Qw); freedMatrix(Qpw); freedMatrix(Rw); freedMatrix(Qs); freedMatrix(Qps); freedMatrix(Qpps); freedMatrix(Rs); freedMatrix(Rps); freedVector(Rp0); freedVector(W); freedVector(Vadd); freedVector(Vpadd); freedVector(Uadd); freedVector(Qy); freedVector(Rpp0); freedMatrix(Qppw); freedMatrix(Rpw); } }