/**
 * Initializes the library.
 *
 * Turns on FFTW's threading support and, when that succeeds, sets the number
 * of threads that subsequently created plans will use.
 *
 * @param nthreads The number of OpenMP threads to use for execution of local FFT.
 * @return 0 if successful, 1 if FFTW thread initialization failed
 */
int accfft_init(int nthreads) {
    // fftw_init_threads() signals failure with a zero return value.
    if (fftw_init_threads() == 0) {
        return 1;
    }

    fftw_plan_with_nthreads(nthreads);
    return 0;
}
/// Initialize FFTW's threading support.
///
/// When built with FFTW_WITH_THREADS, enables FFTW threads and makes plans
/// created afterwards use @p threads threads. Without that macro this is a
/// no-op.
///
/// @param threads number of threads to hand to fftw_plan_with_nthreads()
void FFTInit(int threads)
{
#ifdef FFTW_WITH_THREADS
    // fftw_init_threads() returns zero on failure. In that case leave the
    // planner alone so FFTW keeps working single-threaded instead of
    // configuring a threading layer that never came up.
    if (fftw_init_threads() != 0) {
        fftw_plan_with_nthreads(threads);
    }
#else
    (void)threads;  // unused when FFTW threading is compiled out
#endif
}
void dct(int N, double *in, double *out){ // compute variables int ii; fftw_plan my_plan; fftw_init_threads(); // define plan fftw_plan_with_nthreads(omp_get_max_threads()); my_plan = fftw_plan_r2r_1d(N, in, out, FFTW_REDFT00, FFTW_ESTIMATE); //execute plan fftw_execute(my_plan); // scale output for(ii=0; ii < N; ii++){ if(ii == 0 || ii == N-1){ out[ii] = out[ii]/(double)(N-1)/2.0; } else{ out[ii] = out[ii]/(double)(N-1); } } // destroy plan fftw_destroy_plan(my_plan); fftw_cleanup_threads(); }
/**
 * Builds forward and backward 2-D FFT plans for a convolution.
 *
 * @param width           image width
 * @param height          image height
 * @param kw              kernel width
 * @param mode            0: plan for width x height;
 *                        1: plan for (width + kw - 1) x (height + kw - 1)
 * @param threadMaxCount  FFTW threads to plan with; threading is only
 *                        enabled when this is greater than 1
 * @throws std::invalid_argument if mode is neither 0 nor 1
 * @throws std::runtime_error    if the scratch buffers used for planning
 *                               cannot be allocated
 */
convolution_plan::convolution_plan(int width, int height, int kw, int mode, int threadMaxCount)
{
    switch (mode) {
    case 0:
        this->width = width;
        this->height = height;
        break;
    case 1:
        this->width = width + kw - 1;
        this->height = height + kw - 1;
        break;
    default:
        throw std::invalid_argument("Warning: 2d convolution plan: Invalid mode");
    }

    if (threadMaxCount > 1) {
        fftw_init_threads(); // This MUST come before all other fftw calls
        fftw_plan_with_nthreads(threadMaxCount);
    }

    this->depth = 1;
    this->dim = 2;
    this->kw = kw;
    this->threadMaxCount = threadMaxCount;

    // Scratch arrays exist only so the planner has something to work on;
    // they are released as soon as both plans have been created.
    const size_t scratchBytes = sizeof(fftw_complex) * this->width * this->height;
    fftw_complex* benchmarkArray1 = (fftw_complex*) fftw_malloc(scratchBytes);
    fftw_complex* benchmarkArray2 = (fftw_complex*) fftw_malloc(scratchBytes);

    // FFTW_MEASURE runs transforms on these buffers while planning, so a
    // failed allocation must be caught here rather than inside the planner.
    if (benchmarkArray1 == NULL || benchmarkArray2 == NULL) {
        if (benchmarkArray1 != NULL) fftw_free(benchmarkArray1);
        if (benchmarkArray2 != NULL) fftw_free(benchmarkArray2);
        throw std::runtime_error("2d convolution plan: cannot allocate planning buffers");
    }

    this->forwardPlan = fftw_plan_dft_2d(this->height, this->width, benchmarkArray1, benchmarkArray2, FFTW_FORWARD, FFTW_MEASURE);
    this->backwardPlan = fftw_plan_dft_2d(this->height, this->width, benchmarkArray1, benchmarkArray2, FFTW_BACKWARD, FFTW_MEASURE);

    fftw_free(benchmarkArray1);
    fftw_free(benchmarkArray2);

    this->staticKernel = NULL;
}
void plan_fftw( Search_settings *sett, Command_line_opts *opts, FFTW_plans *plans, FFTW_arrays *fftw_arr, Aux_arrays *aux_arr) { char hostname[512], wfilename[512]; FILE *wisdom; /* Imports a "wisdom file" containing information * (previous tests) about how to optimally compute Fourier * transforms on a given machine. If wisdom file is not present, * it will be created after the test (measure) runs * of the fft_plans are performed below * (see http://www.fftw.org/fftw3_doc/Wisdom.html) */ fftw_init_threads(); gethostname(hostname, 512); sprintf (wfilename, "wisdom-%s.dat", hostname); if((wisdom = fopen (wfilename, "r")) != NULL) { fftw_import_wisdom_from_file(wisdom); fclose (wisdom); } sett->Ninterp = sett->interpftpad*sett->nfft; // array length (xa, xb) is max{fftpad*nfft, Ninterp} fftw_arr->arr_len = (sett->fftpad*sett->nfft > sett->Ninterp ? sett->fftpad*sett->nfft : sett->Ninterp); // fftw_arr->xa = fftw_malloc(2*fftw_arr->arr_len*sizeof(fftw_complex)); //fftw_arr->xb = fftw_arr->xa + fftw_arr->arr_len; fftw_arr->xa = fftw_malloc(fftw_arr->arr_len*sizeof(fftw_complex)); fftw_arr->xb = fftw_malloc(fftw_arr->arr_len*sizeof(fftw_complex)); sett->nfftf = sett->fftpad*sett->nfft; // Change FFTW_MEASURE to FFTW_PATIENT for more optimized plan // (takes more time to generate the wisdom file) plans->plan = fftw_plan_dft_1d(sett->nfftf, fftw_arr->xa, fftw_arr->xa, FFTW_FORWARD, FFTW_MEASURE); fftw_plan_with_nthreads(omp_get_max_threads()); plans->pl_int = fftw_plan_dft_1d(sett->nfft, fftw_arr->xa, fftw_arr->xa, FFTW_FORWARD, FFTW_MEASURE); plans->pl_inv = fftw_plan_dft_1d(sett->Ninterp, fftw_arr->xa, fftw_arr->xa, FFTW_BACKWARD, FFTW_MEASURE); // Generates a wisdom FFT file if there is none if((wisdom = fopen(wfilename, "r")) == NULL) { wisdom = fopen(wfilename, "w"); fftw_export_wisdom_to_file(wisdom); } fclose (wisdom); } // end of FFT plans
void set_num_threads(int nr) { num_threads = nr; omp_set_num_threads(nr); int ret = fftw_init_threads(); if (ret == 0) {cout << "error" << endl; exit(1);} fftw_plan_with_nthreads(nr); }
// Constructs the FFT helper with empty plan caches and configures FFTW to
// plan with `threads` threads. Throws the int value 1 if FFTW's threading
// cannot be initialized.
FFT::FFT(size_t threads)
    : forward_plans(),
      backward_plans()
{
    const bool threadingReady = (fftw_init_threads() != 0);
    if (!threadingReady) {
        std::cerr << "Unable to init threads in fftw\n";
        throw 1;
    }

    // fftw_plan_with_nthreads() takes an int.
    fftw_plan_with_nthreads(static_cast<int>(threads));
}
// Enables multithreading in both the double-precision (fftw_) and
// single-precision (fftwf_) FFTW libraries and makes new plans use
// getNumProcessors() threads. Each initialization is checked through
// require(), tagged with this function's signature.
void cSystem::startNthreadsFFTW(void)
{
    require( fftw_init_threads() != 0, "void cSystem::startNthreadsFFTW(void)");
    require(fftwf_init_threads() != 0, "void cSystem::startNthreadsFFTW(void)");

    // Query the processor count once so both planners and the log line are
    // guaranteed to use the same value.
    const int nThreads = getNumProcessors();

    fftw_plan_with_nthreads(nThreads);
    fftwf_plan_with_nthreads(nThreads);

    std::cout << "FFTW multithreading is turned on: " << nThreads << " threads\n\n";
}
// Configures the FFT backend for multithreaded execution.
//
// Does nothing unless compiled with OpenMP. With Intel MKL's FFTW3 wrappers
// the thread count is written to fftw3_mkl.number_of_user_threads; otherwise
// FFTW's own threading is initialized and new plans use
// omp_get_max_threads() threads.
void fftInitThreading()
{
#ifdef _OPENMP
#ifdef INTEL_MKL_VERSION
    // NOTE: Using Intel MKL (and threading particularly)
    //       could require a lot of additional setup
    fftw3_mkl.number_of_user_threads = omp_get_max_threads();
#else
    // fftw_init_threads() returns zero on failure; in that case keep FFTW
    // single-threaded rather than configuring a threading layer that is not
    // available.
    if (fftw_init_threads() != 0) {
        fftw_plan_with_nthreads(omp_get_max_threads());
    }
#endif
#endif
}
/* Demo driver: prints the thread count, initializes FFTW threading and runs
 * simple_test_nfsft(). */
int main(void)
{
  const int thread_count = nfft_get_omp_num_threads();
  printf("nthreads = %d\n", thread_count);

  /* init */
  fftw_init_threads();

  fputs("Computing an NDSFT, an NFSFT, an adjoint NDSFT, and an adjoint NFSFT"
    "...\n\n", stdout);
  simple_test_nfsft();

  return EXIT_SUCCESS;
}
/*
 * JNI entry point: enables FFTW threading and sets the planner thread count.
 *
 * fftw_init_threads() is only attempted while threads_initialized is still
 * zero. If it fails, a java.lang.Exception with the message
 * "Failed to initialize thread" is raised in the calling Java thread and no
 * state is changed. Otherwise the planner is set to num_of_threads threads
 * and both threads_enabled and threads_initialized are set to 1.
 */
JNIEXPORT void JNICALL Java_br_usp_ime_dspbenchmarking_algorithms_fftw_FFTW_initThreadsJNI(JNIEnv *pEnv, jobject pObj, jint num_of_threads) {
  if (!threads_initialized && !fftw_init_threads()) {
    jclass exception_class = (*pEnv)->FindClass(pEnv, "java/lang/Exception");

    /* FindClass returns NULL (with an exception already pending) when the
       class cannot be loaded; ThrowNew must not be called with a NULL class. */
    if (exception_class != NULL) {
      (*pEnv)->ThrowNew(pEnv, exception_class, "Failed to initialize thread");
    }
    return;
  }

  fftw_plan_with_nthreads(num_of_threads);
  threads_enabled = 1;
  threads_initialized = 1;
  __android_log_print(ANDROID_LOG_INFO, LOG_TAG, "Threads enabled");
}
void FFTHandler::init(long arg_n){ //#ifndef DEBUG fftw_init_threads(); fftw_plan_with_nthreads(omp_get_max_threads()); //#endif n = arg_n; leased=0; //fprintf(stderr, "Initializing fft plan of size [%ld]\n", n); memoryPool.resize(1); memoryPool[0] = (double*)fftw_malloc(sizeof(double)*2*(n/2+1)); fftForwardPlan = fftw_plan_dft_r2c_1d(n, memoryPool[0], (fftw_complex*)memoryPool[0],FFTW_MEASURE); fftReversePlan = fftw_plan_dft_c2r_1d(n, (fftw_complex*)memoryPool[0], memoryPool[0],FFTW_MEASURE); }
void fft_init() { fftw_init_threads(); fftw_plan_with_nthreads(6); int i; for(i=0;i<2;i++){ fft_in[i] = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * width[i]*height[i]); fft_out[i] = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * width[i]*height[i]); fft_plan[i]=fftw_plan_dft_2d(height[i],width[i],fft_in[i],fft_out[i], FFTW_FORWARD, FFTW_ESTIMATE); } }
int cfft2_init(int pad1 /* padding on the first axis */, int nx, int ny /* input data size */, int *nx2, int *ny2 /* padded data size */) /*< initialize >*/ { #ifdef SF_HAS_FFTW #ifdef _OPENMP fftw_init_threads(); sf_warning("Using threaded FFTW3! \n"); fftw_plan_with_nthreads(omp_get_max_threads()); #endif #endif #ifndef SF_HAS_FFTW int i2; #endif nk = n1 = kiss_fft_next_fast_size(nx*pad1); #ifndef SF_HAS_FFTW cfg1 = kiss_fft_alloc(n1,0,NULL,NULL); icfg1 = kiss_fft_alloc(n1,1,NULL,NULL); #endif n2 = kiss_fft_next_fast_size(ny); cc = sf_complexalloc2(n1,n2); dd = sf_complexalloc2(nk,n2); #ifndef SF_HAS_FFTW cfg2 = kiss_fft_alloc(n2,0,NULL,NULL); icfg2 = kiss_fft_alloc(n2,1,NULL,NULL); tmp = (kiss_fft_cpx **) sf_alloc(n2,sizeof(*tmp)); tmp[0] = (kiss_fft_cpx *) sf_alloc(nk*n2,sizeof(kiss_fft_cpx)); for (i2=0; i2 < n2; i2++) { tmp[i2] = tmp[0]+i2*nk; } trace2 = sf_complexalloc(n2); ctrace2 = (kiss_fft_cpx *) trace2; #endif *nx2 = n1; *ny2 = n2; wt = 1.0/(n1*n2); return (nk*n2); }
// Sets the number of threads FFTW plans should use.
//
// A value of 1 leaves everything untouched. For any other value the
// transformer records the count, marks threading as enabled, and — while
// holding fftw_plan_mutex — initializes FFTW threading and configures the
// planner. A failed initialization is reported through REPORT_ERROR.
void FourierTransformer::setThreadsNumber(int tNumber)
{
    if (tNumber == 1)
    {
        return;
    }

    threadsSetOn = true;
    nthreads = tNumber;

    pthread_mutex_lock(&fftw_plan_mutex);
    const bool threadsReady = (fftw_init_threads() != 0);
    if (threadsReady)
    {
        fftw_plan_with_nthreads(nthreads);
    }
    pthread_mutex_unlock(&fftw_plan_mutex);

    // Report only after the mutex has been released: if REPORT_ERROR does not
    // return (presumably it throws — confirm against its definition), the
    // mutex would otherwise stay locked forever.
    if (!threadsReady)
    {
        REPORT_ERROR("FFTW cannot init threads (setThreadsNumber)");
    }
}
/****** fft_init ************************************************************
PROTO	void fft_init(void)
PURPOSE	Initialize the FFT routines (runs its body only on the first call).
INPUT	-.
OUTPUT	-.
NOTES	Global preferences are used for multithreading.
AUTHOR	E. Bertin (IAP)
VERSION	29/11/2006
 ***/
void	fft_init(void)
  {
/* Already initialized: nothing to do */
  if (firsttimeflag)
    return;

#ifdef USE_THREADS
/* A zero return from fftw_init_threads() means threading is unavailable */
  if (fftw_init_threads() == 0)
    error(EXIT_FAILURE, "*Error*: thread initialization failed in ", "FFTW");
  fftw_plan_with_nthreads(prefs.nthreads);
  QPTHREAD_MUTEX_INIT(&fftmutex, NULL);
#endif

  firsttimeflag = 1;
  }
// have a look for parallel ffts int parallel_ffts( void ) { // initialise parallel fft ? #ifdef OMP_FFTW if( fftw_init_threads() == 0 ) { return GLU_FAILURE ; } else { // in here I set the number of fftw threads to be the same // as the usual number of parallel threads .. fftw_plan_with_nthreads( Latt.Nthreads ) ; fprintf( stdout , "[PAR] FFTW using %d thread(s) \n" , Latt.Nthreads ) ; } #endif return GLU_SUCCESS ; }
/****** fft_init ************************************************************
PROTO	void fft_init(int nthreads)
PURPOSE	Initialize the FFT routines (runs its body only on the first call).
INPUT	Number of threads to use for single-precision FFTW plans.
OUTPUT	-.
NOTES	Threading is only set up when built with USE_THREADS and
	nthreads > 1.
AUTHOR	E. Bertin (IAP)
VERSION	26/06/2009
 ***/
void	fft_init(int nthreads)
  {
  if (!firsttimeflag)
    {
#ifdef USE_THREADS
    if (nthreads > 1)
      {
/*---- The planner configured below is the single-precision one (fftwf_), */
/*---- so it is the single-precision threading layer that must be started. */
      if (!fftwf_init_threads())
        error(EXIT_FAILURE, "*Error*: thread initialization failed in ", "FFTW");
/*---- Use the same count that was tested in the guard above */
      fftwf_plan_with_nthreads(nthreads);
      }
#endif
    firsttimeflag = 1;
    }

  return;
  }
// have a look for parallel ffts static int parallel_ffts( void ) { // initialise parallel fft ? #ifdef OMP_FFTW if( fftw_init_threads( ) == 0 ) { return FAILURE ; } else { // in here I set the number of fftw threads to be the same // as the usual number of parallel threads .. #pragma omp parallel { nthreads = omp_get_num_threads( ) ; } // set nthreads } fftw_plan_with_nthreads( nthreads ) ; printf("[PAR] FFTW using %d thread(s) \n" , nthreads ) ; #endif return SUCCESS ; }
void ini() { LogLevel = 2; fftw_init_threads(); //<-- Desini aufrufen fftw_plan_with_nthreads(1); iniTools(5517+Parameter_SD_Selector*12345); physicalScale = NaN; physicalScaleError = NaN; GoldstoneRenormFactor = 1.0; GoldstoneRenormFactorError = 0.0; GoldstoneRenormFactorDetermined = false; xmlOutput_Tag = new char*[10000]; xmlOutput_Description = new char*[10000]; xmlOutput_Value = new double[10000]; xmlOutput_ValueError = new double[10000]; xmlOutput_Count = 0; }
/*! this is a straight forward main function, that does the following - load lua config file - get configuration table from config - contruct a preferences class from them - start simulation according to the preferences - dump the result as precified in preferences */ int main(int argc, char * argv[argc]) { fftw_init_threads(); fftw_plan_with_nthreads(4); if (argc != 3) { fprintf(stderr, "usage: %s config.lua result.dat\n", argv[0]); return -1; } lua_State * L = luaL_newstate(); luaL_openlibs(L); preferences_t * prefs = preferences_new(); if (luaL_dofile(L, argv[1])) { fprintf(stderr, "could not load '%s' : %s\n", argv[1], lua_tostring(L, 1)); } else { // get config table lua_getfield(L, LUA_GLOBALSINDEX, "config"); if (lua_isnil(L, -1)) { fprintf(stderr, "table config undefined\n"); } else { // ref config table, so we can access it in preferences_read() prefs->config = luaL_ref(L, LUA_REGISTRYINDEX); if (!preferences_read(L, prefs)) { if (!start_simulation(prefs)) { FILE * fp = fopen(argv[2], "wb"); assert(fp); dump_results(prefs, fp); fclose(fp); } } } } preferences_free(prefs); lua_close(L); fftw_cleanup(); fftw_cleanup_threads(); pthread_exit(NULL); }
void Transformer::init() { //printf("num_threads = %d\n", omp_get_max_threads()); printf("nthreads = %d\n", nfft_get_num_threads()); /* init */ fftw_init_threads(); fftw_plan_with_nthreads(omp_get_max_threads()); /* precomputation (for fast polynomial transform) */ nfsft_precompute(N, 1000.0, 0U, 0U); /* Initialize transform plan using the guru interface. All input and output * arrays are allocated by nfsft_init_guru(). Computations are performed with * respect to L^2-normalized spherical harmonics Y_k^n. The array of spherical * Fourier coefficients is preserved during transformations. The NFFT uses a * cut-off parameter m = 6. See the NFFT 3 manual for details. */ nfsft_init(&plan, N, M); //nfsft_init_guru(&plan, N, M, NFSFT_MALLOC_X | NFSFT_MALLOC_F | // NFSFT_MALLOC_F_HAT | NFSFT_NORMALIZED | NFSFT_PRESERVE_F_HAT, // PRE_PHI_HUT | PRE_PSI | FFTW_INIT | FFT_OUT_OF_PLACE, 6); }
/* Program entry point: sets up FFTW with 4 threads, loads the input files,
 * allocates the buffers, runs the analysis and releases everything again.
 * Command-line arguments are not used. Always returns 0. */
int main(int argc, char *argv[])
{
  /* --- set-up --- */
  fputs("Memmory Allocation...\n", stdout);
  fftw_init_threads();
  fftw_plan_with_nthreads(4);
  load_files();
  init_buffers();

  /* --- processing --- */
  fputs("Analysis Process Started...\n", stdout);
  analysis();
  fputs("Process Finished.\n", stdout);

  /* --- tear-down --- */
  clean_up_memmory();
  fftw_cleanup_threads();

  return 0;
}
// Builds the forward (pf) and backward (pi) in-place guru DFT plans over
// realdata.
//
// The transform has rank DIM. Axis d has m[d] points; axis 0 has stride n3
// and each further axis has the previous stride times the previous extent
// (n3, n3*m[0], ...). The transform is repeated n3 times with unit stride
// (the "howmany" loop). Plans are created with NUM_THREADS threads and
// FFTW_ESTIMATE; the planner is reset to one thread afterwards.
Space_trans::Space_trans(const Config & configSettings):Data(configSettings)
{
    fftw_iodim dims[DIM], howmany_dims[1];

    const int rank = DIM;
    const int howmany_rank = 1;

    // Fill every axis in a loop so no entry of dims is left uninitialized
    // for any DIM. For DIM == 1 and DIM == 2 this yields exactly the values
    // the explicit per-case assignments produced.
    // Assumes m holds at least DIM extents — TODO confirm for DIM > 2.
    int stride = n3;
    for (int d = 0; d < DIM; ++d) {
        dims[d].n  = m[d];
        dims[d].is = stride;
        dims[d].os = stride;
        if (d + 1 < DIM) {
            stride *= m[d];
        }
    }

    howmany_dims[0].n  = n3;
    howmany_dims[0].is = 1;
    howmany_dims[0].os = 1;

    fftw_init_threads();
    fftw_plan_with_nthreads(NUM_THREADS);

    pf = fftw_plan_guru_dft(
        rank, dims,
        howmany_rank, howmany_dims,
        realdata, realdata,
        FFTW_FORWARD, FFTW_ESTIMATE );

    pi = fftw_plan_guru_dft(
        rank, dims,
        howmany_rank, howmany_dims,
        realdata, realdata,
        FFTW_BACKWARD, FFTW_ESTIMATE );

    // Plans created elsewhere after this constructor are single-threaded.
    fftw_plan_with_nthreads(1);
}
/* Benchmark driver.
 * Arguments: argv[1] and argv[2] are parsed as integers and passed to
 * bench_openmp() together with stdin; in OpenMP builds argv[3] gives the
 * number of OpenMP threads. Returns 1 on a wrong argument count, else 0. */
int main(int argc, char **argv)
{
  int m, psi_flag;

#ifdef _OPENMP
  if (argc != 4)
    return 1;

  fftw_init_threads();
  omp_set_num_threads(atoi(argv[3]));
#else
  if (argc != 3)
    return 1;
#endif

  m = atoi(argv[1]);
  psi_flag = atoi(argv[2]);

  bench_openmp(stdin, m, psi_flag);

  return 0;
}
// Run function of whole program static void run(const char *config_filename) { double max_z, min_z; source_object *object; double complex *optical_field; // Initialize FFTW threads fftw_init_threads(); // Load config to memory load_config(config_filename); // Initialize logger initialize_logger(get_integer_value(CONF_LOGGER_DEBUG_MODE)); // Load source object load_source_object(&object, get_string_value(CONF_OBJECT_POINTS_FILE)); // Initialize final optical field initialize_optical_field(&optical_field); // Extract z position extremes of source object extract_object_proportions(object, &min_z, &max_z, OBJECT_DEPTH); // Modified WRP method itself perform_wrp_method(object, optical_field, min_z, max_z); // Numerical reconstruction on hologram perform_numerical_reconstruction(optical_field, min_z); // Memory clear log_info("Deleting all structures from memory\n"); free(optical_field); delete_source_object(object); delete_lookup_table(); delete_config(); }
/* NFSFT benchmark driver.
 * argv[1..4] are parsed as integers: m, nfsft_flags, psi_flags and the
 * number of repetitions; in OpenMP builds argv[5] gives the OpenMP thread
 * count. The problem is read from stdin and bench_openmp() is run the
 * requested number of times. Returns 1 on a wrong argument count, else 0. */
int main(int argc, char **argv)
{
  int m, nfsft_flags, psi_flags;
  int repetitions;
  int trafo_adjoint, N, M, pass;
  double *x;
  C *f_hat, *f;

#ifdef _OPENMP
  if (argc != 6)
    return 1;

  fftw_init_threads();
  omp_set_num_threads(atoi(argv[5]));
#else
  if (argc != 5)
    return 1;
#endif

  m           = atoi(argv[1]);
  nfsft_flags = atoi(argv[2]);
  psi_flags   = atoi(argv[3]);
  repetitions = atoi(argv[4]);

  bench_openmp_readfile(stdin, &trafo_adjoint, &N, &M, &x, &f_hat, &f);

  /* precomputation (for fast polynomial transform) */
  nfsft_precompute(N,1000.0,0U,0U);

  pass = 0;
  while (pass < repetitions)
  {
    bench_openmp(trafo_adjoint, N, M, x, f_hat, f, m, nfsft_flags, psi_flags);
    pass++;
  }

  return 0;
}
//static void GradientsBase::solveImage(imageType_t const &rLaplaceImage_p, imageType_t &rSolution_p) { int const nRows=rLaplaceImage_p.Height(); int const nCols=rLaplaceImage_p.Width(); int const nChannels=rLaplaceImage_p.NumberOfChannels(); imageType_t::color_space colorSpace=rLaplaceImage_p.ColorSpace(); #ifdef USE_FFTW // adapted from http://www.umiacs.umd.edu/~aagrawal/software.html, AssertColImage(rLaplaceImage_p); // just in case we accidentally change this, because code below believes in double... Assert(typeid(realType_t)==typeid(double)); // check assumption of row major format Assert(rLaplaceImage_p.PixelAddress(0,0)+1==rLaplaceImage_p.PixelAddress(1,0)); rSolution_p.AllocateData(nCols,nRows,nChannels,colorSpace); rSolution_p.ResetSelections(); rSolution_p.Black(); #ifdef USE_THREADS // threaded version int const nElements=nRows*nCols; int const nThreads=Thread::NumberOfThreads(nElements); if(fftw_init_threads()==0){ throw Error("Problem initilizing threads"); } fftw_plan_with_nthreads(nThreads); #endif for(int chan=0;chan<nChannels;++chan){ TimeMessage startSolver(String("FFTW Solver, Channel ")+String(chan)); // FIXME see if fttw_allocate gives us something... imageType_t fcos(nCols,nRows); #if 0 // During experiment, the higher optimization did not give us anything except for an additional delay. May change later. 
fftw_plan pForward= fftw_plan_r2r_2d(nRows, nCols, const_cast<double *>(rLaplaceImage_p.PixelData(chan)), fcos.PixelData(), FFTW_REDFT10, FFTW_REDFT10, FFTW_MEASURE); fftw_plan pInverse = fftw_plan_r2r_2d(nRows, nCols, fcos.PixelData(), rSolution_p.PixelData(chan), FFTW_REDFT01, FFTW_REDFT01, FFTW_ESTIMATE); #else fftw_plan pForward= fftw_plan_r2r_2d(nRows, nCols, const_cast<double *>(rLaplaceImage_p.PixelData(chan)), fcos.PixelData(), FFTW_REDFT10, FFTW_REDFT10, FFTW_MEASURE); fftw_plan pInverse = fftw_plan_r2r_2d(nRows, nCols, fcos.PixelData(), rSolution_p.PixelData(chan), FFTW_REDFT01, FFTW_REDFT01, FFTW_ESTIMATE); #endif // find DCT fftw_execute(pForward); realType_t const pi=pcl::Pi(); for(int row = 0 ; row < nRows; ++row){ for(int col = 0 ; col < nCols; ++col){ fcos.Pixel(col,row) /= 2*cos(pi*col/( (double) nCols)) - 2 + 2*cos(pi*row/((double) nRows)) - 2; } } fcos.Pixel(0,0)=0.0; // Inverse DCT fftw_execute(pInverse); fftw_destroy_plan(pForward); fftw_destroy_plan(pInverse); } #endif #ifdef USE_PIFFT // use PI FFT based solver by Carlos Milovic F. rLaplaceImage_p.ResetSelections(); rSolution_p.AllocateData(nCols,nRows,nChannels,colorSpace); rSolution_p.ResetSelections(); // current solver handles only one channel per run. for(int chan=0;chan<nChannels;++chan){ TimeMessage startSolver(String("PIFFT Solver, Channel ")+String(chan)); imageType_t tmpImage(nCols,nRows); rLaplaceImage_p.SelectChannel(chan); tmpImage.Assign(rLaplaceImage_p); __SolvePoisson(tmpImage); rSolution_p.SelectChannel(chan); rSolution_p.Mov(tmpImage); } #endif }
//constructor of the class Solver_FFTW::Solver_FFTW(){ /*===============================================*/ Input* initInput = new Input(); numOfXGrid = initInput->getXGridNum(); numOfYGrid = initInput->getYGridNum(); cout << "Input Finished" << endl; cout << "Grids along x axis: " << numOfXGrid << endl; cout << "Grids along y axis: " << numOfYGrid << endl; /*====================================================== Initializing arrays in real space ======================================================*/ v = new double*[numOfXGrid]; w = new double*[numOfXGrid]; initE = 0; temp_Velocity = new double[numOfXGrid*numOfYGrid]; firstD_u = new double[numOfXGrid*numOfYGrid]; secondD_u = new double[numOfXGrid*numOfYGrid]; for(int i = 0; i < numOfXGrid; i++){ v[i] = new double[numOfYGrid]; w[i] = new double[numOfYGrid]; for(int j = 0; j < numOfYGrid; j++){ v[i][j] = initInput->getXVelocity(i,j); w[i][j] = initInput->getYVelocity(i,j); initE += v[i][j]*v[i][j] + w[i][j]*w[i][j]; //calculating the initial energy } } for(int i = 0; i < numOfXGrid*numOfYGrid; i++){ temp_Velocity[i] = 0; firstD_u[i] = 0; secondD_u[i] = 0; } /*===================================================== Initializing first order derivatives =====================================================*/ v_x = new double*[numOfXGrid]; v_y = new double*[numOfXGrid]; w_x = new double*[numOfXGrid]; w_y = new double*[numOfXGrid]; for(int i = 0; i < numOfXGrid; i++){ v_x[i] = new double[numOfYGrid]; v_y[i] = new double[numOfYGrid]; w_x[i] = new double[numOfYGrid]; w_y[i] = new double[numOfYGrid]; for(int j = 0; j < numOfYGrid; j++){ v_x[i][j] = 0; v_y[i][j] = 0; w_x[i][j] = 0; w_y[i][j] = 0; } } /*===================================================== Initializing second order derivatives =====================================================*/ v_x_x = new double*[numOfXGrid]; v_y_y = new double*[numOfXGrid]; w_x_x = new double*[numOfXGrid]; w_y_y = new double*[numOfXGrid]; for(int i = 0; i < numOfXGrid; i++){ v_x_x[i] = 
new double[numOfYGrid]; v_y_y[i] = new double[numOfYGrid]; w_x_x[i] = new double[numOfYGrid]; w_y_y[i] = new double[numOfYGrid]; for(int j = 0; j < numOfYGrid; j++){ v_x_x[i][j] = 0; v_y_y[i][j] = 0; w_x_x[i][j] = 0; w_y_y[i][j] = 0; } } /*======================================================== Initializing the forces ========================================================*/ externalFx = new double*[numOfXGrid]; externalFy = new double*[numOfXGrid]; for(int i = 0;i < numOfXGrid; i++){ externalFx[i] = new double[numOfYGrid]; externalFy[i] = new double[numOfYGrid]; for(int j = 0;j < numOfYGrid; j++){ externalFx[i][j] = 0; externalFy[i][j] = 0; } } /*======================================================== initializing multiple threads ==========================================================*/ if(fftw_init_threads()){ fftw_plan_with_nthreads(THREADS); cout << "Using "<< THREADS << " threads" << endl << endl; } else { cout << "Using multiple threads failed" << endl; exit(0); } /*===================================================== Initializing arrays in fourier space =====================================================*/ V = (fftw_complex**)fftw_malloc(sizeof(fftw_complex*)*numOfXGrid); W = (fftw_complex**)fftw_malloc(sizeof(fftw_complex*)*numOfXGrid); temp_U = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*numOfXGrid*(numOfYGrid/2+1)); firstD_U = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*numOfXGrid*(numOfYGrid/2+1)); secondD_U = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*numOfXGrid*(numOfYGrid/2+1)); for(int i = 0; i < numOfXGrid; i++){ V[i] = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*(numOfYGrid/2+1)); W[i] = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*(numOfYGrid/2+1)); for(int j = 0; j < numOfYGrid/2 + 1; j++){ V[i][j][0] = 0; V[i][j][1] = 0; W[i][j][0] = 0; W[i][j][1] = 0; } } for(int i = 0; i < numOfXGrid*(numOfYGrid/2+1); i++){ temp_U[i][0] = 0; temp_U[i][1] = 0; firstD_U[i][0] = 0; firstD_U[i][1] = 0; secondD_U[i][0] = 0; 
secondD_U[i][1] = 0; } temp = (fftw_complex**)fftw_malloc(sizeof(fftw_complex*)*numOfXGrid); for(int i = 0; i < numOfXGrid; i++){ temp[i] = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*(numOfYGrid/2+1)); } /**===================================================== Initializing Plans ======================================================*/ plan_r2c = fftw_plan_dft_r2c_2d(numOfXGrid,numOfYGrid,temp_Velocity,temp_U,FFTW_ESTIMATE); plan_c2r = fftw_plan_dft_c2r_2d(numOfXGrid,numOfYGrid,temp_U,temp_Velocity,FFTW_ESTIMATE); plan_firstD = fftw_plan_dft_c2r_2d(numOfXGrid,numOfYGrid,firstD_U,firstD_u,FFTW_ESTIMATE); plan_secondD = fftw_plan_dft_c2r_2d(numOfXGrid,numOfYGrid,secondD_U,secondD_u,FFTW_ESTIMATE); /*====================================================== initializing output energy file ======================================================*/ stringstream fileNameOfE; fileNameOfE << OUTPUT_PATH << "E" << ".txt"; energy.open(fileNameOfE.str().c_str()); /*====================================================== generating the readMe.txt file ======================================================*/ stringstream fileNameOfReadMe; fileNameOfReadMe << OUTPUT_PATH << "readMe.txt"; readMe.open(fileNameOfReadMe.str().c_str()); readMe << "Nx = " << numOfXGrid << endl; readMe << "Ny = " << numOfYGrid << endl; readMe << "dt = " << TIME_STEP << endl; readMe << "Initial Energy = " << initE << endl; readMe << "Viscosity = "<< VISCOSITY << endl; readMe << "Rescaled Viscosity =" << VISCOSITY/sqrt(initE*(numOfXGrid-1)*(numOfYGrid-1)); readMe.close(); /*====================================================== Initializing Adams array. 
======================================================*/ Adams_v = new double**[3]; Adams_w = new double**[3]; for(int i = 0;i < 3; i++){ Adams_v[i] = new double*[numOfXGrid]; Adams_w[i] = new double*[numOfXGrid]; for(int j = 0; j < numOfXGrid; j++){ Adams_v[i][j] = new double[numOfYGrid]; Adams_w[i][j] = new double[numOfYGrid]; for(int k = 0; k < numOfYGrid; k++){ Adams_v[i][j][k] = 0; Adams_w[i][j][k] = 0; } } } /*==================================================*/ return; }
void init_common(void) { /* This routine will initialize everything */ int i,j,k; DEBUG_START_FUNC; #ifdef MPI_SUPPORT #ifdef FFTW3_MPI_SUPPORT fftw_mpi_init(); #endif #endif #ifdef _OPENMP if( !(fftw_init_threads()) ) ERROR_HANDLER( ERROR_CRITICAL, "Threads initialisation failed"); #endif /* We start with the coordinate system */ kx = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX ); if (kx == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for kx allocation"); ky = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX ); if (ky == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for ky allocation"); kz = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX ); if (kz == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for kz allocation"); kxt = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX ); if (kxt == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for kxt allocation"); kyt = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX ); if (kyt == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for kyt allocation"); kzt = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX ); if (kzt == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for kzt allocation"); k2t = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX ); if (k2t == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for k2t allocation"); ik2t = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX ); if (ik2t == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for ik2t allocation"); for( i = 0; i < NX_COMPLEX / NPROC; i++) { for( j = 0; j < NY_COMPLEX; j++) { for( k = 0; k < NZ_COMPLEX; k++) { kx[ IDX3D ] = (2.0 * M_PI) / param.lx * (fmod( NX_COMPLEX * rank / NPROC + i + (NX_COMPLEX / 2) , NX_COMPLEX ) - NX_COMPLEX / 2 ); #ifdef WITH_2D ky[ IDX3D ] = (2.0 * M_PI) / param.ly * j; kz[ IDX3D ] = 0.0; #else ky[ IDX3D ] = (2.0 * M_PI) / param.ly * (fmod( j + (NY_COMPLEX / 2) , NY_COMPLEX ) - NY_COMPLEX / 2 ); kz[ IDX3D ] = (2.0 * M_PI) / param.lz * k; #endif kxt[ IDX3D ]= kx[IDX3D]; 
kyt[ IDX3D ]= ky[IDX3D]; kzt[ IDX3D ]= kz[IDX3D]; k2t[ IDX3D ] = kxt[IDX3D] * kxt[IDX3D] + kyt[IDX3D] * kyt[IDX3D] + kzt[IDX3D] * kzt[IDX3D]; if ( k2t[IDX3D] == 0.0 ) ik2t[IDX3D] = 1.0; else ik2t[IDX3D] = 1.0 / k2t[IDX3D]; } } } kxmax = 2.0 * M_PI/ param.lx * ( (NX / 2) - 1); kymax = 2.0 * M_PI/ param.ly * ( (NY / 2) - 1); kzmax = 2.0 * M_PI/ param.lz * ( (NZ / 2) - 1); #ifdef WITH_2D kzmax = 0.0; #endif kmax=pow(kxmax*kxmax+kymax*kymax+kzmax*kzmax,0.5); /* Initialize the dealiazing mask Or the nyquist frequency mask (in case dealiasing is not required) */ mask = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX ); if (mask == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for mask allocation"); for( i = 0; i < NX_COMPLEX/NPROC; i++) { for( j = 0; j < NY_COMPLEX; j++) { for( k = 0; k < NZ_COMPLEX; k++) { mask[ IDX3D ] = 1.0; if(param.antialiasing) { if( fabs( kx[ IDX3D ] ) > 2.0/3.0 * kxmax) mask[ IDX3D ] = 0.0; if( fabs( ky[ IDX3D ] ) > 2.0/3.0 * kymax) mask[ IDX3D ] = 0.0; #ifndef WITH_2D if( fabs( kz[ IDX3D ] ) > 2.0/3.0 * kzmax) mask[ IDX3D ] = 0.0; #endif } else { if ( NX_COMPLEX / NPROC * rank + i == NX_COMPLEX / 2 ) mask[ IDX3D ] = 0.0; if ( j == NY_COMPLEX / 2 ) mask[ IDX3D ] = 0.0; #ifndef WITH_2D if ( k == NZ_COMPLEX ) mask[ IDX3D ] = 0.0; #endif } } } } if(param.antialiasing) { kxmax = kxmax * 2.0 / 3.0; kymax = kymax * 2.0 / 3.0; kzmax = kzmax * 2.0 / 3.0; kmax = kmax * 2.0 / 3.0; } // Allocate fields // Complex fields w1 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w1 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w1 allocation"); w2 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w2 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w2 allocation"); w3 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w3 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w3 allocation"); w4 = (double complex *) fftw_malloc( sizeof(double complex) * 
NTOTAL_COMPLEX); if (w4 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w4 allocation"); w5 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w5 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w5 allocation"); w6 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w6 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w6 allocation"); w7 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w7 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w7 allocation"); w8 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w8 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w8 allocation"); w9 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w9 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w9 allocation"); w10 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w10 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w10 allocation"); w11 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w11 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w11 allocation"); w12 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w12 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w12 allocation"); w13 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w13 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w13 allocation"); w14 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w14 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w14 allocation"); w15 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w15 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w15 allocation"); w16 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w16 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w15 
allocation"); w17 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w17 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w15 allocation"); w18 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (w18 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w15 allocation"); wh1 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (wh1 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for wh1 allocation"); wh2 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX); if (wh2 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for wh2 allocation"); wh3 = (double complex *) fftw_malloc( sizeof(double complex) * NX*(NY/2+1)); if (wh3 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for wh3 allocation"); wh4 = (double complex *) fftw_malloc( sizeof(double complex) * NX*(NY/2+1)*NZ); if (wh4 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for wh4 allocation"); wh5 = (double complex *) fftw_malloc( sizeof(double complex) * NX*(NY/2+1)*NZ); if (wh5 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for wh5 allocation"); // Initialize wh1,wh2,wh3; for(i=0;i<NX*(NY/2+1);i++) {wh1[i]=0; wh2[i]=0; wh3[i]=0;} /* Will use the same memory space for real and complex fields */ wr1 = (double *) w1; wr2 = (double *) w2; wr3 = (double *) w3; wr4 = (double *) w4; wr5 = (double *) w5; wr6 = (double *) w6; wr7 = (double *) w7; wr8 = (double *) w8; wr9 = (double *) w9; wr10 = (double *) w10; wr11 = (double *) w11; wr12 = (double *) w12; wr13 = (double *) w13; wr14 = (double *) w14; wr15 = (double *) w15; wr16 = (double *) w16; wr17 = (double *) w17; wr18 = (double *) w18; wrh1 = (double *) wh1; wrh2 = (double *) wh2; wrh3 = (double *) wh3; wrh4 = (double *) wh4; wrh5 = (double *) wh5; // Physic initialisation // init_real_mask(); //set Reynolds numbers using input powers AJB 08/03/12 param.reynolds = pow(10.0,param.reynolds); nu = 1.0 / param.reynolds; #ifdef BOUSSINESQ 
param.reynolds_th = pow(10.0,param.reynolds_th); nu_th = 1.0 / param.reynolds_th; #endif #ifdef MHD param.reynolds_m = pow(10.0,param.reynolds_m); eta = 1.0 / param.reynolds_m; #endif DEBUG_END_FUNC; return; }