Example #1
0
/**
 * Initializes the library by switching FFTW into multi-threaded mode.
 * @param nthreads The number of OpenMP threads to use for execution of local FFT;
 *                 passed to fftw_plan_with_nthreads().
 * @return 0 if successful, 1 if fftw_init_threads() reported an error
 */
int accfft_init(int nthreads){
  /* fftw_init_threads() yields zero on failure; nothing else is configured then. */
  if (!fftw_init_threads())
    return 1;

  fftw_plan_with_nthreads(nthreads);
  return 0;
}
Example #2
0
///Initialize FFTW threading.
///When FFTW_WITH_THREADS is defined, FFTW's threading layer is initialised and
///`threads` is requested for plans created afterwards. If the initialisation
///reports an error the planner thread count is not changed.
///Without FFTW_WITH_THREADS the function does nothing.
void FFTInit(int threads)
{
#ifdef FFTW_WITH_THREADS
  // fftw_init_threads() returns zero on error; only configure threads on success.
  if (fftw_init_threads())
    fftw_plan_with_nthreads(threads);
#else
  (void)threads; // threading support not compiled in
#endif
}
Example #3
0
/*
 * Computes a type-I discrete cosine transform (FFTW_REDFT00) of `in` into
 * `out` and rescales the result.
 *
 * N   - number of samples in `in` and `out`
 * in  - input array of N doubles
 * out - output array of N doubles; after the transform the first and last
 *       entries are divided by 2*(N-1) and all others by (N-1)
 *
 * The call is a no-op when N < 2 (the scaling divides by N-1 and REDFT00 is
 * not defined for a single sample) or when FFTW cannot create the plan.
 * FFTW's thread support is initialised on entry and torn down on exit.
 */
void dct(int N, double *in, double *out){

	// compute variables
	int ii;
	fftw_plan my_plan;

	// N-1 is the divisor used for scaling below
	if(N < 2){
		return;
	}

	fftw_init_threads();

	// define plan
	fftw_plan_with_nthreads(omp_get_max_threads());
	my_plan = fftw_plan_r2r_1d(N, in, out, FFTW_REDFT00, FFTW_ESTIMATE);

	// the planner returns a null plan on failure; executing it would crash
	if(my_plan){

		//execute plan
		fftw_execute(my_plan);

		// scale output
		for(ii=0; ii < N; ii++){
			if(ii == 0 || ii == N-1){
				out[ii] = out[ii]/(double)(N-1)/2.0;
			}
			else{
				out[ii] = out[ii]/(double)(N-1);
			}
		}

		// destroy plan
		fftw_destroy_plan(my_plan);
	}

	fftw_cleanup_threads();

}
Example #4
0
/**
 * Creates the forward and backward 2-D FFTW plans for a convolution.
 *
 * @param width           image width
 * @param height          image height
 * @param kw              kernel width
 * @param mode            0: plans use width x height;
 *                        1: plans use (width + kw - 1) x (height + kw - 1)
 * @param threadMaxCount  when greater than 1, FFTW threading is initialised
 *                        and this many threads are requested for the plans
 * @throws std::invalid_argument if mode is neither 0 nor 1
 * @throws std::runtime_error    if the temporary planning buffers cannot be allocated
 *
 * The plans are measured (FFTW_MEASURE) on two scratch buffers that are freed
 * before the constructor returns.
 */
convolution_plan::convolution_plan(int width, int height, int kw, int mode, int threadMaxCount) {

    switch (mode) {
    case 0:
        this->width = width;
        this->height = height;
        break;
    case 1:
        this->width = width + kw - 1;
        this->height = height + kw - 1;
        break;
    default:
        throw std::invalid_argument("Warning: 2d convolution plan: Invalid mode");
    }

    if (threadMaxCount > 1) {
        fftw_init_threads(); // This MUST come before all other fftw calls
        fftw_plan_with_nthreads(threadMaxCount);
    }

    this->depth = 1;
    this->dim = 2;
    this->kw = kw;
    this->threadMaxCount = threadMaxCount;
    this->staticKernel = NULL;

    fftw_complex* benchmarkArray1 = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * this->width * this->height);
    fftw_complex* benchmarkArray2 = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * this->width * this->height);

    // FFTW_MEASURE writes into the buffers while planning, so a failed
    // allocation must be caught before the planner is invoked.
    if (benchmarkArray1 == NULL || benchmarkArray2 == NULL) {
        if (benchmarkArray1 != NULL) fftw_free(benchmarkArray1);
        if (benchmarkArray2 != NULL) fftw_free(benchmarkArray2);
        throw std::runtime_error("2d convolution plan: unable to allocate planning buffers");
    }

    this->forwardPlan = fftw_plan_dft_2d(this->height, this->width, benchmarkArray1, benchmarkArray2, FFTW_FORWARD, FFTW_MEASURE);
    this->backwardPlan = fftw_plan_dft_2d(this->height, this->width, benchmarkArray1, benchmarkArray2, FFTW_BACKWARD, FFTW_MEASURE);

    fftw_free(benchmarkArray1);
    fftw_free(benchmarkArray2);
}
Example #5
0
/*
 * Allocates the FFT work arrays and creates the FFTW plans.
 *
 * sett     - search settings; Ninterp and nfftf are computed and stored here
 * opts     - not used by this function
 * plans    - receives the three plans (plan, pl_int, pl_inv)
 * fftw_arr - receives arr_len and the two work arrays xa and xb
 * aux_arr  - not used by this function
 *
 * FFTW wisdom is read from "wisdom-<hostname>.dat" in the current directory
 * when that file exists; when it does not, the wisdom accumulated while
 * planning is written to it at the end.
 */
void plan_fftw(
  Search_settings *sett, 
	Command_line_opts *opts,
	FFTW_plans *plans, 
	FFTW_arrays *fftw_arr, 
	Aux_arrays *aux_arr) {

  /* wfilename must hold "wisdom-" + hostname + ".dat" + NUL */
  char hostname[512], wfilename[512+16];
  FILE *wisdom;

  /* Imports a "wisdom file" containing information 
   * (previous tests) about how to optimally compute Fourier 
   * transforms on a given machine. If wisdom file is not present, 
   * it will be created after the test (measure) runs 
   * of the fft_plans are performed below 
   * (see http://www.fftw.org/fftw3_doc/Wisdom.html)
   */ 

  fftw_init_threads();

  /* gethostname() may leave the buffer unterminated on truncation and
   * leaves it unspecified on failure, so handle both explicitly. */
  if(gethostname(hostname, sizeof(hostname)) != 0)
    snprintf(hostname, sizeof(hostname), "unknown");
  hostname[sizeof(hostname)-1] = '\0';

  snprintf(wfilename, sizeof(wfilename), "wisdom-%s.dat", hostname);
  if((wisdom = fopen (wfilename, "r")) != NULL) {
    fftw_import_wisdom_from_file(wisdom);
    fclose (wisdom);
  }

  sett->Ninterp = sett->interpftpad*sett->nfft; 

  // array length (xa, xb) is max{fftpad*nfft, Ninterp}
  fftw_arr->arr_len = (sett->fftpad*sett->nfft > sett->Ninterp 
                    ? sett->fftpad*sett->nfft : sett->Ninterp);

  fftw_arr->xa = fftw_malloc(fftw_arr->arr_len*sizeof(fftw_complex));
  fftw_arr->xb = fftw_malloc(fftw_arr->arr_len*sizeof(fftw_complex));

  sett->nfftf = sett->fftpad*sett->nfft;

  // Change FFTW_MEASURE to FFTW_PATIENT for more optimized plan
  // (takes more time to generate the wisdom file)
  // Note: this plan is created before fftw_plan_with_nthreads() is called.
  plans->plan = fftw_plan_dft_1d(sett->nfftf, fftw_arr->xa, fftw_arr->xa, FFTW_FORWARD, FFTW_MEASURE);

  fftw_plan_with_nthreads(omp_get_max_threads());

  plans->pl_int = fftw_plan_dft_1d(sett->nfft, fftw_arr->xa, fftw_arr->xa, FFTW_FORWARD, FFTW_MEASURE);

  plans->pl_inv = fftw_plan_dft_1d(sett->Ninterp, fftw_arr->xa, fftw_arr->xa, FFTW_BACKWARD, FFTW_MEASURE);

  // Generates a wisdom FFT file if there is none
  if((wisdom = fopen(wfilename, "r")) != NULL) {
    fclose (wisdom);
  } else if((wisdom = fopen(wfilename, "w")) != NULL) {
    fftw_export_wisdom_to_file(wisdom);
    fclose (wisdom);
  } else {
    // Not being able to save wisdom is not fatal: report it and carry on.
    perror(wfilename);
  }

} // end of FFT plans 
void set_num_threads(int nr)
{
    num_threads = nr;
    omp_set_num_threads(nr);
    int ret = fftw_init_threads();
    if (ret == 0) {cout << "error" << endl; exit(1);}
    fftw_plan_with_nthreads(nr);
}
Example #7
0
 /// Value-initialises forward_plans and backward_plans and enables threaded FFTW.
 /// @param threads thread count passed to fftw_plan_with_nthreads()
 /// @throws int (the value 1) when fftw_init_threads() fails
 FFT::FFT(size_t threads)
   : forward_plans(),
     backward_plans()
 {
   const int init_status = fftw_init_threads();
   if (init_status == 0) {
     std::cerr << "Unable to init threads in fftw\n";
     throw 1;
   }

   fftw_plan_with_nthreads(threads);
 }
Example #8
0
/// Turns on FFTW multithreading for both the double-precision (fftw_) and
/// single-precision (fftwf_) interfaces, requesting getNumProcessors()
/// threads for each, and prints a confirmation to stdout.
void cSystem::startNthreadsFFTW(void)
{
    // Both initialisers return zero on failure.
    require(0 !=  fftw_init_threads(), "void cSystem::startNthreadsFFTW(void)");
    require(0 != fftwf_init_threads(), "void cSystem::startNthreadsFFTW(void)");

    fftw_plan_with_nthreads(getNumProcessors());
    fftwf_plan_with_nthreads(getNumProcessors());

    std::cout << "FFTW multithreading is turned on: ";
    std::cout << getNumProcessors() << " threads\n\n";
}
// Configures FFT threading when built with OpenMP; otherwise does nothing.
// With Intel MKL the thread count is written to fftw3_mkl.number_of_user_threads,
// with plain FFTW the threading layer is initialised and omp_get_max_threads()
// threads are requested for subsequent plans.
void fftInitThreading() {
#if defined(_OPENMP)
#  if defined(INTEL_MKL_VERSION)
  // NOTE: Using Intel MKL (and threading particularly)
  //   could require a lot of additional setup
  fftw3_mkl.number_of_user_threads = omp_get_max_threads();
#  else
  fftw_init_threads();
  fftw_plan_with_nthreads(omp_get_max_threads());
#  endif
#endif
}
Example #10
0
/* Prints the thread count reported by nfft_get_omp_num_threads(), initialises
 * FFTW threading and runs simple_test_nfsft(). Always returns EXIT_SUCCESS. */
int main(void)
{
  printf("nthreads = %d\n", nfft_get_omp_num_threads());

  /* FFTW's threading layer is initialised before the test routine runs */
  fftw_init_threads();

  printf("Computing an NDSFT, an NFSFT, an adjoint NDSFT, and an adjoint NFSFT...\n\n");
  simple_test_nfsft();

  return EXIT_SUCCESS;
}
Example #11
0
/*
 * JNI entry point: enables threaded FFTW and sets the planner thread count.
 *
 * fftw_init_threads() is only called while threads_initialized is still 0.
 * On success (or when already initialised) num_of_threads is applied and both
 * flags are set; on failure a java.lang.Exception with the message
 * "Failed to initialize thread" is thrown in the calling Java code.
 */
JNIEXPORT void JNICALL Java_br_usp_ime_dspbenchmarking_algorithms_fftw_FFTW_initThreadsJNI(JNIEnv *pEnv, jobject pObj, jint num_of_threads) {
	if (threads_initialized || fftw_init_threads()) {
		fftw_plan_with_nthreads(num_of_threads);
		threads_enabled = 1;
		threads_initialized = 1;
		__android_log_print(ANDROID_LOG_INFO, LOG_TAG, "Threads enabled");
	} else {
		(*pEnv)->ThrowNew(pEnv, (*pEnv)->FindClass(pEnv, "java/lang/Exception"), "Failed to initialize thread");
	}
}
Example #12
0
void FFTHandler::init(long arg_n){
//#ifndef DEBUG
	fftw_init_threads();
	fftw_plan_with_nthreads(omp_get_max_threads());
//#endif
	n = arg_n;
	leased=0;
	//fprintf(stderr, "Initializing fft plan of size [%ld]\n", n);
	memoryPool.resize(1);
	memoryPool[0] = (double*)fftw_malloc(sizeof(double)*2*(n/2+1));
	fftForwardPlan = fftw_plan_dft_r2c_1d(n, memoryPool[0], (fftw_complex*)memoryPool[0],FFTW_MEASURE);
	fftReversePlan = fftw_plan_dft_c2r_1d(n, (fftw_complex*)memoryPool[0], memoryPool[0],FFTW_MEASURE);
}
/* Enables threaded FFTW with 6 threads and, for indices 0 and 1, allocates
 * complex input and output buffers of width[i]*height[i] elements and creates
 * a forward 2-D plan (FFTW_ESTIMATE) from fft_in[i] to fft_out[i]. */
void fft_init()
{
  int idx;

  fftw_init_threads();
  fftw_plan_with_nthreads(6);

  idx = 0;
  while(idx < 2){
    fft_in[idx]  = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * width[idx]*height[idx]);
    fft_out[idx] = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * width[idx]*height[idx]);
    fft_plan[idx] = fftw_plan_dft_2d(height[idx], width[idx],
				     fft_in[idx], fft_out[idx],
				     FFTW_FORWARD, FFTW_ESTIMATE);
    idx++;
  }
}
Example #14
0
int cfft2_init(int pad1           /* padding on the first axis */,
               int nx,   int ny   /* input data size */,
               int *nx2, int *ny2 /* padded data size */)
/*< initialize >*/
{
    /* Chooses the padded sizes n1 (= nk) and n2 with kiss_fft_next_fast_size(),
     * allocates the complex work arrays cc and dd, and stores the
     * normalisation weight wt = 1/(n1*n2). Without SF_HAS_FFTW the KISS FFT
     * configurations and scratch arrays are allocated as well.
     * Returns nk*n2. */
#ifndef SF_HAS_FFTW
    int i2;
#endif

#if defined(SF_HAS_FFTW) && defined(_OPENMP)
    fftw_init_threads();
    sf_warning("Using threaded FFTW3! \n");
    fftw_plan_with_nthreads(omp_get_max_threads());
#endif

    /* first axis: padded by pad1, then rounded up to a fast size */
    n1 = kiss_fft_next_fast_size(nx*pad1);
    nk = n1;

#ifndef SF_HAS_FFTW
    cfg1  = kiss_fft_alloc(n1,0,NULL,NULL);
    icfg1 = kiss_fft_alloc(n1,1,NULL,NULL);
#endif

    /* second axis */
    n2 = kiss_fft_next_fast_size(ny);

    cc = sf_complexalloc2(n1,n2);
    dd = sf_complexalloc2(nk,n2);

#ifndef SF_HAS_FFTW
    cfg2  = kiss_fft_alloc(n2,0,NULL,NULL);
    icfg2 = kiss_fft_alloc(n2,1,NULL,NULL);

    /* tmp is an array of n2 row pointers into one contiguous nk*n2 block */
    tmp =    (kiss_fft_cpx **) sf_alloc(n2,sizeof(*tmp));
    tmp[0] = (kiss_fft_cpx *)  sf_alloc(nk*n2,sizeof(kiss_fft_cpx));
    for (i2=0; i2 < n2; i2++) {
        tmp[i2] = tmp[0]+i2*nk;
    }

    trace2 = sf_complexalloc(n2);
    ctrace2 = (kiss_fft_cpx *) trace2;
#endif

    /* report the padded sizes to the caller */
    *nx2 = n1;
    *ny2 = n2;

    wt =  1.0/(n1*n2);

    return (nk*n2);
}
Example #15
0
/** Switches this transformer to threaded FFTW.
 *
 * A value of 1 leaves the object untouched. For any other value the thread
 * count is stored in nthreads, threadsSetOn is raised, and FFTW threading is
 * initialised and configured while holding fftw_plan_mutex. An error is
 * reported through REPORT_ERROR when fftw_init_threads() fails.
 */
void FourierTransformer::setThreadsNumber(int tNumber)
{
	if (tNumber == 1)
		return;

	threadsSetOn = true;
	nthreads = tNumber;

	pthread_mutex_lock(&fftw_plan_mutex);
	const bool initFailed = (fftw_init_threads() == 0);
	if (initFailed)
	{
		REPORT_ERROR("FFTW cannot init threads (setThreadsNumber)");
	}
	fftw_plan_with_nthreads(nthreads);
	pthread_mutex_unlock(&fftw_plan_mutex);
}
Example #16
0
/****** fft_init ************************************************************
PROTO	void fft_init(void)
PURPOSE	Initialize the FFT routines
INPUT	-.
OUTPUT	-.
NOTES	Global preferences (prefs.nthreads) are used for multithreading.
	Only the first call does any work; later calls return immediately.
AUTHOR	E. Bertin (IAP)
VERSION	29/11/2006
 ***/
void    fft_init(void)
 {
  /* Already initialised by an earlier call */
  if (firsttimeflag)
    return;

#ifdef USE_THREADS
  if (fftw_init_threads() == 0)
    error(EXIT_FAILURE, "*Error*: thread initialization failed in ", "FFTW");
  fftw_plan_with_nthreads(prefs.nthreads);
  QPTHREAD_MUTEX_INIT(&fftmutex, NULL);
#endif
  firsttimeflag = 1;

  return;
  }
Example #17
0
// Enables threaded FFTW when built with OMP_FFTW.
// Returns GLU_FAILURE if fftw_init_threads() fails, GLU_SUCCESS otherwise
// (including when OMP_FFTW is not defined and nothing is done).
int
parallel_ffts( void )
{
#ifdef OMP_FFTW
  // fftw_init_threads() returns zero on failure
  if( !fftw_init_threads() ) {
    return GLU_FAILURE ;
  }
  // the number of fftw threads is set to be the same
  // as the usual number of parallel threads ..
  fftw_plan_with_nthreads( Latt.Nthreads ) ;
  fprintf( stdout , "[PAR] FFTW using %d thread(s) \n" , Latt.Nthreads ) ;
#endif
  return GLU_SUCCESS ;
}
Example #18
0
/****** fft_init ************************************************************
PROTO	void fft_init(int nthreads)
PURPOSE	Initialize the FFT routines
INPUT	Number of threads to use for FFT plans.
OUTPUT	-.
NOTES	Only the first call does any work. Threading is set up only when
	USE_THREADS is defined and nthreads > 1. The single-precision
	(fftwf_) interface is used for both the thread initialisation and
	the planner thread count.
AUTHOR	E. Bertin (IAP)
VERSION	26/06/2009
 ***/
void    fft_init(int nthreads)
 {
  if (!firsttimeflag)
    {
#ifdef USE_THREADS
    if (nthreads > 1)
      {
/*---- The planner configured below is the single-precision one, so the */
/*---- matching single-precision thread initialiser must be called. */
      if (!fftwf_init_threads())
        error(EXIT_FAILURE, "*Error*: thread initialization failed in ", "FFTW");
/*---- Use the thread count that was tested above */
      fftwf_plan_with_nthreads(nthreads);
      }
#endif
    firsttimeflag = 1;
    }

  return;
  }
Example #19
0
// Enables threaded FFTW when built with OMP_FFTW, using as many threads as an
// OpenMP parallel region reports. Returns FAILURE if fftw_init_threads()
// fails, SUCCESS otherwise (also when OMP_FFTW is not defined).
static int
parallel_ffts( void )
{
#ifdef OMP_FFTW
  if( fftw_init_threads( ) == 0 ) {
    return FAILURE ;
  }

  // nthreads is set from inside a parallel region so that it matches
  // the usual number of parallel threads ..
  #pragma omp parallel
  { nthreads = omp_get_num_threads( ) ; }

  fftw_plan_with_nthreads( nthreads ) ;
  printf("[PAR] FFTW using %d thread(s) \n" , nthreads ) ;
#endif
  return SUCCESS ;
}
Example #20
0
// Program-wide initialisation: sets the log level, initialises FFTW threading
// (one thread per plan), calls iniTools() with a value derived from
// Parameter_SD_Selector, resets the physical-scale and Goldstone
// renormalisation state, and allocates the XML output tables.
void ini() {
  const int xmlOutputCapacity = 10000;  // number of entries in each xmlOutput_* table

  LogLevel = 2;
  fftw_init_threads();   //<-- remember to call desini
  fftw_plan_with_nthreads(1);

  iniTools(5517+Parameter_SD_Selector*12345);

  physicalScale = NaN;
  physicalScaleError = NaN;

  GoldstoneRenormFactor = 1.0;
  GoldstoneRenormFactorError = 0.0;
  GoldstoneRenormFactorDetermined = false;

  xmlOutput_Tag = new char*[xmlOutputCapacity];
  xmlOutput_Description = new char*[xmlOutputCapacity];
  xmlOutput_Value = new double[xmlOutputCapacity];
  xmlOutput_ValueError = new double[xmlOutputCapacity];
  xmlOutput_Count = 0;
}
Example #21
0
/*!
 this is a straightforward main function, that does the following
  - check the command line (config file and result file)
  - load lua config file
  - get configuration table from config
  - construct a preferences class from them
  - start simulation according to the preferences
  - dump the result as specified in preferences

 Returns -1 on a usage error. Otherwise the function ends with
 pthread_exit(NULL) after releasing the preferences, the lua state and FFTW.
 Problems while loading the configuration or opening the result file are
 reported on stderr.
 */
int main(int argc, char * argv[argc])
{
  /* Validate the arguments before any FFTW state is created, so the early
     return does not skip the FFTW cleanup below. */
  if (argc != 3) {
    fprintf(stderr, "usage: %s config.lua result.dat\n", argv[0]);
    return -1;
  }

  fftw_init_threads();
  fftw_plan_with_nthreads(4);

  lua_State * L = luaL_newstate();
  luaL_openlibs(L);

  preferences_t * prefs = preferences_new();

  if (luaL_dofile(L, argv[1])) {
    fprintf(stderr, "could not load '%s' : %s\n", argv[1], lua_tostring(L, 1));
  } else {
    // get config table
    lua_getfield(L, LUA_GLOBALSINDEX, "config");
    if (lua_isnil(L, -1)) {
      fprintf(stderr, "table config undefined\n");
    } else {
      // ref config table, so we can access it in preferences_read()
      prefs->config = luaL_ref(L, LUA_REGISTRYINDEX);
      if (!preferences_read(L, prefs)) {
        if (!start_simulation(prefs)) {
          FILE * fp = fopen(argv[2], "wb");
          /* An assert() would vanish under NDEBUG and then crash in
             dump_results(); report the failure instead. */
          if (fp) {
            dump_results(prefs, fp);
            fclose(fp);
          } else {
            fprintf(stderr, "could not open '%s' for writing\n", argv[2]);
          }
        }
      }
    }
  }

  preferences_free(prefs);

  lua_close(L);

  fftw_cleanup();
  fftw_cleanup_threads();
  pthread_exit(NULL);
}
Example #22
0
// Sets up the spherical Fourier transform: prints the thread count reported by
// nfft_get_num_threads(), enables threaded FFTW with omp_get_max_threads()
// threads, runs the NFSFT precomputation for bandwidth N and initialises
// `plan` for bandwidth N and M nodes.
void Transformer::init() {
	//printf("num_threads = %d\n", omp_get_max_threads());
    printf("nthreads = %d\n", nfft_get_num_threads());
    /* init: FFTW threading is enabled before the NFSFT routines are called */
    fftw_init_threads();
    fftw_plan_with_nthreads(omp_get_max_threads());
    /* precomputation (for fast polynomial transform) */
    nfsft_precompute(N, 1000.0, 0U, 0U);

    /* Initialize the transform plan with nfsft_init(). The guru-interface
     * call that is kept commented out below spelled out the flags explicitly
     * (array allocation, L^2-normalized spherical harmonics Y_k^n, preserved
     * Fourier coefficients, NFFT cut-off parameter m = 6); it is not
     * executed. See the NFFT 3 manual for details.
     */
    nfsft_init(&plan, N, M);
    //nfsft_init_guru(&plan, N, M, NFSFT_MALLOC_X | NFSFT_MALLOC_F |
    //    NFSFT_MALLOC_F_HAT | NFSFT_NORMALIZED | NFSFT_PRESERVE_F_HAT,
    //    PRE_PHI_HUT | PRE_PSI | FFTW_INIT | FFT_OUT_OF_PLACE, 6);
}
Example #23
0
/* Program entry point: enables threaded FFTW (4 threads), loads the input
 * files, allocates the buffers, runs the analysis and releases everything.
 * The command-line arguments are not used. Always returns 0. */
int main(int argc, char *argv[]) {

	/* --- set-up --- */
	printf("Memmory Allocation...\n");
	fftw_init_threads();
	fftw_plan_with_nthreads(4);
	load_files();
	init_buffers();

	/* --- processing --- */
	printf("Analysis Process Started...\n");
	analysis();
	printf("Process Finished.\n");

	/* --- tear-down --- */
	clean_up_memmory();
	fftw_cleanup_threads();

	return 0;
}
Example #24
0
// Builds the in-place forward (pf) and backward (pi) guru-interface plans on
// realdata. The transform covers DIM dimensions of extents m[0] (and m[1]),
// and is repeated over n3 contiguous components (stride 1). The plans are
// created with NUM_THREADS threads; the planner is set back to one thread
// afterwards.
Space_trans::Space_trans(const Config & configSettings):Data(configSettings)
{
  const int rank = DIM;
  const int howmany_rank = 1;
  fftw_iodim dims[DIM], howmany_dims[1];

  // first transformed dimension: the n3 components sit between consecutive elements
  if(DIM == 1 || DIM == 2){
    dims[0].n = m[0];
    dims[0].is = n3;
    dims[0].os = n3;
  }
  // second transformed dimension: stride of one full row
  if(DIM == 2){
    dims[1].n = m[1];
    dims[1].is = n3*m[0];
    dims[1].os = n3*m[0];
  }

  // loop over the n3 components
  howmany_dims[0].n = n3;
  howmany_dims[0].is = 1;
  howmany_dims[0].os = 1;

  fftw_init_threads();
  fftw_plan_with_nthreads(NUM_THREADS);

  pf = fftw_plan_guru_dft( rank, dims, howmany_rank, howmany_dims,
      realdata, realdata, FFTW_FORWARD, FFTW_ESTIMATE );

  pi = fftw_plan_guru_dft( rank, dims, howmany_rank, howmany_dims,
      realdata, realdata, FFTW_BACKWARD, FFTW_ESTIMATE );

  fftw_plan_with_nthreads(1);
}
/* Benchmark driver. Arguments: m, psi_flag and, when built with OpenMP, the
 * number of threads. Returns 1 when the argument count is wrong; otherwise
 * runs bench_openmp() on stdin and returns 0. */
int main(int argc, char **argv)
{
  int m, psi_flag;
#ifdef _OPENMP
  const int expected_argc = 4;
#else
  const int expected_argc = 3;
#endif

  if (argc != expected_argc)
    return 1;

#ifdef _OPENMP
  /* the third argument is the OpenMP thread count */
  fftw_init_threads();
  omp_set_num_threads(atoi(argv[3]));
#endif

  m = atoi(argv[1]);
  psi_flag = atoi(argv[2]);

  bench_openmp(stdin, m, psi_flag);

  return 0;
}
Example #26
0
// Run function of whole program.
// config_filename: path handed to load_config().
// The steps below run in a fixed order: FFTW threading, configuration, logger,
// source object, optical field, object depth range, the WRP method, the
// numerical reconstruction, and finally the release of everything allocated.
static void run(const char *config_filename) {
	double max_z, min_z;
	source_object *object;
	double complex *optical_field;

	// Initialize FFTW threads
	fftw_init_threads();

	// Load config to memory
	load_config(config_filename);

	// Initialize logger (the debug mode is read from the loaded config)
	initialize_logger(get_integer_value(CONF_LOGGER_DEBUG_MODE));

	// Load source object from the file named in the config
	load_source_object(&object, get_string_value(CONF_OBJECT_POINTS_FILE));

	// Initialize final optical field
	initialize_optical_field(&optical_field);

	// Extract z position extremes of source object into min_z and max_z
	extract_object_proportions(object, &min_z, &max_z, OBJECT_DEPTH);

	// Modified WRP method itself
	perform_wrp_method(object, optical_field, min_z, max_z);

	// Numerical reconstruction on hologram
	perform_numerical_reconstruction(optical_field, min_z);

	// Memory clear
	log_info("Deleting all structures from memory\n");
	free(optical_field);
	delete_source_object(object);
	delete_lookup_table();
	delete_config();
}
Example #27
0
/* Benchmark driver. Arguments: m, nfsft_flags, psi_flags, nrepeat and, when
 * built with OpenMP, the number of threads. Returns 1 when the argument count
 * is wrong. Otherwise reads the test case from stdin, runs the precomputation
 * and calls bench_openmp() nrepeat times, then returns 0. */
int main(int argc, char **argv)
{
  int m, nfsft_flags, psi_flags;
  int nrepeat;
  int trafo_adjoint, N, M, r;
  double *x;
  C *f_hat, *f;
#ifdef _OPENMP
  const int expected_argc = 6;
#else
  const int expected_argc = 5;
#endif

  if (argc != expected_argc)
    return 1;

#ifdef _OPENMP
  /* the fifth argument is the OpenMP thread count */
  fftw_init_threads();
  omp_set_num_threads(atoi(argv[5]));
#endif

  m = atoi(argv[1]);
  nfsft_flags = atoi(argv[2]);
  psi_flags = atoi(argv[3]);
  nrepeat = atoi(argv[4]);

  bench_openmp_readfile(stdin, &trafo_adjoint, &N, &M, &x, &f_hat, &f);

  /* precomputation (for fast polynomial transform) */
  nfsft_precompute(N,1000.0,0U,0U);

  r = 0;
  while (r < nrepeat)
  {
    bench_openmp(trafo_adjoint, N, M, x, f_hat, f, m, nfsft_flags, psi_flags);
    r++;
  }

  return 0;
}
Example #28
0
//static
/// Solves for an image whose Laplacian is rLaplaceImage_p, one channel at a time.
///
/// @param rLaplaceImage_p  input Laplacian image (read only)
/// @param rSolution_p      output; reallocated to the size, channel count and
///                         colour space of the input
///
/// With USE_FFTW each channel is transformed with a 2-D DCT (REDFT10), divided
/// by 2*cos(pi*col/nCols) - 2 + 2*cos(pi*row/nRows) - 2, and transformed back
/// (REDFT01). With USE_PIFFT the channel is handed to __SolvePoisson().
/// @throws Error if FFTW threading cannot be initialised (USE_THREADS builds)
void
GradientsBase::solveImage(imageType_t const &rLaplaceImage_p, imageType_t &rSolution_p)
{
  int const nRows=rLaplaceImage_p.Height();
  int const nCols=rLaplaceImage_p.Width();
  int const nChannels=rLaplaceImage_p.NumberOfChannels();
  imageType_t::color_space colorSpace=rLaplaceImage_p.ColorSpace();

#ifdef USE_FFTW
  // adapted from http://www.umiacs.umd.edu/~aagrawal/software.html,

  AssertColImage(rLaplaceImage_p);
  // just in case we accidentally change this, because code below believes in double...
  Assert(typeid(realType_t)==typeid(double));
  // check assumption of row major format
  Assert(rLaplaceImage_p.PixelAddress(0,0)+1==rLaplaceImage_p.PixelAddress(1,0));


  rSolution_p.AllocateData(nCols,nRows,nChannels,colorSpace);
  rSolution_p.ResetSelections();
  rSolution_p.Black();

#ifdef USE_THREADS
    // threaded version
    int const nElements=nRows*nCols;
    int const nThreads=Thread::NumberOfThreads(nElements);

    if(fftw_init_threads()==0){
      throw Error("Problem initilizing threads");
    }
    fftw_plan_with_nthreads(nThreads);
#endif

  for(int chan=0;chan<nChannels;++chan){
    TimeMessage startSolver(String("FFTW Solver, Channel ")+String(chan));

    // FIXME see if fttw_allocate gives us something...
    imageType_t fcos(nCols,nRows);

    // Both plans use FFTW_ESTIMATE. The forward plan reads directly from the
    // const input image: planning with FFTW_MEASURE overwrites the plan's
    // arrays and would destroy that input before the transform runs. In
    // experiments the higher optimization level did not give anything except
    // an additional delay anyway.
    fftw_plan pForward= fftw_plan_r2r_2d(nRows, nCols, const_cast<double *>(rLaplaceImage_p.PixelData(chan)), fcos.PixelData(), FFTW_REDFT10, FFTW_REDFT10, FFTW_ESTIMATE);
    fftw_plan pInverse = fftw_plan_r2r_2d(nRows, nCols, fcos.PixelData(), rSolution_p.PixelData(chan), FFTW_REDFT01, FFTW_REDFT01, FFTW_ESTIMATE);

    // find DCT
    fftw_execute(pForward);

    realType_t const pi=pcl::Pi();

    // Divide each coefficient by the corresponding denominator. The
    // denominator is zero at (0,0); that coefficient is reset right after
    // the loop.
    for(int row = 0 ; row < nRows; ++row){
      for(int col = 0 ; col < nCols; ++col){
	fcos.Pixel(col,row) /= 2*cos(pi*col/( (double) nCols)) - 2 + 2*cos(pi*row/((double) nRows)) - 2;
      }
    }
    fcos.Pixel(0,0)=0.0;

    // Inverse DCT
    fftw_execute(pInverse);
    fftw_destroy_plan(pForward);
    fftw_destroy_plan(pInverse);
  }
#endif
#ifdef USE_PIFFT
  // use PI FFT based solver by Carlos Milovic F.
  rLaplaceImage_p.ResetSelections();
  rSolution_p.AllocateData(nCols,nRows,nChannels,colorSpace);
  rSolution_p.ResetSelections();
  // current solver handles only one channel per run.
  for(int chan=0;chan<nChannels;++chan){
    TimeMessage startSolver(String("PIFFT Solver, Channel ")+String(chan));
    imageType_t tmpImage(nCols,nRows);
    rLaplaceImage_p.SelectChannel(chan);
    tmpImage.Assign(rLaplaceImage_p);
    __SolvePoisson(tmpImage);
    rSolution_p.SelectChannel(chan);
    rSolution_p.Mov(tmpImage);
  }
#endif
}
Example #29
0
// Allocates an nx-by-ny grid of doubles (one row array per x index) with
// every element set to zero.
static double** allocZeroedGrid(int nx, int ny){
	double** grid = new double*[nx];
	for(int i = 0; i < nx; i++){
		grid[i] = new double[ny](); // the trailing () value-initialises to 0
	}
	return grid;
}

//constructor of the class
// Reads the grid size and the initial velocity field from an Input object,
// allocates all real-space and Fourier-space work arrays (zero-filled, except
// `temp`, whose contents are left unset), enables threaded FFTW with THREADS
// threads, creates the r2c/c2r plans, opens the energy output file and writes
// readMe.txt with the run parameters.
// The process exits with status 1 if FFTW threading cannot be initialised.
Solver_FFTW::Solver_FFTW(){
	/*===============================================*/
	Input* initInput = new Input();
	numOfXGrid = initInput->getXGridNum();
	numOfYGrid = initInput->getYGridNum();
	cout << "Input Finished" << endl;
	cout << "Grids along x axis: " << numOfXGrid << endl;
	cout << "Grids along y axis: " << numOfYGrid << endl;

	/*======================================================
	Initializing arrays in real space
	======================================================*/
	v = allocZeroedGrid(numOfXGrid, numOfYGrid);
	w = allocZeroedGrid(numOfXGrid, numOfYGrid);
	initE = 0;
	for(int i = 0; i < numOfXGrid; i++){
		for(int j = 0; j < numOfYGrid; j++){
			v[i][j] = initInput->getXVelocity(i,j);
			w[i][j] = initInput->getYVelocity(i,j);
			initE += v[i][j]*v[i][j] + w[i][j]*w[i][j]; //calculating the initial energy
		}
	}
	// The input object is not used past this point; release it instead of leaking it.
	delete initInput;
	initInput = NULL;

	// flat work arrays, zero-initialised by the trailing ()
	temp_Velocity = new double[numOfXGrid*numOfYGrid]();
	firstD_u = new double[numOfXGrid*numOfYGrid]();
	secondD_u = new double[numOfXGrid*numOfYGrid]();

	/*=====================================================
	Initializing first order derivatives
	=====================================================*/
	v_x = allocZeroedGrid(numOfXGrid, numOfYGrid);
	v_y = allocZeroedGrid(numOfXGrid, numOfYGrid);
	w_x = allocZeroedGrid(numOfXGrid, numOfYGrid);
	w_y = allocZeroedGrid(numOfXGrid, numOfYGrid);

	/*=====================================================
	Initializing second order derivatives
	=====================================================*/
	v_x_x = allocZeroedGrid(numOfXGrid, numOfYGrid);
	v_y_y = allocZeroedGrid(numOfXGrid, numOfYGrid);
	w_x_x = allocZeroedGrid(numOfXGrid, numOfYGrid);
	w_y_y = allocZeroedGrid(numOfXGrid, numOfYGrid);

	/*========================================================
	Initializing the forces
	========================================================*/
	externalFx = allocZeroedGrid(numOfXGrid, numOfYGrid);
	externalFy = allocZeroedGrid(numOfXGrid, numOfYGrid);

	/*========================================================
	initializing multiple threads
	==========================================================*/
	if(fftw_init_threads()){
		fftw_plan_with_nthreads(THREADS);
		cout << "Using "<< THREADS << " threads" << endl << endl;
	}
	else {
		cout << "Using multiple threads failed" << endl;
		// a failure must not be reported to the caller as a successful run
		exit(1);
	}

	/*=====================================================
	Initializing arrays in fourier space
	(the last dimension holds numOfYGrid/2+1 complex values, as used by
	the real-to-complex transforms planned below)
	=====================================================*/
	V = (fftw_complex**)fftw_malloc(sizeof(fftw_complex*)*numOfXGrid);
	W = (fftw_complex**)fftw_malloc(sizeof(fftw_complex*)*numOfXGrid);
	temp_U = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*numOfXGrid*(numOfYGrid/2+1));
	firstD_U = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*numOfXGrid*(numOfYGrid/2+1));
	secondD_U = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*numOfXGrid*(numOfYGrid/2+1));
	for(int i = 0; i < numOfXGrid; i++){
		V[i] = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*(numOfYGrid/2+1));
		W[i] = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*(numOfYGrid/2+1));
		for(int j = 0; j < numOfYGrid/2 + 1; j++){
			V[i][j][0] = 0;
			V[i][j][1] = 0;
			W[i][j][0] = 0;
			W[i][j][1] = 0;
		}
	}

	for(int i = 0; i < numOfXGrid*(numOfYGrid/2+1); i++){
		temp_U[i][0] = 0;
		temp_U[i][1] = 0;
		firstD_U[i][0] = 0;
		firstD_U[i][1] = 0;
		secondD_U[i][0] = 0;
		secondD_U[i][1] = 0;
	}

	temp = (fftw_complex**)fftw_malloc(sizeof(fftw_complex*)*numOfXGrid);
	for(int i = 0; i < numOfXGrid; i++){
		temp[i] = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*(numOfYGrid/2+1));
	}

	/**=====================================================
	Initializing Plans
	======================================================*/
	plan_r2c = fftw_plan_dft_r2c_2d(numOfXGrid,numOfYGrid,temp_Velocity,temp_U,FFTW_ESTIMATE);
	plan_c2r = fftw_plan_dft_c2r_2d(numOfXGrid,numOfYGrid,temp_U,temp_Velocity,FFTW_ESTIMATE);
	plan_firstD = fftw_plan_dft_c2r_2d(numOfXGrid,numOfYGrid,firstD_U,firstD_u,FFTW_ESTIMATE);
	plan_secondD = fftw_plan_dft_c2r_2d(numOfXGrid,numOfYGrid,secondD_U,secondD_u,FFTW_ESTIMATE);

	/*======================================================
	initializing output energy file
	======================================================*/

	stringstream fileNameOfE;
	fileNameOfE << OUTPUT_PATH << "E" << ".txt";
	energy.open(fileNameOfE.str().c_str());

	/*======================================================
	generating the readMe.txt file
	======================================================*/

	stringstream fileNameOfReadMe;
	fileNameOfReadMe << OUTPUT_PATH << "readMe.txt";
	readMe.open(fileNameOfReadMe.str().c_str());
	readMe << "Nx = " << numOfXGrid << endl;
	readMe << "Ny = " << numOfYGrid << endl;
	readMe << "dt = " << TIME_STEP << endl;
	readMe << "Initial Energy = " << initE << endl;
	readMe << "Viscosity = "<< VISCOSITY << endl;
	readMe << "Rescaled Viscosity =" << VISCOSITY/sqrt(initE*(numOfXGrid-1)*(numOfYGrid-1));
	readMe.close();


	/*======================================================
	Initializing Adams array.
	(three zero-filled grids each for v and w)
	======================================================*/
	Adams_v = new double**[3];
	Adams_w = new double**[3];
	for(int i = 0;i < 3; i++){
		Adams_v[i] = allocZeroedGrid(numOfXGrid, numOfYGrid);
		Adams_w[i] = allocZeroedGrid(numOfXGrid, numOfYGrid);
	}

	/*==================================================*/
}
Example #30
0
/*
 * Initializes the state shared by the whole code:
 *  - FFTW MPI / thread support (when compiled in),
 *  - the wavenumber arrays kx, ky, kz, their copies kxt, kyt, kzt,
 *    k2t = |k|^2 and ik2t = 1/k2t (set to 1 where k2t is zero),
 *  - the maximum wavenumbers and the dealiasing (2/3 rule) or Nyquist mask,
 *  - the complex work fields w1..w18 and wh1..wh5 together with their
 *    real-valued aliases wr* / wrh* (same memory),
 *  - the diffusion coefficients, from the Reynolds numbers given in param
 *    as powers of ten.
 * Every allocation failure is reported through ERROR_HANDLER with
 * ERROR_CRITICAL.
 */
void init_common(void) {
	/* This routine will initialize everything */
	int i,j,k;
	
	DEBUG_START_FUNC;
	
#ifdef MPI_SUPPORT
#ifdef FFTW3_MPI_SUPPORT	
	fftw_mpi_init();
#endif
#endif
#ifdef _OPENMP
	if( !(fftw_init_threads()) ) ERROR_HANDLER( ERROR_CRITICAL, "Threads initialisation failed");
#endif
	
	/* We start with the coordinate system */
	kx = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX );
	if (kx == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for kx allocation");
	
	ky = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX );
	if (ky == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for ky allocation");
	
	kz = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX );
	if (kz == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for kz allocation");
	
	kxt = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX );
	if (kxt == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for kxt allocation");
	kyt = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX );
	if (kyt == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for kyt allocation");
	kzt = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX );
	if (kzt == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for kzt allocation");
	
	k2t = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX );
	if (k2t == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for k2t allocation");
	
	ik2t = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX );
	if (ik2t == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for ik2t allocation");
	

	/* Fill the wavenumber arrays. Each process handles NX_COMPLEX / NPROC
	 * slabs along x; the fmod() expressions wrap the index so that the upper
	 * half of the range maps to negative wavenumbers. */
	for( i = 0; i < NX_COMPLEX / NPROC; i++) {
		for( j = 0; j < NY_COMPLEX; j++) {
			for( k = 0; k < NZ_COMPLEX; k++) {
				kx[ IDX3D ] = (2.0 * M_PI) / param.lx *
						(fmod( NX_COMPLEX * rank / NPROC  + i + (NX_COMPLEX / 2) ,  NX_COMPLEX ) - NX_COMPLEX / 2 );
					 
#ifdef WITH_2D
				ky[ IDX3D ] = (2.0 * M_PI) / param.ly * j;
					 
				kz[ IDX3D ] = 0.0;
#else
				ky[ IDX3D ] = (2.0 * M_PI) / param.ly *
						(fmod( j + (NY_COMPLEX / 2) ,  NY_COMPLEX ) - NY_COMPLEX / 2 );
					 
				kz[ IDX3D ] = (2.0 * M_PI) / param.lz * k;
#endif

				kxt[ IDX3D ]= kx[IDX3D];
				kyt[ IDX3D ]= ky[IDX3D]; 
				kzt[ IDX3D ]= kz[IDX3D]; 
			
				k2t[ IDX3D ] = kxt[IDX3D] * kxt[IDX3D] +
								kyt[IDX3D] * kyt[IDX3D] +
								kzt[IDX3D] * kzt[IDX3D];
							  
				/* avoid a division by zero for the k = 0 mode */
				if ( k2t[IDX3D] == 0.0 ) ik2t[IDX3D] = 1.0;
				else	ik2t[IDX3D] = 1.0 / k2t[IDX3D];
			}
		}
	}
	kxmax = 2.0 * M_PI/ param.lx * ( (NX / 2) - 1);
	kymax = 2.0 * M_PI/ param.ly * ( (NY / 2) - 1);
	kzmax = 2.0 * M_PI/ param.lz * ( (NZ / 2) - 1);
#ifdef WITH_2D
	kzmax = 0.0;
#endif
	kmax=pow(kxmax*kxmax+kymax*kymax+kzmax*kzmax,0.5);
	
	/* Initialize the dealiasing mask or the Nyquist frequency mask (in case dealiasing is not required) */
	
	mask = (double *) fftw_malloc( sizeof(double) * NTOTAL_COMPLEX );
	if (mask == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for mask allocation");
	
	for( i = 0; i < NX_COMPLEX/NPROC; i++) {
		for( j = 0; j < NY_COMPLEX; j++) {
			for( k = 0; k < NZ_COMPLEX; k++) {

				mask[ IDX3D ] = 1.0;
				if(param.antialiasing) {
					/* 2/3 rule: drop every mode above 2/3 of the maximum wavenumber */
					if( fabs( kx[ IDX3D ] ) > 2.0/3.0 * kxmax)
						mask[ IDX3D ] = 0.0;
				
					if( fabs( ky[ IDX3D ] ) > 2.0/3.0 * kymax)
						mask[ IDX3D ] = 0.0;
#ifndef WITH_2D
					if( fabs( kz[ IDX3D ] ) > 2.0/3.0 * kzmax)
						mask[ IDX3D ] = 0.0;
#endif
				}
				else {			
					/* only the Nyquist modes are removed */
				        if (  NX_COMPLEX / NPROC * rank + i == NX_COMPLEX / 2 ) 
						mask[ IDX3D ] = 0.0;
					if ( j == NY_COMPLEX / 2 )  
						mask[ IDX3D ] = 0.0;
#ifndef WITH_2D
					/* NOTE(review): k runs over 0 .. NZ_COMPLEX-1 in this loop, so this
					 * condition is never true as written; confirm the intended index. */
					if ( k == NZ_COMPLEX ) 
						mask[ IDX3D ] = 0.0;
#endif
				}
			}
		}
	}

	if(param.antialiasing) {
		kxmax = kxmax * 2.0 / 3.0;
		kymax = kymax * 2.0 / 3.0;
		kzmax = kzmax * 2.0 / 3.0;
		kmax = kmax * 2.0 / 3.0;
	}
	

// Allocate fields
// Complex fields

	w1 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w1 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w1 allocation");
	
	w2 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w2 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w2 allocation");
	
	w3 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w3 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w3 allocation");
	
	w4 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w4 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w4 allocation");
	
	w5 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w5 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w5 allocation");
	
	w6 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w6 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w6 allocation");
	
	w7 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w7 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w7 allocation");
	
	w8 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w8 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w8 allocation");
	
	w9 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w9 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w9 allocation");
	
	w10 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w10 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w10 allocation");
	
	w11 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w11 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w11 allocation");
	
	w12 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w12 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w12 allocation");
	
	w13 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w13 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w13 allocation");
	
	w14 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w14 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w14 allocation");
	
	w15 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w15 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w15 allocation");
	/* each message names the array that actually failed */
	w16 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w16 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w16 allocation");
	w17 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w17 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w17 allocation");
	w18 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (w18 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for w18 allocation");
	
	wh1 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (wh1 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for wh1 allocation");	
	wh2 = (double complex *) fftw_malloc( sizeof(double complex) * NTOTAL_COMPLEX);
	if (wh2 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for wh2 allocation");
	wh3 = (double complex *) fftw_malloc( sizeof(double complex) * NX*(NY/2+1));
	if (wh3 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for wh3 allocation");
	wh4 = (double complex *) fftw_malloc( sizeof(double complex) * NX*(NY/2+1)*NZ);
	if (wh4 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for wh4 allocation");
	wh5 = (double complex *) fftw_malloc( sizeof(double complex) * NX*(NY/2+1)*NZ);
	if (wh5 == NULL) ERROR_HANDLER( ERROR_CRITICAL, "No memory for wh5 allocation");
	
// Initialize wh1,wh2,wh3;
	/* NOTE(review): wh1 and wh2 hold NTOTAL_COMPLEX elements while this loop
	 * runs to NX*(NY/2+1); confirm that NTOTAL_COMPLEX is never smaller. */
	for(i=0;i<NX*(NY/2+1);i++) {wh1[i]=0; wh2[i]=0; wh3[i]=0;}	
		
	/* Will use the same memory space for real and complex fields */
	
	wr1 = (double *) w1;
	wr2 = (double *) w2;
	wr3 = (double *) w3;
	wr4 = (double *) w4;
	wr5 = (double *) w5;
	wr6 = (double *) w6;
	wr7 = (double *) w7;
	wr8 = (double *) w8;
	wr9 = (double *) w9;
	wr10 = (double *) w10;
	wr11 = (double *) w11;
	wr12 = (double *) w12;
	wr13 = (double *) w13;
	wr14 = (double *) w14;
	wr15 = (double *) w15;
	wr16 = (double *) w16;
	wr17 = (double *) w17;
	wr18 = (double *) w18;
	
	wrh1 = (double *) wh1;
	wrh2 = (double *) wh2;
	wrh3 = (double *) wh3;
	wrh4 = (double *) wh4;
	wrh5 = (double *) wh5;
// Physic initialisation
//	init_real_mask();

	//set Reynolds numbers using input powers AJB 08/03/12
	// (param.reynolds* hold log10 of the value on entry and the value itself afterwards)
	param.reynolds = pow(10.0,param.reynolds);
	nu = 1.0 / param.reynolds;
#ifdef BOUSSINESQ	
	param.reynolds_th = pow(10.0,param.reynolds_th);
	nu_th = 1.0 / param.reynolds_th;
#endif
#ifdef MHD
	param.reynolds_m = pow(10.0,param.reynolds_m);
	eta = 1.0 / param.reynolds_m;
#endif
	DEBUG_END_FUNC;
	return;
}