C++ (Cpp) hmc примеры использования

Язык программирования: C++ (Cpp)

Метод/Функция: hmc

Примеров на hotexamples.com: 4

C++ (Cpp) hmc - 4 примера найдено. Это лучшие примеры C++ (Cpp) кода для hmc, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

Файл: MPI_InterChainClassicHMC.cpp Проект: tbs1980/mcpack

int main(void)
{
    typedef double RealType;
    typedef mcpack::utils::GaussPotentialEnergy<RealType> PotEngType;
    typedef PotEngType::RealVectorType RealVectorType;
    typedef PotEngType::RealMatrixType RealMatrixType;
    typedef RealMatrixType::Index IndexType;
    typedef mcpack::hamiltonian::GaussKineticEnergy<RealType> KinEngType;
    typedef mcpack::hamiltonian::LeapFrog<PotEngType,KinEngType> IntegratorType;
    typedef mcpack::utils::RandomVariateGenerator<RealType> RandVarGenType;
    typedef mcpack::hamiltonian::HMCProposal<IntegratorType,RandVarGenType> HMCProposalType;
    //typedef mcpack::hamiltonian::ClassicHMC<HMCProposalType> HMCType;
    typedef mcpack::hamiltonian::MPIInterChainClassicHMC<HMCProposalType> HMCType;

    
    const IndexType N=10;

    RealVectorType mu=RealVectorType::Zero(N);
    RealMatrixType SigmaInv=RealMatrixType::Identity(N,N);
    RealMatrixType MInv=RealMatrixType::Identity(N,N);
    RealVectorType q0=RealVectorType::Random(N);

    const RealType eps=1;
    const IndexType Nsteps=10;

    PotEngType G(mu,SigmaInv);
    KinEngType K(MInv);

    IntegratorType Lp(G,K);

    HMCProposalType prop(Lp,eps,Nsteps);

    HMCType hmc(prop,q0,12346l);

    
    const IndexType NSamples=1000;

    RealMatrixType Samples(NSamples,N);

    hmc.Generate(Samples);
    
    std::cout<<"Acceptace Rate= "<<hmc.GetAcceptanceRate()<<std::endl;

	return 0;
}

Пример #2

Показать файл

Файл: PredictaEngine.cpp Проект: cslr/resonanz

    //////////////////////////////////////////////////////////////////////
    // worker thread
    void PredictaEngine::loop()
    {
      setStatus("Waiting..");
      thread_idle = true;

      while(running){
	while(!optimize && running){
	  thread_idle = true;
	  setStatus("Waiting..");
	  usleep(100000); // 100ms (waits for action

	  train.clear();
	  scoring.clear();
	  results.clear();
	}

	if(!running) continue;

	thread_idle = false;
	time_t executionStartedTime = time(0);

	train.clear();
	scoring.clear();
	results.clear();

	//////////////////////////////////////////////////////////////////////////
	// loads at most 100.000 = 100k lines of data to memory

	setStatus("Loading data (examples)..");

	if(train.importAscii(trainingFile, 100000) == false){
	  std::string error = "Cannot load file: " + trainingFile;
	  setError(error);	  
	  optimize = false;
	  continue;
	}

	setStatus("Loading data (to be scored data)..");

	if(scoring.importAscii(scoringFile, 100000) == false){
	  std::string error = "Cannot load file: " + scoringFile;
	  train.clear();
	  scoring.clear();
	  setError(error);
	  optimize = false;
	  continue;
	}

	setStatus("Checking data validity..");

	train.removeBadData();
	scoring.removeBadData();
	
	// if number of data points in training is smaller than 2*dim(input)
	// then the optimizer fails
	
	if(train.getNumberOfClusters() < 0 || scoring.getNumberOfClusters() < 0){
	  setError("No data in input files");
	  optimize = false;
	  continue;
	}
	
	if(train.size(0) < 10){
	  setError("Not enough data (at least 10 examples) in input file");
	  optimize = false;
	  continue;
	}
	
	if(train.size(0) < 2*train.dimension(0)){
	  setError("Not enough data (at least 2*DIMENSION examples) in input file");
	  optimize = false;
	  continue;
	}

	if(train.dimension(0) != (scoring.dimension(0) + 1)){
	  setError("Incorrect dimensions in training or scoring files");
	  optimize = false;
	  continue;
	}

	if(train.dimension(0) < 2){
	  setError("Incorrect dimensions in training or scoring files");
	  optimize = false;
	  continue;
	}

	//////////////////////////////////////////////////////////////////////////
	// preprocess data using PCA (if PCA cannot be calculated the whole process fails)

	// separates training data into input and output clusters
	whiteice::dataset< whiteice::math::blas_real<double> > tmp;

	if(tmp.createCluster("input", train.dimension(0)-1) == false || 
	   tmp.createCluster("output", 1) == false){
	  setError("Internal software error");
	  optimize = false;
	  continue;
	}

	for(unsigned int i=0;i<train.size(0);i++){
	  whiteice::math::vertex< whiteice::math::blas_real<double> > t = train.access(0, i);
	  whiteice::math::vertex< whiteice::math::blas_real<double> > a;
	  whiteice::math::vertex< whiteice::math::blas_real<double> > b;
	  a.resize(t.size()-1);
	  b.resize(1);
	  b[0] = t[t.size()-1];

	  for(unsigned int j=0;j<(t.size()-1);j++)
	    a[j] = t[j];

	  if(tmp.add(0, a) == false || tmp.add(1, b) == false){
	    setError("Internal software error");
	    optimize = false;
	    continue;
	  }
	}

	if(optimize == false)
	  continue; // abort computations

	train = tmp;
	
	setStatus("Preprocessing data..");

	if(train.preprocess(0) == false /*|| train.preprocess(1) == false*/){
	  setError("Bad/singular data please add more variance to data");
	  optimize = false;
	  continue;
	}
	
	//////////////////////////////////////////////////////////////////////////
	// optimize neural network using LBFGS (ML starting point for HMC sampler)
	
	std::vector<unsigned int> arch; // use double layer wide nnetwork
	arch.push_back(train.dimension(0));
	// arch.push_back(100*train.dimension(0));
	arch.push_back(train.dimension(0) < 10 ? 10 : train.dimension(0));
	arch.push_back(train.dimension(0) < 10 ? 10 : train.dimension(0));
	arch.push_back(train.dimension(1));

	whiteice::nnetwork< whiteice::math::blas_real<double> > nn(arch);
	whiteice::LBFGS_nnetwork< whiteice::math::blas_real<double> > bfgs(nn, train, false, false);
	whiteice::math::vertex< whiteice::math::blas_real<double> > w;

	nn.randomize();

#if 0
	// deep pretraining is disabled as a default
	setStatus("Preoptimizing solution (deep learning)..");
	if(deep_pretrain_nnetwork(&nn, train, true) == false){
	  setError("ERROR: deep pretraining of nnetwork failed.\n");
	  optimize = false;
	  continue;
	}
#endif

	
	nn.exportdata(w);
	bfgs.minimize(w);

	time_t t0 = time(0);
	unsigned int iterations = 0;
	whiteice::math::blas_real<double> error;


	while(optimize && bfgs.solutionConverged() == false && bfgs.isRunning() == true){
	  if(optimize == false){ // we lost license to do this anymore..
	    setStatus("Aborting optimization");
	    break;
	  }

	  time_t t1 = time(0);
	  unsigned int counter = (unsigned int)(t1 - t0); // time-elapsed

	  if(bfgs.getSolution(w, error, iterations) == false){ // we lost license to continue..
	    setStatus("Aborting optimization");
	    setError("LBFGS::getSolution() failed");
	    optimize = false;
	    break;
	  }

	  char buffer[128];
	  snprintf(buffer, 128, "Preoptimizing solution (%d iterations, %.2f minutes): %f",
		   iterations, counter/60.0f, error.c[0]);

	  setStatus(buffer);

	  // update results only every 5 seconds
	  sleep(5);
	}

	if(optimize == false){
	  bfgs.stopComputation();
	  continue; // abort computation
	}

	
	// after convergence, get the best solution
	if(bfgs.getSolution(w, error, iterations) == false){ // we lost license to continue..
	  setStatus("Aborting optimization");
	  setError("LBFGS::getSolution() failed");
	  optimize = false;
	  continue;
	}

	nn.importdata(w);

	//////////////////////////////////////////////////////////////////////////
	// use HMC to sample from max likelihood in order to get MAP

	setStatus("Analyzing uncertainty..");

	// whiteice::UHMC< whiteice::math::blas_real<double> > hmc(nn, train, true);
	whiteice::HMC< whiteice::math::blas_real<double> > hmc(nn, train, true);
	// whiteice::linear_ETA<float> eta;

	// for high quality..
	// we use just 50 samples
	// const unsigned int NUMSAMPLES = 1000;
	// eta.start(0.0f, NUMSAMPLES);

	if(hmc.startSampler() == false){
	  setStatus("Starting sampler failed (internal error)");
	  setError("Cannot start sampler");
	  optimize = false;
	  continue;
	}

	// unsigned int samples = 0;

	t0 = time(0);
	
	// always analyzes results for given time length
	double timeElapsed = (time(0) - executionStartedTime);
	double totalTime = 0;
	
	if(timeElapsed < optimizationTime)
	  totalTime = optimizationTime - timeElapsed;
	

	while(optimize){
	  unsigned int samples = hmc.getNumberOfSamples();
	  // if(samples >= NUMSAMPLES) break;
	  
	  // eta.update((float)hmc.getNumberOfSamples());

	  time_t t1 = time(0);
	  double counter = (double)(t1 - t0); // time-elapsed
	  
	  double timeLeft = (totalTime - counter)/60.0;
	  if(timeLeft <= 0.0){
	    timeLeft = 0.0;
	    if(hmc.getNumberOfSamples() > 0)
	      break; // always gets a single sample from HMC
	  }

	  char buffer[128];
	  snprintf(buffer, 128,
		   "Analyzing uncertainty (%d iterations. %.2f%% error. ETA %.2f minutes)",
		   // 100.0*((double)samples/((double)NUMSAMPLES)),
		   samples,
		   100.0*hmc.getMeanError(1).c[0]/error.c[0],
		   timeLeft);
		   // eta.estimate()/60.0);

	  setStatus(buffer);

	  if(optimize == false){ // we lost license to continue..
	    setStatus("Uncertainty analysis aborted");
	    break;
	  }

	  // updates only every 5 seconds so that we do not take too much resources
	  sleep(5); 
	}

	if(optimize == false)
	  continue; // abort computation

	hmc.stopSampler();
	
	//////////////////////////////////////////////////////////////////////////
	// estimate mean and variance of output given inputs in 'scoring'

	setStatus("Calculating scoring..");

	whiteice::bayesian_nnetwork< whiteice::math::blas_real<double> > bnn;
	
	if(hmc.getNetwork(bnn) == false){
	  setStatus("Exporting prediction model failed");
	  setError("Internal software error");
	  optimize = false;
	  continue;
	}
	
	if(results.createCluster("results", 1) == false){
	  setError("Internal software error");
	  optimize = false;
	  continue;
	}


	unsigned int NUM = scoring.size(0);

	// demo version only scores 10 first examples given in a file.
	if(demoVersion){
	  if(NUM > 10) NUM = 10;
	}
	
	for(unsigned int i=0;i<NUM;i++){

	  char buffer[128];
	  snprintf(buffer, 128, "Scoring data (%.1f%%)..",
		   100.0*((double)i)/((double)scoring.size(0)));
	  setStatus(buffer);
	  
	  
	  whiteice::math::vertex< whiteice::math::blas_real<double> > mean;
	  whiteice::math::matrix< whiteice::math::blas_real<double> > cov;
	  whiteice::math::vertex< whiteice::math::blas_real<double> > score;
	  auto tmp = scoring[i];

	  if(train.preprocess(0, tmp) == false){
	    setStatus("Calculating prediction failed (preprocess)");
	    setError("Internal software error");
	    optimize = false;
	    break;
	  }
	  
	  if(bnn.calculate(tmp, mean, cov) == false){
	    setStatus("Calculating prediction failed");
	    setError("Internal software error");
	    optimize = false;
	    break;
	  }

	  score.resize(1);
	  score[0] = mean[0] + risk*cov(0,0);

	  if(results.add(0, score) == false){
	    setStatus("Calculating prediction failed (storage)");
	    setError("Internal software error");
	    optimize = false;
	    break;
	  }

	  if(optimize == false)
	    break; // lost our license to continue
	}

	if(optimize == false)
	  continue; // lost our license to continue


	// finally save the results
	setStatus("Saving prediction results to file..");

	if(results.exportAscii(resultsFile) == false){
	  setStatus("Saving prediction results failed");
	  setError("Internal software error");
	  optimize = false;
	  break;
	}

	setStatus("Computations complete");
	
	optimize = false;
      }
    }

Пример #3

Показать файл

Файл: 1hadr.GPU.mom.cpp Проект: cpviolator/QUDA-CPS

/**
 * Single-hadron momentum-source measurement driver (QUDA + CPS).
 *
 * Usage: <program> <seed> <device>
 *   seed   - integer passed to setup_do_arg() and embedded in the output file name
 *   device - integer passed to initQuda()
 *
 * Runs NSKIP gauge updates per pass (heat bath when QUENCH is defined, HMC
 * otherwise; none when READ is defined). Once sweep_counter has reached
 * NTHERM, each pass reads or writes the lattice file, builds a smeared
 * momentum-source propagator and, for every local time slice, appends the sum
 * of MMDag_re_tr() over the spatial sites to the data file.
 *
 * Returns 0 on completion, 1 when the command-line arguments are missing.
 */
int main(int argc, char *argv[]) {

  // argv[1] and argv[2] are dereferenced below; refuse to run without them.
  if (argc < 3) {
    fprintf(stderr, "usage: %s <seed> <device>\n",
            argc > 0 ? argv[0] : "1hadr.GPU.mom");
    return 1;
  }

  int seed = atoi(argv[1]);
  int device = atoi(argv[2]);

  initQuda(device);
  Start(&argc,&argv);

  DoArg do_arg;
  setup_do_arg(do_arg, seed, NSITES_3D, NSITES_T, BETA);
  GJP.Initialize(do_arg);

  //VRB.DeactivateAll();

  GwilsonFclover lat;
  CommonArg c_arg;

  //Declare args for Gaussian Smearing
  QPropWGaussArg g_arg;
  g_arg.gauss_link_smear_type=GAUSS_LS_TYPE;   //Link smearing
  g_arg.gauss_link_smear_coeff=GAUSS_LS_COEFF; //Link smearing
  g_arg.gauss_link_smear_N=GAUSS_LS_N;         //Link smearing hits
  g_arg.gauss_N = GAUSS_N;                     //Source/Sink smearing hits
  g_arg.gauss_W = sqrt(KAPPA*4*g_arg.gauss_N); //Smearing parameter.

  // 4-character tag + terminating NUL, embedded in the data file name
  char is_qu[5];
  #ifdef QUENCH
    GhbArg ghb_arg;
    ghb_arg.num_iter = 1;
    AlgGheatBath hb(lat, &c_arg, &ghb_arg);
    strcpy(is_qu,"QUEN");
  #else
    HmdArg hmd_arg;
    setup_hmd_arg(hmd_arg);
    AlgHmcPhi hmc(lat, &c_arg, &hmd_arg);
    strcpy(is_qu,"UNQU");
  #endif

  int sweep_counter = 0;
  int total_updates = NTHERM + NSKIP*(NDATA-1);

  // Source at the origin plus the inverter settings.
  QPropWArg arg0;
  arg0.t=0;
  arg0.x=0;
  arg0.y=0;
  arg0.z=0;
  arg0.cg.mass = MASS;
  arg0.cg.stop_rsd = STOP_RSD;
  arg0.cg.max_num_iter = MAX_NUM_ITER;
  arg0.cg.Inverter = INVERTER_TYPE;
  arg0.cg.bicgstab_n = BICGSTAB_N;

  int x2[4];
  WilsonMatrix t4;
  Float d0_t4t4c_re_tr = 0.0;
  int x2_idx = 0;
  char lattice[256]; //lattice config file
  char file[256];  //output file

  //////////////////////
  // Start simulation //
  //////////////////////

  while (sweep_counter < total_updates) {
    for (int n = 1; n <= NSKIP; n++) {
#ifdef READ
      //do nothing
#else
      #ifdef QUENCH
        hb.run();
      #else
        hmc.run();
      #endif
#endif
      sweep_counter++;
      if (!UniqueID()) {
        printf("step %d complete\n",sweep_counter);
        fflush(stdout);
      }
    }

    if (sweep_counter == NTHERM) printf("thermalization complete. \n");
    if (sweep_counter >= NTHERM) {

      // Use this code to specify a gauge configuration.
      // snprintf bounds the write to the 256-byte buffer.
      #ifdef QUENCH
        snprintf(lattice, sizeof(lattice), LATT_PATH "QU/lat_hb_B%.2f_%d-%d_%d.dat",
                 BETA, NSITES_3D, NSITES_T, sweep_counter);
      #else
        // MASS feeds the M%.3f field; without it every following argument is
        // consumed by the wrong conversion.
        snprintf(lattice, sizeof(lattice), LATT_PATH "UNQ/lat_hmc_B%.2f_M%.3f_%d-%d_%d.dat",
                 BETA, MASS, NSITES_3D, NSITES_T, sweep_counter);
      #endif
#ifdef READ
      ReadLatticeParallel(lat,lattice);
#else
      WriteLatticeParallel(lat,lattice);
#endif
      gaugecounter = 1;

      // Get Point Source Propagator
      // This will place a unit wall source t plane set at the coordinates
      // specified by arg0, modulated by a phase set by P. It will then be
      // smeared using the parameters specified by g_arg.

      //Set the momentum phase.
      int P[3] = {P1,P2,P3};

      //Smear the source using the parameters set by g_arg.
      QPropWMomSrcSmeared qprop0(lat, &arg0, P, &g_arg, &c_arg);
      // Smear the sink with the same g_arg parameters.
      qprop0.GaussSmearSinkProp(g_arg);

      //Sum over x2
      for (x2[3]=0; x2[3]<GJP.TnodeSites(); x2[3]++) {
        //Reinitialise trace (plain assignment: "*= 0.0" would keep a NaN/Inf)
        d0_t4t4c_re_tr = 0.0;
        for (x2[2]=0; x2[2]<GJP.ZnodeSites(); x2[2]++)
          for (x2[1]=0; x2[1]<GJP.YnodeSites(); x2[1]++)
            for (x2[0]=0; x2[0]<GJP.XnodeSites(); x2[0]++) {
              x2_idx = lat.GsiteOffset(x2)/4;

              //Get propagator sinked at x2.
              t4 = qprop0[x2_idx];
              //Get the real part of the trace.
              d0_t4t4c_re_tr += MMDag_re_tr(t4);
            }

        //////////////////////////
        // Write trace to file. //
        //////////////////////////

        //Write data file so that the data can be reproduced from the name of the file.
        snprintf(file, sizeof(file),
                 DATAPATH "MOM_%d%d%d_GPU_%d_B%.2f_M%.3f_N%d_W%.3f_n%d_xi%.2f_1pion_%s_stout_%d-%d.dat",
                 P[0], P[1], P[2], seed, BETA, MASS, g_arg.gauss_N, g_arg.gauss_W, g_arg.gauss_link_smear_N,
                 g_arg.gauss_link_smear_coeff, is_qu, NSITES_3D, NSITES_T);

        // One appended line per time slice: sweep, time slice, 0, trace.
        FILE *t4tr=Fopen(file,"a");
        Fprintf(t4tr,"%d %d %d %.16e\n", sweep_counter, x2[3], 0, d0_t4t4c_re_tr);
        Fclose(t4tr);
        cout<<"time slice = "<<x2[3]<<" complete."<<endl;

        //////////////////////////////////////////
        // End trace summation at time slice t. //
        //////////////////////////////////////////

      }
    }
  }

  ////////////////////
  // End simulation //
  ////////////////////

  //End();
  endQuda();
  return 0;
}

Пример #4

Показать файл

Файл: 2hadr.mom.9d.cpp Проект: cpviolator/QUDA-CPS

int main(int argc, char *argv[]) {

  Start(&argc,&argv);
  int seed = atoi(argv[1]);         //
  int SINPz_Pz   = atof(argv[2]);   // integer percentage of the tolerance of sin(p)/p at Z.
  int SINPxy_Pxy = atof(argv[3]);   // integer percentage of the tolerance of sin(p)/p at XY.
  //int t_in = atoi(argv[5]);         //

  DoArg do_arg;
  setup_do_arg(do_arg, seed); 
  GJP.Initialize(do_arg);  

  GwilsonFclover lat;
  CommonArg c_arg;

  //Declare args for Gaussian Smearing
  QPropWGaussArg g_arg_mom;
  setup_g_arg(g_arg_mom);


  int sweep_counter = 0;
  int total_updates = NTHERM + NSKIP*(NDATA-1);

  #ifdef QUENCH
  GhbArg ghb_arg;
  ghb_arg.num_iter = 1;
  AlgGheatBath hb(lat, &c_arg, &ghb_arg);
  #else
  HmdArg hmd_arg; 
  setup_hmd_arg(hmd_arg);
  AlgHmcPhi hmc(lat, &c_arg, &hmd_arg); 
  #endif

  //Declare args for source at 0.
  QPropWArg arg_0;
  setup_qpropwarg_cg(arg_0);
  arg_0.x = 0;
  arg_0.y = 0;
  arg_0.z = 0;
  arg_0.t = 0;

  //Declare args for source at z.
  QPropWArg arg_z;
  setup_qpropwarg_cg(arg_z);

  // Propagator calculation objects and memory allocation
  //
  // Using x[4] = X(x,y,z,t)
  //       y[4] = Y(x,y,z,t)
  //       z[4] = Z(x,y,z,t)
  int x[4];
  int y[4];
  int z[4];
  int x_idx4d, x_idx3d, y_idx4d, y_idx3d, z_idx4d, z_idx3d;
  int vol4d = GJP.XnodeSites()*GJP.YnodeSites()*GJP.ZnodeSites()*GJP.TnodeSites();
  int vol3d = GJP.XnodeSites()*GJP.YnodeSites()*GJP.ZnodeSites();
  int xnodes = GJP.XnodeSites();
  int ynodes = GJP.YnodeSites();
  int znodes = GJP.ZnodeSites();
  double norm = pow(vol3d, -0.5);
  
  int max_mom = NSITES_3D;
  mom3D mom(max_mom, SINPz_Pz/(1.0*100));

  int s1 = 0;
  int c1 = 0;
  int s2 = 0;
  int c2 = 0;
  int sc_idx = 0;

  //use t to represent the time slice.
  //int t = 0;

  //In these arrays, we will use the index convention [sink_index + vol3d*source_index]
  WilsonMatrix *t3_arr = (WilsonMatrix*)smalloc(vol3d*vol3d*sizeof(WilsonMatrix));
  WilsonMatrix *t2_arr = (WilsonMatrix*)smalloc(vol3d*vol3d*sizeof(WilsonMatrix));
  //Initialise
  for (int i=0; i<vol3d*vol3d; i++) {
    t3_arr[i]    *= 0.0;
    t2_arr[i]    *= 0.0;
  }

  //Arrays to store the trace data
  fftw_complex *FT_t4 = (fftw_complex*)smalloc(vol3d*sizeof(fftw_complex));
  fftw_complex *FT_t2 = (fftw_complex*)smalloc(vol3d*vol3d*sizeof(fftw_complex));
  fftw_complex *FT_t3 = (fftw_complex*)smalloc(vol3d*vol3d*sizeof(fftw_complex));
  
  //Use this array several times for 9d D0, D1, D2.
  fftw_complex *FT_9d  = (fftw_complex*)smalloc(vol3d*vol3d*vol3d*sizeof(fftw_complex));
  
  //Momentum source array.
  fftw_complex *FFTW_mom_arr  = (fftw_complex*)smalloc(vol3d*sizeof(fftw_complex));
  //Initialise
  for (int i=0; i<vol3d*vol3d*vol3d; i++) {
    for(int a=0; a<2; a++){
      FT_9d[i][a]  = 0.0;    
      if(i<vol3d*vol3d) {
	FT_t3[i][a]  = 0.0;
	FT_t2[i][a]  = 0.0;
      }
      if(i<vol3d) {
	FT_t4[i][a]  = 0.0;
	FFTW_mom_arr[i][a]  = 0.0;
      }
    }
  }
 //gaahhbage 
  FFT_F(9, NSITES_3D, FT_9d);
  FFT_B(9, NSITES_3D, FT_9d);

  FFT_F(6, NSITES_3D, FT_t2);
  FFT_B(6, NSITES_3D, FT_t2);
 
  FFT_F(3, NSITES_3D, FFTW_mom_arr);
  FFT_B(3, NSITES_3D, FFTW_mom_arr); 

  WilsonMatrix t1;
  WilsonMatrix t1c;
  WilsonMatrix t4;
  WilsonMatrix t4c;
  WilsonMatrix t4t1c;
  WilsonMatrix t2t3c;
  WilsonMatrix t3;
  WilsonMatrix t3c;
  WilsonMatrix t2;
  WilsonMatrix t2c;
		
  //Rcomplex mom_src;
  //WilsonMatrix temp;

  Rcomplex t1t1c_tr;
  Rcomplex t4t4c_tr;
  Rcomplex d2_tr;
  Rcomplex t2t2c_tr;
  Rcomplex t3t3c_tr;

  //////////////////////
  // Start simulation //
  ////////////////////// 

  Float *time = (Float*)smalloc(10*sizeof(Float));
  for(int a=0; a<10; a++) time[a] = 0.0;

  char lattice[256];
  
  while (sweep_counter < total_updates) {
    for (int n = 1; n <= NSKIP; n++) {
#ifndef READ
      #ifdef QUENCH
      hb.run(); 
      #else
      hmc.run();
      #endif
#endif
      sweep_counter++;
      if (!UniqueID()) {
        printf("step %d complete\n",sweep_counter);
        fflush(stdout);
      }
    }
    
    if (sweep_counter == NTHERM) {
      printf("thermalization complete. \n");
    }
    if (sweep_counter >= NTHERM) {
      // Use this code to specify a gauge configuration.
      #ifdef QUENCH
        sprintf(lattice, LATT_PATH"QU/lat_hb_B%.2f_%d-%d_%d.dat", BETA, NSITES_3D, NSITES_T, sweep_counter);
      #else
        sprintf(lattice, LATT_PATH"UNQ/lat_hmc_B%.2f_M%.3f_%d-%d_%d.dat", BETA, MASS, NSITES_3D, NSITES_T, sweep_counter);
      #endif
#ifdef READ
      ReadLatticeParallel(lat,lattice);
#else
      WriteLatticeParallel(lat,lattice);
#endif
      
      gaugecounter = 1;
      
      // We will compute two arrays of momentum source propagators.
      // One array is of t2 S(x,z)
      // One array is of t3 S(y,z)
      // Each array will be indexed arr[sink_index + vol*source_index].
      
      // The sources for these arrays are calculated using the backaward FT of momentum states.
      // E.G., momemtum state P_0=(0,0,0) is used to calculated the position space state
      // X_0[n] = \frac{1}{sqrt(V)} * \sum_{m} e^{(-2i*pi/N)*n*m} * P_0[m].
      // This source is then used in the inversion to calculate an propagator M_0. M_0 <P_0|  has,
      // strong overlap with the P_0 state. This is repeated for small momenta (e.g. |P| < 1) and the propagators
      // from each inversion are summed and normalised by the number of momenta used k:
      // M = 1/sqrt(k) sum_k M_k <P_k|  The resulting propagator M has strong overlap with the low momentum states.
      // N.B. One can show that using all possible momenta K, the full propagator matrix is recovered.
      
      // The 0-mom source at the origin is calculated outside the time loop.
      int P0[3] = {0,0,0};
      
      arg_0.t = 0;
      QPropWMomSrcSmeared qprop_0(lat, &arg_0, P0, &g_arg_mom, &c_arg);
      qprop_0.GaussSmearSinkProp(g_arg_mom);
      cout<<"Sink Smear 0 complete."<<endl;
      
      //////////////////////////////////
      // Begin loop over time slices. //
      //////////////////////////////////

      for (int t=0; t<GJP.TnodeSites(); t++) {
	//Reinitialise all propagator arrays.
        for (int i=0; i<vol3d*vol3d; i++) {
	  t2_arr[i]    *= 0.0;
	  t3_arr[i]    *= 0.0;
        }
	
	stopwatchStart();
	
	//Generate momentum source
	int n_mom_srcs    = 0;

	for (mom.P[2] = 0; mom.P[2] < max_mom; mom.P[2]++)
	  for (mom.P[1] = 0; mom.P[1] < max_mom; mom.P[1]++)
	    for (mom.P[0] = 0; mom.P[0] < max_mom; mom.P[0]++) {
	      
	      cout<<"MOM = "<<mom.P[0]<<" "<<mom.P[1]<<" "<<mom.P[2]<<endl;
	      
	      cout<<"NORM_MOM_SZE = "<<mom.mod()/M_PI<<endl;
	      //frac = sin(p)/p
	      Float frac = sin(mom.mod())/(mom.mod());
	      cout<<"SIN(Pz)/Pz = "<<frac<<endl;
	      
	      if(frac > mom.sin_cutoff || (mom.P[0] == 0 && mom.P[1] == 0 && mom.P[2] == 0) ){
		
		//Set momentum
		int P[3] = {mom.P[0], mom.P[1], mom.P[2]};
		
		// The CPS momentum source function uses an unnormalised
		// source, so we take the product of both normalisation
		// factors and place them here on the FFTW_mom_arr.
		// A further normalisation to perform comes from the number n_mom_srcs
		// of momentum sources. This is done later in when the trace of
		// of the propagators is caculated.
		
		
		//Get Momentum Propagator
		arg_z.t = t;
		//QPropWMomSrc qprop_mom(lat, &arg_z, P, &c_arg);
		QPropWMomSrcSmeared qprop_mom(lat, &arg_z, P, &g_arg_mom, &c_arg);
		cout<<"Inversion "<<(n_mom_srcs+1)<<" complete."<<endl;
		qprop_mom.GaussSmearSinkProp(g_arg_mom);
		cout<<"Sink Smear "<<(n_mom_srcs+1)<<" complete."<<endl;
		
		int z_idx4d, z_idx3d, x_idx4d, x_idx3d, y_idx4d, y_idx3d;
		//Loop over sources at z.
		z[3] = t;
		for (z[2]=0; z[2]<znodes; z[2]++)
		  for (z[1]=0; z[1]<ynodes; z[1]++)
		    for (z[0]=0; z[0]<xnodes; z[0]++) {
		      z_idx4d = lat.GsiteOffset(z)/4;
		      z_idx3d = z_idx4d - vol3d*z[3];
		      
		      cout<<"mom_src "<<qprop_mom.mom_src(z_idx4d)<<endl;
		      
		      //Loop over sinks at x.
		      x[3] = 0;
		      for (x[2]=0; x[2]<znodes; x[2]++)
			for (x[1]=0; x[1]<ynodes; x[1]++)
			  for (x[0]=0; x[0]<xnodes; x[0]++) {
			    x_idx4d = lat.GsiteOffset(x)/4;
			    x_idx3d = x_idx4d - vol3d*x[3];
			    
			    //Build t2 array.
			    t2_arr[x_idx3d + vol3d*z_idx3d] += qprop_mom[x_idx4d]*conj(qprop_mom.mom_src(z_idx4d));
			  }
		      
		      //Loop over sinks at y.
		      y[3] = t;
		      for (y[2]=0; y[2]<znodes; y[2]++)
			for (y[1]=0; y[1]<ynodes; y[1]++)
			  for (y[0]=0; y[0]<xnodes; y[0]++) {
			    y_idx4d = lat.GsiteOffset(y)/4;
			    y_idx3d = y_idx4d - vol3d*y[3];
			    
			    //Build t3 array.
			    t3_arr[y_idx3d + vol3d*z_idx3d] += qprop_mom[y_idx4d]*conj(qprop_mom.mom_src(z_idx4d));
			  }
		    }
		n_mom_srcs++; 
		cout << "momentum sources: "<<1+mom.P[2]*max_mom*max_mom + mom.P[1]*max_mom + mom.P[0]<<" / "<<pow(max_mom,3)<<" checked"<<endl;
	      }
	    }
	
	cout<<"FLAG 1"<<endl;
	//inversions + fill      
	time[1] = stopwatchReadSeconds();
	stopwatchStart();
	
	//////////////////////////////////////////////////////////////////
	// End momentum source propagator calculation for time slice t. //
	//////////////////////////////////////////////////////////////////
	
	
	///////////////////////////////////////////////
	// Begin summation of trace at time slice t. //
	///////////////////////////////////////////////
	      
	// The t1, t1c, t4, and t4c propagators are calculated 'on the fly'
	// within the trace summation.
      
	//Reinitialise all trace variables
	
	t1  *= 0.0;
	t1c *= 0.0;
	t2  *= 0.0;
	t2c *= 0.0;
	t3  *= 0.0;
	t3c *= 0.0;
	t4  *= 0.0;
	t4c *= 0.0;
	t4t1c *= 0.0;
	t2t3c *= 0.0;
      
	t1t1c_tr *= 0.0;
	t2t2c_tr *= 0.0;
	t3t3c_tr *= 0.0;
	t4t4c_tr *= 0.0;
	d2_tr *= 0.0;
            
	for (int i=0; i<vol3d*vol3d*vol3d; i++) 
	  for(int a=0; a<2; a++) {
	    FT_9d[i][a] = 0.0;
	    if(i<vol3d*vol3d) {
	      FT_t3[i][a] = 0.0;
	      FT_t2[i][a] = 0.0;
	    }
	    if(i<vol3d) {
	      FT_t4[i][a] = 0.0;
	    }
	  }
	//Sum over X
	x[3] = 0;
	for (x[2]=0; x[2]<znodes; x[2]++)
	  for (x[1]=0; x[1]<ynodes; x[1]++)
	    for (x[0]=0; x[0]<xnodes; x[0]++) {
	      x_idx4d = lat.GsiteOffset(x)/4;
	      x_idx3d = x_idx4d - vol3d*x[3];
	      
	      t1 = qprop_0[x_idx4d];
	      t1c = t1.conj_cp();
	      
	      //Sum over Y
	      y[3] = t;
	      for (y[2]=0; y[2]<znodes; y[2]++)
		for (y[1]=0; y[1]<ynodes; y[1]++)
		  for (y[0]=0; y[0]<xnodes; y[0]++) {
		    y_idx4d = lat.GsiteOffset(y)/4;
		    y_idx3d = y_idx4d - vol3d*y[3];
		    
		    t4 = qprop_0[y_idx4d];
		  
		    // Use this condition so that t4t4c is calculated only once
		    // over X per time slice.
		    if (x_idx3d == 0) {
		      //Perform t4t4c trace sum for D0 graph.
		      FT_t4[y_idx3d][0] = MMDag_re_tr(t4);
		      FT_t4[y_idx3d][1] = 0.0;
		    }
		    
		    //Declare new Wilson Matrix t4*t1c for D2 and compute
		    t4t1c = t4;
		    t4t1c *= t1c;
		    
		    //Sum over Z.
		    z[3] = t;
		    for (z[2]=0; z[2]<znodes; z[2]++)
		      for (z[1]=0; z[1]<ynodes; z[1]++)
			for (z[0]=0; z[0]<xnodes; z[0]++) {
			  z_idx4d = lat.GsiteOffset(z)/4;
			  z_idx3d = z_idx4d - vol3d*z[3];
			  
			  //Declare new Wilson Matrix t2*t3c and compute it.			  
			  t2t3c = t2_arr[x_idx3d + vol3d*z_idx3d];
			  t3c   = t3_arr[y_idx3d + vol3d*z_idx3d].conj_cp();
			  t2t3c *= t3c;
			  
			  //Perform t4t1c * t2t3c trace sum for D2 graph.
			  d2_tr = Trace(t4t1c, t2t3c);
			  
			  //Create 9d array for D2.			  
			  FT_9d[x_idx3d + vol3d*(y_idx3d + vol3d*z_idx3d)][0] = d2_tr.real();
			  FT_9d[x_idx3d + vol3d*(y_idx3d + vol3d*z_idx3d)][1] = d2_tr.imag();
			  
			  ///////////////////////////////////////////////////////////////////
			  // Use this condition so that t2t2c is calculated only over
			  // x1 and x3 loops per time slice. 
			  if (y_idx3d == 0) {
			    //Retrieve propagators for t2t2c trace sum.
			    FT_t2[x_idx3d + vol3d*z_idx3d][0] = MMDag_re_tr(t2_arr[x_idx3d + vol3d*z_idx3d]);
			    FT_t2[x_idx3d + vol3d*z_idx3d][1] = 0.0;
			  }
			  // Use this condition so that t3t3c is calculated only over
			  // x2 and x3 loops per time slice. 
			  if (x_idx3d == 0) {
			    
			    //Retrieve propagators for t3t3c trace sum.
			    FT_t3[y_idx3d + vol3d*z_idx3d][0] = MMDag_re_tr(t3_arr[y_idx3d + vol3d*z_idx3d]);
			    FT_t3[y_idx3d + vol3d*z_idx3d][1] = 0.0;
			  }
			  ///////////////////////////////////////////////////////////////////
			}
		  }
	    }
	
	//Fill the trace arrays
	time[2] = stopwatchReadSeconds();
	
	cout<<"FLAG 3"<<endl;

	///////////////////////////////////////////////
	// Write traces to file for post-processing. //
	///////////////////////////////////////////////
      
	char file[256];
  	FFT_F(6, NSITES_3D, FT_t2);
	FFT_F(6, NSITES_3D, FT_t3);
	FFT_F(3, NSITES_3D, FT_t4);

	// if(t==0) {    
	// sprintf(file, "%d-%d_3-0.1_msmsFT_6d_data/t1t1c_TR_%d_%d-%d_%d_%d.dat",  NSITES_3D, NSITES_T, n_mom_srcs, NSITES_3D, NSITES_T, sweep_counter, t);	  
	// FILE *qt1tr   = Fopen(file, "a");
	// for(int snk =0; snk<vol3d; snk++) {
	// Fprintf(qt1tr,  "%d %d %d %.16e %.16e\n", sweep_counter, t, snk, FT_t4[snk][0], FT_t4[snk][1]);
	// }
	// Fclose(qt1tr);
	// }
	
	sprintf(file, DATAPATH"t4t4c_TR_%d_%d-%d_%d_%d.dat",  n_mom_srcs, NSITES_3D, NSITES_T, sweep_counter, t);
	FILE *qt4tr   = Fopen(file, "a");
	for(int snk =0; snk<vol3d; snk++) {
	  Fprintf(qt4tr,  "%d %d %d %.16e %.16e\n", sweep_counter, t, snk, FT_t4[snk][0], FT_t4[snk][1]);
	}
	
	sprintf(file, DATAPATH"t2t2c_TR_%d_%d-%d_%d_%d.dat",  n_mom_srcs, NSITES_3D, NSITES_T, sweep_counter, t);
	FILE *qt2tr   = Fopen(file, "a");
	sprintf(file, DATAPATH"t3t3c_TR_%d_%d-%d_%d_%d.dat",  n_mom_srcs, NSITES_3D, NSITES_T, sweep_counter, t);
	FILE *qt3tr   = Fopen(file, "a");
	
	for(int src =0; src<vol3d; src++) {
	  for(int snk =0; snk<vol3d; snk++) {
	    Fprintf(qt2tr,"%d %d %d %d %.16e %.16e\n", sweep_counter, t, src, snk, FT_t2[snk + vol3d*src][0], FT_t2[snk + vol3d*src][1]);
	    Fprintf(qt3tr,"%d %d %d %d %.16e %.16e\n", sweep_counter, t, src, snk, FT_t3[snk + vol3d*src][0], FT_t3[snk + vol3d*src][1]);
	  }
	}
	

	Fclose(qt2tr);
	Fclose(qt3tr);
	Fclose(qt4tr);

	//////////////////////////
	// FFT the 9D D2 array. //
	//////////////////////////
      
	stopwatchStart();
	
	FFT_F(9, NSITES_3D, FT_9d);
	//time for D2 6d FFT
	time[4] = stopwatchReadSeconds();      
	//wtf == 'write to file', include/FFTW_functions.cpp
	FFT_wtf_ZYX(FT_9d, 2, SINPz_Pz, SINPxy_Pxy, n_mom_srcs, NSITES_3D, NSITES_T, sweep_counter, t);
	
	//sprintf(file, "T_data/times_%d-%d_%d_%d.dat", NSITES_3D, NSITES_T, sweep_counter, t);
	//FILE *time_fp = Fopen(file, "a");
	//Fprintf(time_fp, "%.4f %.4f %.4f %.4f\n", time[1], time[2], time[3], time[4]);
	//Fclose(time_fp); 
	
	//////////////////////////////////////////
	// End trace summation at time slice t. //
	//////////////////////////////////////////
      }
    }
  }
  ////////////////////
  // End simulation //
  ////////////////////
  
  sfree(t2_arr);
  sfree(t3_arr);

  //sfree(FT_t1);
  sfree(FT_t4);
  sfree(FT_t2);
  sfree(FT_t3);
  sfree(FT_9d);  


  sfree(time);

  //End();
  return 0;
}