Пример #1
0
void TMVAtest(){
  //gSystem->Load("../lib/slc5_amd64_gcc462/libTAMUWWMEPATNtuple.so");
  gSystem->Load("libPhysics");
  //gSystem->Load("EvtTreeForAlexx_h.so");
  gSystem->Load("libTMVA.1");
  gSystem->Load("AutoDict_vector_TLorentzVector__cxx.so");
  TMVA::Tools::Instance();
  TFile* outputFile = TFile::Open("TMVA1.root", "RECREATE");
  TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification",outputFile,"V=true:Color:DrawProgressBar");// ":Transformations=I;D;P;G,D" );
  TFile* signal = TFile::Open("/uscms_data/d2/aperloff/Spring12ME7TeV/MEResults/microNtuples_oldStructure/microWW_EPDv01.root");
  TFile* bkg = TFile::Open("/uscms_data/d2/aperloff/Spring12ME7TeV/MEResults/microNtuples_oldStructure/microWJets_EPDv01.root");

  TTree* stree = (TTree*)signal->Get("METree");
  TTree* btree = (TTree*)bkg->Get("METree");
  factory->AddSignalTree(stree,1.0);
  factory->AddBackgroundTree(btree,1.0);


  factory->SetSignalWeightExpression("1.0");
  factory->SetBackgroundWeightExpression("1.0");
  factory->AddVariable("tEventProb[0]");
  factory->AddVariable("tEventProb[1]");
  factory->AddVariable("tEventProb[2]");

  //factory->AddVariable("tEventProb0 := tEventProb[0]",'F');
  //factory->AddVariable("tEventProb1 := tEventProb[1]",'F');
  //factory->AddVariable("tEventProb2 := tEventProb[2]",'F');
  TCut test("Entry$>-2 && jLV[1].Pt()>30");
  TCut mycuts (test);
  factory->PrepareTrainingAndTestTree(mycuts,mycuts,"nTrain_Signal=0:nTrain_Background=0:nTest_Signal=0:nTest_Background=0:SplitMode=Random:NormMode=None:V=true:VerboseLevel=DEBUG");
  factory->BookMethod( TMVA::Types::kBDT, "BDT","!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );
  factory->TrainAllMethods();
  factory->TestAllMethods();
  factory->EvaluateAllMethods();
  outputFile->Close(); 

}
Пример #2
0
void TMVA_stop( TString signal_name = "T2tt", int train_region = 1, float x_parameter = 0.25)
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the
   // corresponding lines from .rootrc

   // the signal sample, training region and x parameter can be given as arguments; use format:
   //
   // mylinux~> root -l TMVA_stop.C\(\"T2bw\",1,0.25\)
   //
   // if you like to use a method via the plugin mechanism, we recommend using
   //
   // mylinux~> root -l TMVA_stop.C\(\"P_myMethod\"\)
   // (an example is given for using the BDT as plugin (see below),
   // but of course the real application is when you write your own
   // method based)

  //-----------------------------------------------------
  // define event selection (store in TCut sel)
  //-----------------------------------------------------

  TCut njets4("mini_njets>=4");
  TCut met100("mini_met>=100");
  TCut mt120("mini_mt>=120");
  TCut nb1("mini_nb>=1");
  TCut isotrk("mini_passisotrk==1");
  TCut lep_pt30("mini_nlep>=1 && mini_lep1pt>30.0");
  TCut sig("mini_sig==1");
   
  TCut  sel0  = njets4 + met100 + mt120 + nb1 + isotrk + lep_pt30 + sig;

  cout << "Using selection      : " << sel0.GetTitle() << endl;
  cout << "Doing signal point   : " << train_region       << endl;

  //-----------------------------------------------------
  // choose which variables to include in MVA training
  //-----------------------------------------------------
  
  std::map<std::string,int> mvaVar;
  mvaVar[ "met" ]			= 1;
  mvaVar[ "lep1pt" ]  	    = 0;
  mvaVar[ "mt2w" ]	  		= 1;
  mvaVar[ "htratiom" ]	    = 1;
  mvaVar[ "chi2" ]	        = 1;
  mvaVar[ "dphimjmin" ]		= 1;
  mvaVar[ "pt_b" ]			= 0;
  mvaVar[ "nb" ]			= 0;
  mvaVar[ "pt_J1" ]			= 0;
  mvaVar[ "pt_J2" ]			= 0;
  mvaVar[ "rand" ]			= 0;

  mvaVar[ "mt" ]			= 0;
  mvaVar[ "mt2bl" ]			= 0;
  mvaVar[ "mt2b" ]			= 0;
  mvaVar[ "lep1eta" ]			= 0;
  mvaVar[ "thrjetlm" ]			= 0;
  mvaVar[ "apljetlm" ]			= 0;
  mvaVar[ "sphjetlm" ]			= 0;
  mvaVar[ "cirjetlm" ]			= 0;
  mvaVar[ "chi2min" ]			= 0;
  mvaVar[ "chi2min_mt2b" ]		= 0;
  mvaVar[ "chi2min_mt2bl" ]		= 0;
  mvaVar[ "chi2min_mt2w" ]		= 0;
  mvaVar[ "mt2bmin" ]			= 0;
  mvaVar[ "mt2blmin" ]			= 0;
  mvaVar[ "mt2wmin_chi2" ]		= 0;
  mvaVar[ "mt2bmin_chi2" ]		= 0;
  mvaVar[ "mt2blmin_chi2" ]		= 0;
  mvaVar[ "mt2wmin_chi2prob" ]		= 0;
  mvaVar[ "mt2bmin_chi2prob" ]		= 0;
  mvaVar[ "mt2blmin_chi2prob" ]		= 0;
  mvaVar[ "htratiol" ]              	= 0;
  mvaVar[ "dphimj1" ]			= 0;
  mvaVar[ "dphimj2" ]			= 0;
  mvaVar[ "metsig" ]			= 0;

  //---------------------------------
  //choose bkg samples to include
  //---------------------------------
  cout << "Background trees: " << endl;
  int n_backgrounds = 8;

  TString backgrounds[] = {"ttdl_powheg", "ttsl_powheg", "w1to4jets", "tW_lep", "triboson", "diboson", "ttV", "DY1to4Jtot" };

  TString bkgPath = "/nfs-3/userdata/stop/Train/V00-02-18__V00-03-00_4jetsMET100_bkg/";

  TChain* chBackground = new TChain("t");
 
  for (int i = 0; i < n_backgrounds; i++) {
     TString backgroundChain = bkgPath + "/" + backgrounds[i] + ".root";
     cout << "    " << backgroundChain << endl;
     chBackground ->Add(backgroundChain );
  }

  //---------------------------------
  //choose signal sample to include
  //---------------------------------
  cout << "Signal trees: " << endl;
  TString s_train_region = "";
  s_train_region += train_region;
  TString s_x_parameter = "";
  s_x_parameter = Form("%.2f",x_parameter);

  TString signalPath = "/nfs-3/userdata/stop/Train/";
  TString signalVersion = "V00-02-18__V00-03-00_4jetsMET100_";

  TChain *chSignal = new TChain("t");

  TString base_name = signalPath + "/" + signalVersion + signal_name + "/" + signal_name + "_" + s_train_region;
  if (signal_name == "T2bw") base_name = base_name + "_" + s_x_parameter;
  TString signalChain  = base_name + ".root" ;

  cout << "    " << signalChain << endl;

  chSignal->Add(signalChain);

  //-----------------------------------------------------
  // choose backgrounds to include for multiple outputs
  //-----------------------------------------------------
  
  // bool doMultipleOutputs = false;

  // TChain *chww = new TChain("Events");
  // chww->Add(Form("%s/WWTo2L2Nu_PU_testFinal_baby.root",babyPath));
  // chww->Add(Form("%s/GluGluToWWTo4L_PU_testFinal_baby.root",babyPath));
  
  // TChain *chwjets = new TChain("Events");
  // chwjets->Add(Form("%s/WJetsToLNu_PU_testFinal_baby.root",babyPath));
  
  // TChain *chtt = new TChain("Events");
  // chtt->Add(Form("%s/TTJets_PU_testFinal_baby.root",babyPath));
  
  // std::map<std::string,int> includeBkg;
  // includeBkg["ww"]      = 1;
  // includeBkg["wjets"]   = 0;
  // includeBkg["tt"]      = 0;

   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Cut optimisation
   Use["Cuts"]            = 0;
   Use["CutsD"]           = 0;
   Use["CutsPCA"]         = 0;
   Use["CutsGA"]          = 0;
   Use["CutsSA"]          = 0;
   // 
   // --- 1-dimensional likelihood ("naive Bayes estimator")
   Use["Likelihood"]      = 0;
   Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"]   = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"]   = 0;
   Use["LikelihoodMIX"]   = 0;
   //
   // --- Multidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]           = 0;
   Use["PDERSD"]          = 0;
   Use["PDERSPCA"]        = 0;
   Use["PDEFoam"]         = 0;
   Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
   Use["KNN"]             = 0; // k-nearest neighbour method
   //
   // --- Linear Discriminant Analysis
   Use["LD"]              = 0; // Linear Discriminant identical to Fisher
   Use["Fisher"]          = 0;
   Use["FisherG"]         = 0;
   Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
   Use["HMatrix"]         = 0;
   //
   // --- Function Discriminant analysis
   Use["FDA_GA"]          = 0; // minimisation of user-defined function using Genetics Algorithm
   Use["FDA_SA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   Use["FDA_MCMT"]        = 0;
   //
   // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
   Use["MLP"]             = 0; // Recommended ANN
   Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
   Use["MLPBNN"]          = 0; // Recommended ANN with BFGS training method and bayesian regulator
   Use["CFMlpANN"]        = 0; // Depreciated ANN from ALEPH
   Use["TMlpANN"]         = 0; // ROOT's own ANN
   //
   // --- Support Vector Machine 
   Use["SVM"]             = 0;
   // 
   // --- Boosted Decision Trees
   Use["BDT"]             = 1; // uses Adaptive Boost
   Use["BDT1"]            = 0; // uses Adaptive Boost
   Use["BDTG"]            = 0; // uses Gradient Boost
   Use["BDTB"]            = 0; // uses Bagging
   Use["BDTD"]            = 0; // decorrelation + Adaptive Boost
   // 
   // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
   Use["RuleFit"]         = 0;
   //
   // --- multi-output MVA's
   Use["multi_BDTG"]      = 0;
   Use["multi_MLP"]       = 0;
   Use["multi_FDA_GA"]    = 0;

   //
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   // --- Here the preparation phase begins

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName = "TMVA_" + signal_name + "_" + s_train_region;
   if (signal_name == "T2bw") outfileName = outfileName +"_" + s_x_parameter;
   outfileName += ".root";
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   TString classification_name = "classification_" + signal_name + "_" + s_train_region;
   if (signal_name == "T2bw") classification_name = classification_name +"_" + s_x_parameter;

   /*
   TString multioutfileName( "TMVA_HWW_multi.root" );
   TFile* multioutputFile;

   if( doMultipleOutputs )
     multioutputFile = TFile::Open( multioutfileName, "RECREATE" );
   */

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory is 
   // the only TMVA object you have to interact with
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( classification_name, outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );
   /*
   TMVA::Factory *multifactory;
   if( doMultipleOutputs )
     multifactory= new TMVA::Factory( "TMVAMulticlass", multioutputFile,
                                      "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=multiclass" );
   */
   
   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   //factory->AddVariable( "myvar1 := var1+var2", 'F' );
   //factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' );
   //factory->AddVariable( "var3",                "Variable 3", "units", 'F' );
   //factory->AddVariable( "var4",                "Variable 4", "units", 'F' );

   //--------------------------------------------------------
   // choose which variables to include in training
   //--------------------------------------------------------

   if( mvaVar[ "met"           ]  == 1 ) factory->AddVariable( "mini_met"                    ,  "E_{T}^{miss}"               ,       "GeV", 'F' );
   if( mvaVar[ "mt"            ]  == 1 ) factory->AddVariable( "mini_mt"                     ,  "M_{T}"                      ,       "GeV", 'F' );
   if( mvaVar[ "mt2w"          ]  == 1 ) factory->AddVariable( "mini_mt2w"                   ,  "MT2W"                       ,       "GeV", 'F' );
   if( mvaVar[ "mt2bl"         ]  == 1 ) factory->AddVariable( "mini_mt2bl"                  ,  "MT2bl"                      ,       "GeV", 'F' );
   if( mvaVar[ "mt2b"          ]  == 1 ) factory->AddVariable( "mini_mt2b"                   ,  "MT2b"                       ,       "GeV", 'F' );
   if( mvaVar[ "chi2"          ]  == 1 ) factory->AddVariable( "mini_chi2"                   ,  "chi2"                       ,       ""   , 'F' );
   if( mvaVar[ "lep1pt"        ]  == 1 ) factory->AddVariable( "mini_lep1pt"                 ,  "lepton pt"                  ,       ""   , 'F' );
   if( mvaVar[ "lep1eta"       ]  == 1 ) factory->AddVariable( "mini_lep1eta"                ,  "lepton eta"                 ,       ""   , 'F' );
   if( mvaVar[ "thrjetlm"      ]  == 1 ) factory->AddVariable( "mini_thrjetlm"               ,  "thrust"                     ,       ""   , 'F' );
   if( mvaVar[ "apljetlm"      ]  == 1 ) factory->AddVariable( "mini_apljetlm"               ,  "aplanarity"                 ,       ""   , 'F' );
   if( mvaVar[ "sphjetlm"      ]  == 1 ) factory->AddVariable( "mini_sphjetlm"               ,  "sphericity"                 ,       ""   , 'F' );
   if( mvaVar[ "cirjetlm"      ]  == 1 ) factory->AddVariable( "mini_cirjetlm"               ,  "circularity"                ,       ""   , 'F' );
   if( mvaVar[ "chi2min"       ]  == 1 ) factory->AddVariable( "mini_min(chi2min,100)"       ,  "#chi^{2}_{min}"             ,       ""   , 'F' );
   if( mvaVar[ "chi2minprob"   ]  == 1 ) factory->AddVariable( "mini_chi2minprob"            ,  "Prob(#chi^{2}_{min})"       ,       ""   , 'F' );
   if( mvaVar[ "chi2min_mt2b"  ]  == 1 ) factory->AddVariable( "mini_chi2min_mt2b"           ,  "MT2b(#chi^{2}_{min})"       ,       ""   , 'F' );
   if( mvaVar[ "chi2min_mt2bl" ]  == 1 ) factory->AddVariable( "mini_chi2min_mt2bl"          ,  "MT2bl(#chi^{2}_{min})"      ,       ""   , 'F' );
   if( mvaVar[ "chi2min_mt2w"  ]  == 1 ) factory->AddVariable( "mini_chi2min_mt2w"           ,  "MT2W(#chi^{2}_{min})"       ,       ""   , 'F' );
   if( mvaVar[ "mt2bmin"       ]  == 1 ) factory->AddVariable( "mini_mt2bmin"                ,  "MT2b_{min}"                 ,       ""   , 'F' );
   if( mvaVar[ "mt2blmin"      ]  == 1 ) factory->AddVariable( "mini_mt2blmin"               ,  "MT2bl_{min}"                ,       ""   , 'F' );
   if( mvaVar[ "mt2wmin"       ]  == 1 ) factory->AddVariable( "mini_mt2wmin"                ,  "MT2W_{min}"                 ,       ""   , 'F' );
   if( mvaVar[ "mt2bmin_chi2"  ]  == 1 ) factory->AddVariable( "min(mt2bmin_chi2,100)"  ,  "#chi^{2}(MT2b_{min})"       ,       ""   , 'F' );
   if( mvaVar[ "mt2blmin_chi2" ]  == 1 ) factory->AddVariable( "min(mt2blmin_chi2,100)" ,  "#chi^{2}(MT2bl_{min})"      ,       ""   , 'F' );
   if( mvaVar[ "mt2wmin_chi2"  ]  == 1 ) factory->AddVariable( "min(mt2wmin_chi2,100)"  ,  "#chi^{2}(MT2W_{min})"       ,       ""   , 'F' );
   if( mvaVar[ "mt2bmin_chi2prob"  ]  == 1 ) factory->AddVariable( "mt2bmin_chi2prob"   ,  "Prob(#chi^{2}(MT2b_{min}))"       ,       ""   , 'F' );
   if( mvaVar[ "mt2blmin_chi2prob" ]  == 1 ) factory->AddVariable( "mt2blmin_chi2prob"  ,  "Prob(#chi^{2}(MT2bl_{min}))"      ,       ""   , 'F' );
   if( mvaVar[ "mt2wmin_chi2prob"  ]  == 1 ) factory->AddVariable( "mt2wmin_chi2prob"   ,  "Prob(#chi^{2}(MT2W_{min}))"       ,       ""   , 'F' );
   if( mvaVar[ "htratiol"      ]  == 1 ) factory->AddVariable( "mini_htssl/(mini_htosl+mini_htssl)"    ,  "H_{T}^{SSL}/H_{T}"          ,       ""   , 'F' );
   if( mvaVar[ "htratiom"      ]  == 1 ) factory->AddVariable( "mini_htssm/(mini_htosm+mini_htssm)"    ,  "H_{T}^{SSM}/H_{T}"          ,       ""   , 'F' );
   if( mvaVar[ "dphimj1"       ]  == 1 ) factory->AddVariable( "mini_dphimj1"                ,  "#Delta#phi(j1,E_{T}^{miss})",       ""   , 'F' );
   if( mvaVar[ "dphimj2"       ]  == 1 ) factory->AddVariable( "mini_dphimj2"                ,  "#Delta#phi(j2,E_{T}^{miss})",       ""   , 'F' );
   if( mvaVar[ "dphimjmin"     ]  == 1 ) factory->AddVariable( "mini_dphimjmin"              ,  "min(#Delta#phi(j_{1,2},E_{T}^{miss}))",       ""   , 'F' );
   if( mvaVar[ "rand"          ]  == 1 ) factory->AddVariable( "mini_rand"                   ,  "random(0,1)"                ,       ""   , 'F' );
   if( mvaVar[ "metsig"        ]  == 1 ) factory->AddVariable( "met/sqrt(htosl+htssl)"  ,  "E_{T}^{miss}/#sqrt{H_{T}}"  ,       "#sqrt{GeV}"   , 'F' )
;
   if( mvaVar[ "pt_b"          ]  == 1 ) factory->AddVariable( "mini_pt_b"  ,       "P_T(b) GeV"   , 'F' );
   if( mvaVar[ "nb"            ]  == 1 ) factory->AddVariable( "mini_nb"  ,       "P_T(b) GeV"   , 'F' );
   if( mvaVar[ "pt_J1"          ]  == 1 ) factory->AddVariable( "pt_J1"  ,       "P_T(J1) GeV"   , 'F' );
   if( mvaVar[ "pt_J2"          ]  == 1 ) factory->AddVariable( "pt_J2"  ,       "P_T(J2) GeV"   , 'F' );
   
   /*
   if( doMultipleOutputs ){
     if (mvaVar["lephard_pt"])       multifactory->AddVariable( "lephard_pt",                 "1st lepton pt",                "GeV", 'F' );
     if (mvaVar["lepsoft_pt"])       multifactory->AddVariable( "lepsoft_pt",                 "2nd lepton pt",                "GeV", 'F' );
     if (mvaVar["dil_dphi"])         multifactory->AddVariable( "dil_dphi",                   "dphi(ll)",                     "",    'F' );
     if (mvaVar["dil_mass"])         multifactory->AddVariable( "dil_mass",                   "M(ll)",                        "GeV", 'F' );
     if (mvaVar["event_type"])       multifactory->AddVariable( "event_type",                 "Dil Flavor Type",              "",    'F' );
     if (mvaVar["met_projpt"])       multifactory->AddVariable( "met_projpt",                 "Proj. MET",                    "GeV", 'F' );
     if (mvaVar["met_pt"])           multifactory->AddVariable( "met_pt",                     "MET",                          "GeV", 'F' );
     if (mvaVar["mt_lephardmet"])    multifactory->AddVariable( "mt_lephardmet",              "MT(lep1,MET)",                 "GeV", 'F' );
     if (mvaVar["mt_lepsoftmet"])    multifactory->AddVariable( "mt_lepsoftmet",              "MT(lep2,MET)",                 "GeV", 'F' );
     if (mvaVar["mthiggs"])          multifactory->AddVariable( "mthiggs",                    "MT(Higgs)",                    "GeV", 'F' );
     if (mvaVar["dphi_lephardmet"])  multifactory->AddVariable( "dphi_lephardmet",            "dphi(lep1,MET)",               "GeV", 'F' );
     if (mvaVar["dphi_lepsoftmet"])  multifactory->AddVariable( "dphi_lepsoftmet",            "dphi(lep2,MET)",               "GeV", 'F' );
     if (mvaVar["lepsoft_fbrem"])    multifactory->AddVariable( "lepsoft_fbrem",              "2nd lepton f_{brem}",          "",    'F' );
     if (mvaVar["lepsoft_eOverPIn"]) multifactory->AddVariable( "lepsoft_eOverPIn",           "2nd lepton E/p",               "",    'F' );
     if (mvaVar["lepsoft_qdphi"])    multifactory->AddVariable( "lepsoft_q * lepsoft_dPhiIn", "2nd lepton q#times#Delta#phi", "",    'F' );
   }
   */

   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
   //factory->AddSpectator( "spec1 := var1*2",  "Spectator 1", "units", 'F' );
   //factory->AddSpectator( "spec2 := var1*3",  "Spectator 2", "units", 'F' );

//   TTree* signalTrainingTree =  (TTree*) chSignalTrain;
//   TTree* signalTestTree =  (TTree*) chSignalTest;
//
//   TTree* bkgTrainingTree =  (TTree*) chBkgTrain;
//   TTree* bkgTestTree =  (TTree*) chBkgTest;
   
//    std::cout << "--- TMVAClassification       : Using bkg input files: -------------------" <<  std::endl;
// 
//    TObjArray *listOfBkgFiles = chbackground->GetListOfFiles();
//    TIter bkgFileIter(listOfBkgFiles);
//    TChainElement* currentBkgFile = 0;
// 
//    while((currentBkgFile = (TChainElement*)bkgFileIter.Next())) {
//      std::cout << currentBkgFile->GetTitle() << std::endl;
//    }
// 
//    std::cout << "--- TMVAClassification       : Using sig input files: -------------------" <<  std::endl;
//    
//    TObjArray *listOfSigFiles = chsignal->GetListOfFiles();
//    TIter sigFileIter(listOfSigFiles);
//    TChainElement* currentSigFile = 0;
// 
//    while((currentSigFile = (TChainElement*)sigFileIter.Next())) {
//      std::cout << currentSigFile->GetTitle() << std::endl;
//    }

   // global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;

   // You can add an arbitrary number of signal or background trees
//   factory->AddSignalTree    ( chSignal,     signalWeight     );
//   factory->AddBackgroundTree( chBackground, backgroundWeight );

   factory->AddTree(chSignal, "Signal", signalWeight, sel0+"mini_rand < 0.5", "train");
   factory->AddTree(chSignal, "Signal", signalWeight, sel0+"mini_rand >= 0.5", "test");
   factory->AddTree(chBackground, "Background", backgroundWeight, sel0+"mini_rand < 0.5", "train");
   factory->AddTree(chBackground, "Background", backgroundWeight, sel0+"mini_rand >= 0.5", "test");
   
   // To give different trees for training and testing, do as follows:
   //factory->AddSignalTree( signalTrainingTree, signalWeight, "Training" );
   //factory->AddSignalTree( signalTestTree,     signalWeight,  "Test" );

   //factory->AddBackgroundTree( bkgTrainingTree, backgroundWeight, "Training" );
   //factory->AddBackgroundTree( bkgTestTree,     backgroundWeight,  "Test" );
   
   // Use the following code instead of the above two or four lines to add signal and background
   // training and test events "by hand"
   // NOTE that in this case one should not give expressions (such as "var1+var2") in the input
   //      variable definition, but simply compute the expression before adding the event
   //
   //     // --- begin ----------------------------------------------------------
   //     std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
   //     Float_t  treevars[4], weight;
   //     
   //     // Signal
   //     for (UInt_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   //     for (UInt_t i=0; i<signal->GetEntries(); i++) {
   //        signal->GetEntry(i);
   //        for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //        // add training and test events; here: first half is training, second is testing
   //        // note that the weight can also be event-wise
   //        if (i < signal->GetEntries()/2.0) factory->AddSignalTrainingEvent( vars, signalWeight );
   //        else                              factory->AddSignalTestEvent    ( vars, signalWeight );
   //     }
   //   
   //     // Background (has event weights)
   //     background->SetBranchAddress( "weight", &weight );
   //     for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   //     for (UInt_t i=0; i<background->GetEntries(); i++) {
   //        background->GetEntry(i);
   //        for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //        // add training and test events; here: first half is training, second is testing
   //        // note that the weight can also be event-wise
   //        if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight*weight );
   //        else                                factory->AddBackgroundTestEvent    ( vars, backgroundWeight*weight );
   //     }
   //      // --- end ------------------------------------------------------------
   //
   // --- end of tree registration 
   
   // Set individual event weights (the variables must exist in the original TTree)
   factory->SetSignalWeightExpression    ("mini_weight");
   factory->SetBackgroundWeightExpression("mini_weight");

   /*
   if( doMultipleOutputs ){
     multifactory->AddTree(signal,"Signal");
     multifactory->SetSignalWeightExpression    ("event_scale1fb");
     multifactory->SetBackgroundWeightExpression("event_scale1fb");
     multifactory->SetWeightExpression("event_scale1fb");
     
     if( includeBkg["ww"] ){
       TTree* ww = (TTree*) chww;
       multifactory->AddTree(ww,"WW");
       cout << "Added WW to multi-MVA" << endl;
     }
     if( includeBkg["wjets"] ){
       TTree* wjets = (TTree*) chwjets;
       multifactory->AddTree(wjets,"WJets");
       cout << "Added W+jets to multi-MVA" << endl;
     }
     if( includeBkg["tt"] ){
       TTree* tt = (TTree*) chtt;
       multifactory->AddTree(tt,"tt");
       cout << "Added ttbar multi-MVA" << endl;
     }
   }
   */

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = sel0; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = sel0; // for example: TCut mycutb = "abs(var1)<0.5";

   // Tell the factory how to use the training and testing events
   //
   // If no numbers of events are given, half of the events in the tree are used 
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut,
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
   
   //Use random splitting
//   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
//                                        "nTrain_Signal=100000:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
   factory->PrepareTrainingAndTestTree( "", "",
                                        "nTrain_Signal=0:nTrain_Background=0:NormMode=None:!V" );

   // if( doMultipleOutputs ){
   //   multifactory->PrepareTrainingAndTestTree( mycuts, mycutb,
   //                                             "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
   // }

   //Use alternate splitting 
   //(this is preferable since its easier to track which events were used for training, but the job crashes! need to fix this...)
   //factory->PrepareTrainingAndTestTree( mycuts, mycutb,
   //                                     "nTrain_Signal=0:nTrain_Background=0:SplitMode=Alternate:NormMode=NumEvents:!V" );

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

   if (Use["CutsSA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                           "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   // Likelihood ("naive Bayes estimator")
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // Decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   // PCA-transformed likelihood
   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 

   // Use a kernel density estimator to approximate the PDFs
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); 

   // Use a variable-dependent mix of splines and kernel density estimator
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

   // Test the multi-dimensional probability density estimator
   // here are the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["PDEFoamBoost"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
                           "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // H-Matrix (chi2-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" );

   // Linear discriminant (same as Fisher discriminant)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher discriminant (same as LD)
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", 
                           "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );

//      factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:VarTransform=N:NCycles=1000:HiddenLayers=N+N:TestRate=5:!UseRegulator:LearningRate=0.2:DecayRate=0.001:BPMode=batch:BatchSize=500"); 

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators

   // CF(Clermont-Ferrand)ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...  

   // Tmlp(Root)ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDTG"]) // Gradient Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5" );

   if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   if (Use["BDT1"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT1",
                             "!H:!V:NTrees=200:nEventsMin=300:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=4:PruneMethod=NoPruning" );

   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // if( doMultipleOutputs ){
   //   if (Use["multi_BDTG"]) // gradient boosted decision trees
   //     multifactory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.50:nCuts=20:NNodesMax=8");
   //   if (Use["multi_MLP"]) // neural network
   //     multifactory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:NCycles=1000:HiddenLayers=N+5,5:TestRate=5:EstimatorType=MSE");
   //   if (Use["multi_FDA_GA"]) // functional discriminant with GA minimizer
   //     multifactory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );
   // }
   
   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","GA");

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs
  
   // Train MVAs using the set of training events
   factory->TrainAllMethods();
  
   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();
  
   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();
  
   // if( doMultipleOutputs ){
   //   // Train multi-MVAs using the set of training events
   //   multifactory->TrainAllMethods();
     
   //   // ---- Evaluate all multi-MVAs using the set of test events
   //   multifactory->TestAllMethods();
     
   //   // ----- Evaluate and compare performance of all configured multi-MVAs
   //   multifactory->EvaluateAllMethods();
   // }
   
   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();
   //if( doMultipleOutputs )  multioutputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;
  
   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
Пример #3
0
int main( int argc, char** argv )
{//main
  std::string folder;

  if (argc > 1) {
    folder = argv[1];
  }
  else {
    folder = "output_tmva/nunu/MET130/";
  }

  bool useQCD = true;
  bool useOthers = false;
  bool useOthersAsSignal = true;

  //List of input signal files
  std::vector<std::string> sigfiles;
  //sigfiles.push_back("MC_VBF_HToZZTo4Nu_M-120");
  sigfiles.push_back("MC_Powheg-Htoinv-mH125");

  if (useOthersAsSignal) {
    sigfiles.push_back("MC_TTJets");
    //powheg samples
    //sigfiles.push_back("MC_TT-v1");
    //sigfiles.push_back("MC_TT-v2");
    //
    sigfiles.push_back("MC_T-tW");
    sigfiles.push_back("MC_Tbar-tW");
    sigfiles.push_back("MC_SingleT-s-powheg-tauola");
    sigfiles.push_back("MC_SingleTBar-s-powheg-tauola");
    sigfiles.push_back("MC_SingleT-t-powheg-tauola");
    sigfiles.push_back("MC_SingleTBar-t-powheg-tauola");
    sigfiles.push_back("MC_WW-pythia6-tauola");
    sigfiles.push_back("MC_WZ-pythia6-tauola");
    sigfiles.push_back("MC_ZZ-pythia6-tauola");
    sigfiles.push_back("MC_W1JetsToLNu_enu");
    sigfiles.push_back("MC_W2JetsToLNu_enu");
    sigfiles.push_back("MC_W3JetsToLNu_enu");
    sigfiles.push_back("MC_W4JetsToLNu_enu");
    sigfiles.push_back("MC_WJetsToLNu-v1_enu");
    sigfiles.push_back("MC_WJetsToLNu-v2_enu");
    sigfiles.push_back("MC_W1JetsToLNu_munu");
    sigfiles.push_back("MC_W2JetsToLNu_munu");
    sigfiles.push_back("MC_W3JetsToLNu_munu");
    sigfiles.push_back("MC_W4JetsToLNu_munu");
    sigfiles.push_back("MC_WJetsToLNu-v1_munu");
    sigfiles.push_back("MC_WJetsToLNu-v2_munu");
    sigfiles.push_back("MC_W1JetsToLNu_taunu");
    sigfiles.push_back("MC_W2JetsToLNu_taunu");
    sigfiles.push_back("MC_W3JetsToLNu_taunu");
    sigfiles.push_back("MC_W4JetsToLNu_taunu");
    sigfiles.push_back("MC_WJetsToLNu-v1_taunu");
    sigfiles.push_back("MC_WJetsToLNu-v2_taunu");
    sigfiles.push_back("MC_DYJetsToLL");
    sigfiles.push_back("MC_DY1JetsToLL");
    sigfiles.push_back("MC_DY2JetsToLL");
    sigfiles.push_back("MC_DY3JetsToLL");
    sigfiles.push_back("MC_DY4JetsToLL");
    sigfiles.push_back("MC_ZJetsToNuNu_100_HT_200");
    sigfiles.push_back("MC_ZJetsToNuNu_200_HT_400");
    sigfiles.push_back("MC_ZJetsToNuNu_400_HT_inf");
    sigfiles.push_back("MC_ZJetsToNuNu_50_HT_100");
    sigfiles.push_back("MC_GJets-HT-200To400-madgraph");
    sigfiles.push_back("MC_GJets-HT-400ToInf-madgraph");
    sigfiles.push_back("MC_WGamma");
    sigfiles.push_back("MC_EWK-Z2j");
    sigfiles.push_back("MC_EWK-Z2jiglep");
    sigfiles.push_back("MC_EWK-W2jminus_enu");
    sigfiles.push_back("MC_EWK-W2jplus_enu");
    sigfiles.push_back("MC_EWK-W2jminus_munu");
    sigfiles.push_back("MC_EWK-W2jplus_munu");
    sigfiles.push_back("MC_EWK-W2jminus_taunu");
    sigfiles.push_back("MC_EWK-W2jplus_taunu");
  }

  //List of input files
  std::vector<std::string> bkgfiles;
  if (useQCD){
    bkgfiles.push_back("MC_QCD-Pt-30to50-pythia6");
    bkgfiles.push_back("MC_QCD-Pt-50to80-pythia6");
    bkgfiles.push_back("MC_QCD-Pt-80to120-pythia6");
    bkgfiles.push_back("MC_QCD-Pt-120to170-pythia6");
    bkgfiles.push_back("MC_QCD-Pt-170to300-pythia6");
    bkgfiles.push_back("MC_QCD-Pt-300to470-pythia6");
    bkgfiles.push_back("MC_QCD-Pt-470to600-pythia6");
    bkgfiles.push_back("MC_QCD-Pt-600to800-pythia6");
    bkgfiles.push_back("MC_QCD-Pt-800to1000-pythia6");
    bkgfiles.push_back("MC_QCD-Pt-1000to1400-pythia6");
    bkgfiles.push_back("MC_QCD-Pt-1400to1800-pythia6");
    bkgfiles.push_back("MC_QCD-Pt-1800-pythia6");
  }
  if (useOthers) {
    bkgfiles.push_back("MC_TTJets");
    //powheg samples
    //bkgfiles.push_back("MC_TT-v1");
    //bkgfiles.push_back("MC_TT-v2");
    //
    bkgfiles.push_back("MC_T-tW");
    bkgfiles.push_back("MC_Tbar-tW");
    bkgfiles.push_back("MC_SingleT-s-powheg-tauola");
    bkgfiles.push_back("MC_SingleTBar-s-powheg-tauola");
    bkgfiles.push_back("MC_SingleT-t-powheg-tauola");
    bkgfiles.push_back("MC_SingleTBar-t-powheg-tauola");
    bkgfiles.push_back("MC_WW-pythia6-tauola");
    bkgfiles.push_back("MC_WZ-pythia6-tauola");
    bkgfiles.push_back("MC_ZZ-pythia6-tauola");
    bkgfiles.push_back("MC_W1JetsToLNu_enu");
    bkgfiles.push_back("MC_W2JetsToLNu_enu");
    bkgfiles.push_back("MC_W3JetsToLNu_enu");
    bkgfiles.push_back("MC_W4JetsToLNu_enu");
    bkgfiles.push_back("MC_WJetsToLNu-v1_enu");
    bkgfiles.push_back("MC_WJetsToLNu-v2_enu");
    bkgfiles.push_back("MC_W1JetsToLNu_munu");
    bkgfiles.push_back("MC_W2JetsToLNu_munu");
    bkgfiles.push_back("MC_W3JetsToLNu_munu");
    bkgfiles.push_back("MC_W4JetsToLNu_munu");
    bkgfiles.push_back("MC_WJetsToLNu-v1_munu");
    bkgfiles.push_back("MC_WJetsToLNu-v2_munu");
    bkgfiles.push_back("MC_W1JetsToLNu_taunu");
    bkgfiles.push_back("MC_W2JetsToLNu_taunu");
    bkgfiles.push_back("MC_W3JetsToLNu_taunu");
    bkgfiles.push_back("MC_W4JetsToLNu_taunu");
    bkgfiles.push_back("MC_WJetsToLNu-v1_taunu");
    bkgfiles.push_back("MC_WJetsToLNu-v2_taunu");
    bkgfiles.push_back("MC_DYJetsToLL");
    bkgfiles.push_back("MC_DY1JetsToLL");
    bkgfiles.push_back("MC_DY2JetsToLL");
    bkgfiles.push_back("MC_DY3JetsToLL");
    bkgfiles.push_back("MC_DY4JetsToLL");
    bkgfiles.push_back("MC_ZJetsToNuNu_100_HT_200");
    bkgfiles.push_back("MC_ZJetsToNuNu_200_HT_400");
    bkgfiles.push_back("MC_ZJetsToNuNu_400_HT_inf");
    bkgfiles.push_back("MC_ZJetsToNuNu_50_HT_100");
    bkgfiles.push_back("MC_GJets-HT-200To400-madgraph");
    bkgfiles.push_back("MC_GJets-HT-400ToInf-madgraph");
    bkgfiles.push_back("MC_WGamma");
    bkgfiles.push_back("MC_EWK-Z2j");
    bkgfiles.push_back("MC_EWK-Z2jiglep");
    bkgfiles.push_back("MC_EWK-W2jminus_enu");
    bkgfiles.push_back("MC_EWK-W2jplus_enu");
    bkgfiles.push_back("MC_EWK-W2jminus_munu");
    bkgfiles.push_back("MC_EWK-W2jplus_munu");
    bkgfiles.push_back("MC_EWK-W2jminus_taunu");
    bkgfiles.push_back("MC_EWK-W2jplus_taunu");
  }

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
  TFile *output_tmva = TFile::Open((folder+"/TMVA_QCDrej.root").c_str(),"RECREATE");

  // Create the factory object. Later you can choose the methods
  // whose performance you'd like to investigate. The factory is 
  // the only TMVA object you have to interact with
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", output_tmva,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );


  //fill the variables with event weight from the trees
  //const unsigned nVars = 4;

   
   factory->AddSpectator("jet1_pt","Jet 1 p_{T}", "GeV", 'F');
   factory->AddSpectator("jet2_pt","Jet 2 p_{T}", "GeV", 'F');
   factory->AddSpectator("jet1_eta","Jet 1 #eta", "", 'F');
   factory->AddVariable("jet2_eta","Jet 2 #eta", "", 'F');// **
   factory->AddSpectator("jet1_phi","Jet 1 #phi", "", 'F');
   factory->AddSpectator("jet2_phi","Jet 2 #phi", "", 'F');
   factory->AddSpectator("dijet_M","M_{jj}", " GeV", 'F');
   factory->AddSpectator("dijet_deta","#Delta#eta_{jj}", "", 'F');
   factory->AddSpectator("dijet_sumeta","#eta_{j1}+#eta_{j2}", "", 'F');
   factory->AddSpectator("dijet_dphi","#Delta#phi_{jj}", "", 'F');
   factory->AddSpectator("met","MET", "GeV", 'F');// **
   factory->AddSpectator("met_phi","MET #phi", "", 'F');
   factory->AddVariable("met_significance","MET significance", "", 'F');// **
   factory->AddSpectator("sumet","#Sum E_{T}", "GeV", 'F');
   factory->AddSpectator("ht","H_{T}", "GeV", 'F');
   factory->AddVariable("mht","MH_{T}", "GeV", 'F');// **
   factory->AddSpectator("sqrt_ht","#sqrt{H_{T}}", "GeV^{0.5}", 'F');
   factory->AddSpectator("unclustered_et","Unclustered E_{T}", "GeV", 'F');
   factory->AddSpectator("unclustered_phi","Unclustered #phi", "GeV", 'F');
   factory->AddSpectator("jet1met_dphi","#Delta#phi(MET,jet1)", "", 'F');
   factory->AddVariable("jet2met_dphi","#Delta#phi(MET,jet2)", "", 'F');// **
   factory->AddVariable("jetmet_mindphi","minimum #Delta#phi(MET,jet)", "", 'F');// **
   factory->AddVariable("jetunclet_mindphi","minimum #Delta#phi(unclustered,jet)", "",  'F');// **
   factory->AddVariable("metunclet_dphi","#Delta#phi(MET,unclustered)", "",  'F');// **
   factory->AddVariable("dijetmet_scalarSum_pt", "p_{T}^{jet1}+p_{T}^{jet2}+MET", "GeV", 'F');// **
   factory->AddSpectator("dijetmet_vectorialSum_pt","p_{T}(#vec{j1}+#vec{j2}+#vec{MET})", "GeV", 'F');
   factory->AddVariable("dijetmet_ptfraction","p_{T}^{dijet}/(p_{T}^{dijet}+MET)", "", 'F');// **
   //factory->AddVariable("jet1met_scalarprod := (jet1_pt*cos(jet1_phi)*met_x+jet1_pt*sin(jet1_phi)*met_y)/met", "#vec{p_{T}^{jet1}}.#vec{MET}/MET", "GeV" , 'F');
   //factory->AddVariable("jet2met_scalarprod := (jet2_pt*cos(jet2_phi)*met_x+jet2_pt*sin(jet2_phi)*met_y)/met", "#vec{p_{T}^{jet2}}.#vec{MET}/MET", "GeV" , 'F');
   factory->AddVariable("jet1met_scalarprod", "#vec{p_{T}^{jet1}}.#vec{MET}/MET", "GeV" , 'F');// **
   factory->AddVariable("jet2met_scalarprod", "#vec{p_{T}^{jet2}}.#vec{MET}/MET", "GeV" , 'F');// **
   factory->AddVariable("jet1met_scalarprod_frac := jet1met_scalarprod/met", "#vec{p_{T}^{jet1}}.#vec{MET}/MET^{2}", "" , 'F');// **
   factory->AddVariable("jet2met_scalarprod_frac := jet2met_scalarprod/met", "#vec{p_{T}^{jet2}}.#vec{MET}/MET^{2}", "" , 'F');// **
   factory->AddSpectator("n_jets_cjv_30","CJV jets (30 GeV)", "" , 'I');
   factory->AddSpectator("n_jets_cjv_20EB_30EE","CJV jets (|#eta|<2.4 and 20 GeV, or 30 GeV)", "" , 'I');
   

   //test with only VBF variables used in cut-based analysis
   //factory->AddVariable("dijet_M","M_{jj}", " GeV", 'F');
   //factory->AddVariable("dijet_deta","#Delta#eta_{jj}", "", 'F');
   //factory->AddVariable("dijet_dphi","#Delta#phi_{jj}", "", 'F');
   //factory->AddVariable("met","MET", "GeV", 'F');
   //factory->AddVariable("n_jets_cjv_30","CJV jets (30 GeV)", "" , 'I');


  //get input files
  //signal
  //TFile *signalfile = TFile::Open((folder+"/"+"MC_VBF_HToZZTo4Nu_M-120.root").c_str());
  //TTree *signal = (TTree*)signalfile->Get("TmvaInputTree");
  //Double_t signalWeight     = 1.0;
  //factory->AddSignalTree(signal,signalWeight);
  //Set individual event weights (the variables must exist in the original TTree)
  //factory->SetSignalWeightExpression("total_weight");

  //background
  std::map<std::string, TFile *> tfiles;
  for (unsigned i = 0; i < bkgfiles.size(); ++i) {
    std::string filename = (bkgfiles[i]+".root");
    TFile * tmp = new TFile((folder+"/"+filename).c_str());
    if (!tmp) {
      std::cerr << "Warning, file " << filename << " could not be opened." << std::endl;
    } else {
      tfiles[bkgfiles[i]] = tmp;      
    }
  }
  TTree *background[bkgfiles.size()];

  //signal
  std::map<std::string, TFile *> sfiles;
  for (unsigned i = 0; i < sigfiles.size(); ++i) {
    std::string filename = (sigfiles[i]+".root");
    TFile * tmp = new TFile((folder+"/"+filename).c_str());
    if (!tmp) {
      std::cerr << "Warning, file " << filename << " could not be opened." << std::endl;
    } else {
      sfiles[sigfiles[i]] = tmp;      
    }
  }
  TTree *signal[sigfiles.size()];

  for (unsigned i = 0; i < bkgfiles.size(); ++i) {

    std::string f = bkgfiles[i];
    if (tfiles[f]){
      background[i] = (TTree*)tfiles[f]->Get("TmvaInputTree");
      //if (f.find("QCD-Pt")!=f.npos){
      //}
      Double_t backgroundWeight = 1.0;
      factory->AddBackgroundTree(background[i],backgroundWeight);
      factory->SetBackgroundWeightExpression("total_weight");

    }//if file exist
    else {
      std::cout << " Cannot find background file " << f << std::endl;
    }
  }//loop on files

  for (unsigned i = 0; i < sigfiles.size(); ++i) {

    std::string f = sigfiles[i];
    if (sfiles[f]){
      signal[i] = (TTree*)sfiles[f]->Get("TmvaInputTree");
      //if (f.find("QCD-Pt")!=f.npos){
      //}
      Double_t signalWeight = 1.0;
      factory->AddSignalTree(signal[i],signalWeight);
      factory->SetSignalWeightExpression("total_weight");

    }//if file exist
    else {
      std::cout << " Cannot find signal file " << f << std::endl;
    }
  }//loop on files


   // Apply additional cuts on the signal and background samples (can be different)
  TCut mycuts = "";//dijet_deta>3.8 && dijet_M > 1100 && met > 100 && met_significance>5";
  TCut mycutb = "";//dijet_deta>3.8 && dijet_M > 1100 && met > 100 && met_significance>5";

  factory->PrepareTrainingAndTestTree( mycuts, mycutb,
				       "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
  


   // Likelihood ("naive Bayes estimator")
  //factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
  //"H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

  // Linear discriminant (same as Fisher discriminant)
  //factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

  // Fisher discriminant (same as LD)
  factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

  // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
  //factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=60:HiddenLayers=N+5:TestRate=5:!UseRegulator" );

  // Boosted Decision Trees
  // Gradient Boost
  //factory->BookMethod( TMVA::Types::kBDT, "BDTG",
  //"!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" );
  //factory->BookMethod( TMVA::Types::kBDT, "BDTG",
  //                       "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:nCuts=20:MaxDepth=2" );


  // Adaptive Boost
  //factory->BookMethod( TMVA::Types::kBDT, "BDT1000",
  //	       "!H:!V:NTrees=1000:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );

  factory->BookMethod( TMVA::Types::kBDT, "BDT",
		       "!H:!V:NTrees=1000:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.2:SeparationType=GiniIndex:nCuts=20" );

  // Bagging
  //factory->BookMethod( TMVA::Types::kBDT, "BDTB",
  //                       "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" );

  // Decorrelation + Adaptive Boost
  //factory->BookMethod( TMVA::Types::kBDT, "BDTD",
  //                       "!H:!V:NTrees=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );

  // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables
  //factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher",
  //       "!H:!V:NTrees=50:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   output_tmva->Close();

   std::cout << "==> Wrote root file: " << output_tmva->GetName() << std::endl
             << "==> TMVAClassification is done!" << std::endl
             << std::endl
             << "==> To view the results, launch the GUI: \"root -l ./TMVAGui.C\"" << std::endl
             << std::endl;

   // Clean up
   delete factory;

  return 0;
}//main
Пример #4
0
void TMVAClassification( TString myMethodList = "" )
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   // if you like to use a method via the plugin mechanism, we recommend using
   //
   // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
   // (an example is given for using the BDT as plugin (see below),
   // but of course the real application is when you write your own
   // method based)

   // this loads the library
   TMVA::Tools::Instance();

   //---------------------------------------------------------------
   // default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   Use["Cuts"]            = 0;
   Use["CutsD"]           = 0;
   Use["CutsPCA"]         = 0;
   Use["CutsGA"]          = 0;
   Use["CutsSA"]          = 0;
   // ---
   Use["Likelihood"]      = 1;
   Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"]   = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"]   = 0;
   Use["LikelihoodMIX"]   = 0;
   // ---
   Use["PDERS"]           = 0;
   Use["PDERSD"]          = 0;
   Use["PDERSPCA"]        = 0;
   Use["PDERSkNN"]        = 0; // depreciated until further notice
   Use["PDEFoam"]         = 0;
   // --
   Use["KNN"]             = 0;
   // ---
   Use["HMatrix"]         = 0;
   Use["Fisher"]          = 0;
   Use["FisherG"]         = 0;
   Use["BoostedFisher"]   = 0;
   Use["LD"]              = 0;
   // ---
   Use["FDA_GA"]          = 0;
   Use["FDA_SA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   Use["FDA_MCMT"]        = 0;
   // ---
   Use["MLP"]             = 0; // this is the recommended ANN
   Use["MLPBFGS"]         = 0; // recommended ANN with optional training method
   Use["MLPBNN"]          = 0;  // recommended ANN with BFGS training method and bayesian regulator
   Use["CFMlpANN"]        = 0; // *** missing
   Use["TMlpANN"]         = 0;
   // ---
   Use["SVM"]             = 0;
   // ---
   Use["BDT"]             = 1;
   Use["BDTD"]            = 0;
   Use["BDTG"]            = 0;
   Use["BDTB"]            = 0;
   // ---
   Use["RuleFit"]         = 0;
   // ---
   Use["Plugin"]          = 0;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 0;
      }
   }

   // Create a new root output file.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory will
   // then run the performance analysis for you.
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
//   factory->AddVariable( "myvar1 := var1+var2", 'F' );
//   factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' );
//   factory->AddVariable( "var3",                "Variable 3", "units", 'F' );
//   factory->AddVariable( "var4",                "Variable 4", "units", 'F' );

   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
//   factory->AddSpectator( "spec1:=var1*2",  "Spectator 1", "units", 'F' );
//   factory->AddSpectator( "spec2:=var1*3",  "Spectator 2", "units", 'F' );

   // read training and test data

factory->AddVariable("CScostheta",'F');

factory->AddVariable("ZRapidity",'F');

factory->AddVariable("REDmet",'F');
   if (ReadDataFromAsciiIFormat) {
      // load the signal and background event samples from ascii files
      // format in file must be:
      // var1/F:var2/F:var3/F:var4/F
      // 0.04551   0.59923   0.32400   -0.19170
      // ...

      TString datFileS = "tmva_example_sig.dat";
      TString datFileB = "tmva_example_bkg.dat";

      factory->SetInputTrees( datFileS, datFileB );
   }
   else {
      // load the signal and background event samples from ROOT trees
      TString fname = "./tmva_class_example.root";

      TString fname_Data7TeV_DoubleElectron2011B_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleElectron2011B_0.root";

      TString fname_Data7TeV_MuEG2011B_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_MuEG2011B_0.root";

      TString fname_ZH125 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH125.root";

      TString fname_SingleT_tW = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleT_tW.root";

      TString fname_SingleT_s = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleT_s.root";

      TString fname_Data7TeV_DoubleMu2011B_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleMu2011B_0.root";

      TString fname_Data7TeV_DoubleElectron2011A_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleElectron2011A_0.root";

      TString fname_ZH135 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH135.root";

      TString fname_DYJetsToLL = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//DYJetsToLL.root";

      TString fname_ZH115 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH115.root";

      TString fname_SingleTbar_t = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleTbar_t.root";

      TString fname_Data7TeV_DoubleElectron2011B_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleElectron2011B_1.root";

      TString fname_Data7TeV_DoubleMu2011A_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleMu2011A_1.root";

      TString fname_Data7TeV_MuEG2011A_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_MuEG2011A_1.root";

      TString fname_TTJets = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//TTJets.root";

      TString fname_SingleTbar_s = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleTbar_s.root";

      TString fname_WJetsToLNu = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//WJetsToLNu.root";

      TString fname_Data7TeV_DoubleElectron2011A_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleElectron2011A_1.root";

      TString fname_ZZ = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZZ.root";

      TString fname_ZH150 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH150.root";

      TString fname_Data7TeV_MuEG2011B_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_MuEG2011B_1.root";

      TString fname_WW = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//WW.root";

      TString fname_ZH105 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH105.root";

      TString fname_Data7TeV_DoubleMu2011A_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleMu2011A_0.root";

      TString fname_SingleTbar_tW = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleTbar_tW.root";

      TString fname_WZ = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//WZ.root";

      TString fname_Data7TeV_MuEG2011A_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_MuEG2011A_0.root";

      TString fname_Data7TeV_DoubleMu2011B_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleMu2011B_1.root";

      TString fname_ZH145 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH145.root";

      TString fname_SingleT_t = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleT_t.root";


      if (gSystem->AccessPathName( fname ))  // file does not exist in local directory
         gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root");

      TFile *input_Data7TeV_DoubleElectron2011B_0 = TFile::Open( fname_Data7TeV_DoubleElectron2011B_0 );

      TFile *input_Data7TeV_MuEG2011B_0 = TFile::Open( fname_Data7TeV_MuEG2011B_0 );

      TFile *input_ZH125 = TFile::Open( fname_ZH125 );

      TFile *input_SingleT_tW = TFile::Open( fname_SingleT_tW );

      TFile *input_SingleT_s = TFile::Open( fname_SingleT_s );

      TFile *input_Data7TeV_DoubleMu2011B_0 = TFile::Open( fname_Data7TeV_DoubleMu2011B_0 );

      TFile *input_Data7TeV_DoubleElectron2011A_0 = TFile::Open( fname_Data7TeV_DoubleElectron2011A_0 );

      TFile *input_ZH135 = TFile::Open( fname_ZH135 );

      TFile *input_DYJetsToLL = TFile::Open( fname_DYJetsToLL );

      TFile *input_ZH115 = TFile::Open( fname_ZH115 );

      TFile *input_SingleTbar_t = TFile::Open( fname_SingleTbar_t );

      TFile *input_Data7TeV_DoubleElectron2011B_1 = TFile::Open( fname_Data7TeV_DoubleElectron2011B_1 );

      TFile *input_Data7TeV_DoubleMu2011A_1 = TFile::Open( fname_Data7TeV_DoubleMu2011A_1 );

      TFile *input_Data7TeV_MuEG2011A_1 = TFile::Open( fname_Data7TeV_MuEG2011A_1 );

      TFile *input_TTJets = TFile::Open( fname_TTJets );

      TFile *input_SingleTbar_s = TFile::Open( fname_SingleTbar_s );

      TFile *input_WJetsToLNu = TFile::Open( fname_WJetsToLNu );

      TFile *input_Data7TeV_DoubleElectron2011A_1 = TFile::Open( fname_Data7TeV_DoubleElectron2011A_1 );

      TFile *input_ZZ = TFile::Open( fname_ZZ );

      TFile *input_ZH150 = TFile::Open( fname_ZH150 );

      TFile *input_Data7TeV_MuEG2011B_1 = TFile::Open( fname_Data7TeV_MuEG2011B_1 );

      TFile *input_WW = TFile::Open( fname_WW );

      TFile *input_ZH105 = TFile::Open( fname_ZH105 );

      TFile *input_Data7TeV_DoubleMu2011A_0 = TFile::Open( fname_Data7TeV_DoubleMu2011A_0 );

      TFile *input_SingleTbar_tW = TFile::Open( fname_SingleTbar_tW );

      TFile *input_WZ = TFile::Open( fname_WZ );

      TFile *input_Data7TeV_MuEG2011A_0 = TFile::Open( fname_Data7TeV_MuEG2011A_0 );

      TFile *input_Data7TeV_DoubleMu2011B_1 = TFile::Open( fname_Data7TeV_DoubleMu2011B_1 );

      TFile *input_ZH145 = TFile::Open( fname_ZH145 );

      TFile *input_SingleT_t = TFile::Open( fname_SingleT_t );


      std::cout << "--- TMVAClassification       : Using input_Data7TeV_DoubleElectron2011B_0 file: " << input_Data7TeV_DoubleElectron2011B_0->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_MuEG2011B_0 file: " << input_Data7TeV_MuEG2011B_0->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_ZH125 file: " << input_ZH125->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_SingleT_tW file: " << input_SingleT_tW->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_SingleT_s file: " << input_SingleT_s->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_DoubleMu2011B_0 file: " << input_Data7TeV_DoubleMu2011B_0->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_DoubleElectron2011A_0 file: " << input_Data7TeV_DoubleElectron2011A_0->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_ZH135 file: " << input_ZH135->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_DYJetsToLL file: " << input_DYJetsToLL->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_ZH115 file: " << input_ZH115->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_SingleTbar_t file: " << input_SingleTbar_t->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_DoubleElectron2011B_1 file: " << input_Data7TeV_DoubleElectron2011B_1->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_DoubleMu2011A_1 file: " << input_Data7TeV_DoubleMu2011A_1->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_MuEG2011A_1 file: " << input_Data7TeV_MuEG2011A_1->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_TTJets file: " << input_TTJets->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_SingleTbar_s file: " << input_SingleTbar_s->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_WJetsToLNu file: " << input_WJetsToLNu->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_DoubleElectron2011A_1 file: " << input_Data7TeV_DoubleElectron2011A_1->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_ZZ file: " << input_ZZ->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_ZH150 file: " << input_ZH150->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_MuEG2011B_1 file: " << input_Data7TeV_MuEG2011B_1->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_WW file: " << input_WW->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_ZH105 file: " << input_ZH105->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_DoubleMu2011A_0 file: " << input_Data7TeV_DoubleMu2011A_0->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_SingleTbar_tW file: " << input_SingleTbar_tW->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_WZ file: " << input_WZ->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_MuEG2011A_0 file: " << input_Data7TeV_MuEG2011A_0->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_DoubleMu2011B_1 file: " << input_Data7TeV_DoubleMu2011B_1->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_ZH145 file: " << input_ZH145->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_SingleT_t file: " << input_SingleT_t->GetName() << std::endl;


      TTree *signal_ZH145     = (TTree*)input_ZH145->Get("tmvatree");

      TTree *background_ZZ = (TTree*)input_ZZ->Get("tmvatree");


      // global event weights per tree (see below for setting event-wise weights)
      Double_t signalWeight     = 1.0;
      Double_t backgroundWeight = 1.0;

      // ====== register trees ====================================================
      //
      // the following method is the preferred one:
      // you can add an arbitrary number of signal or background trees
      factory->AddSignalTree    ( signal_ZH145,     1.0     );

      factory->AddBackgroundTree( background_ZZ, 1.0 );


      // To give different trees for training and testing, do as follows:
      //    factory->AddSignalTree( signal_ZH145TrainingTree, signal_ZH145TrainWeight, "Training" );

      //    factory->AddSignalTree( signal_ZH145TestTree,     signal_ZH145TestWeight,  "Test" );


      // Use the following code instead of the above two or four lines to add signal and background
      // training and test events "by hand"
      // NOTE that in this case one should not give expressions (such as "var1+var2") in the input
      //      variable definition, but simply compute the expression before adding the event
      //
      //    // --- begin ----------------------------------------------------------
      //    std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
      //    Float_t  treevars[4];
      //    for (Int_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
      //    for (Int_t i=0; i<signal->GetEntries(); i++) {
      //       signal->GetEntry(i);
      //       for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
      //       // add training and test events; here: first half is training, second is testing
      //       // note that the weight can also be event-wise
      //       if (i < signal->GetEntries()/2) factory->AddSignalTrainingEvent( vars, signalWeight );
      //       else                            factory->AddSignalTestEvent    ( vars, signalWeight );
      //    }
      //
      //    for (Int_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
      //    for (Int_t i=0; i<background->GetEntries(); i++) {
      //       background->GetEntry(i);
      //       for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
      //       // add training and test events; here: first half is training, second is testing
      //       // note that the weight can also be event-wise
      //       if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight );
      //       else                                factory->AddBackgroundTestEvent    ( vars, backgroundWeight );
      //    }
      //    // --- end ------------------------------------------------------------
      //
      // ====== end of register trees ==============================================
   }

   // This would set individual event weights (the variables defined in the
   // expression need to exist in the original TTree)
   //    for signal    : factory->SetSignalWeightExpression("weight1*weight2");
   //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
   factory->SetBackgroundWeightExpression("Eweight*XS*BR*LUM*(1/NGE)*(B2/B3)*CUT");
   factory->SetSignalWeightExpression("Eweight*XS*BR*LUM*(1/NGE)*(B2/B3)*CUT");

   // Apply additional cuts on the signal and background samples (can be different)
TCut mycuts = "(CUT>2)";
TCut mycutb = "(CUT>2)";

   // tell the factory to use all remaining events in the trees after training for testing:

 factory->PrepareTrainingAndTestTree( mycuts, "SplitMode=random:!V" );
//                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // If no numbers of events are given, half of the events in the tree are used for training, and
   // the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut,
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );

   // ---- Book MVA methods
   //
   // please look up the various method configuration options in the corresponding cxx files, eg:
   // src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:Seed=0:EffSel:Steps=50:Cycles=3:PopSize=1000:SC_steps=10:SC_rate=5:SC_factor=0.95" );

   if (Use["CutsSA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                           "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   // Likelihood
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // test the decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 

   // test the new kernel density estimator
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); 

   // test the mixed splines and kernel density estimator (depending on which variable)
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

   // test the multi-dimensional probability density estimator
   // here are the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSkNN"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSkNN",
                           "!H:!V:VolumeRangeMode=kNN:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:CutNmin=T:Nmin=100:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );
   // H-Matrix (chi-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" );

   // Fisher discriminant
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=60:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2");

   // Linear discriminant (same as Fisher)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators

   // CF(Clermont-Ferrand)ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...  

   // Tmlp(Root)ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDTG"]) // Gradient Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" );

   if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=1000:nEventsMin=400:MaxDepth=6:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                           "!H:!V:NTrees=1000:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=1000:nEventsMin=400:MaxDepth=6:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // For an example of the category classifier, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------

   // As an example of how to use the ROOT plugin mechanism, book BDT via
   // the plugin mechanism
   if (Use["Plugin"]) {
         //
         // first the plugin has to be defined, which can happen either through the following line in the local or global .rootrc:
         //
         // # plugin handler          plugin name(regexp) class to be instantiated library        constructor format
         // Plugin.TMVA@@MethodBase:  ^BDT                TMVA::MethodBDT          TMVA.1         "MethodBDT(TString,TString,DataSet&,TString)"
         //
         // or by telling the global plugin manager directly
      gPluginMgr->AddHandler("TMVA@@MethodBase", "BDT", "TMVA::MethodBDT", "TMVA.1", "MethodBDT(TString,TString,DataSet&,TString)");
      factory->BookMethod( TMVA::Types::kPlugins, "BDT",
                           "!H:!V:NTrees=1000:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=50" );
   }

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Quit ROOT once training is done (no GUI is launched here)

gROOT->ProcessLine(".q;");
}
Пример #5
0
int TMVAKaggleHiggs ( TString myMethodList = "" )
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   // if you like to use a method via the plugin mechanism, we recommend using
   //
   // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
   // (an example is given for using the BDT as plugin (see below),
   // but of course the real application is when you write your own
   // method based)

   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Cut optimisation
   Use["Cuts"]            = 0;
   Use["CutsD"]           = 0;
   Use["CutsPCA"]         = 0;
   Use["CutsGA"]          = 0;
   Use["CutsSA"]          = 0;
   // 
   // --- 1-dimensional likelihood ("naive Bayes estimator")
   Use["Likelihood"]      = 0;
   Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"]   = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"]   = 0;
   Use["LikelihoodMIX"]   = 0;
   //
   // --- Multidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]           = 0;
   Use["PDERSD"]          = 0;
   Use["PDERSPCA"]        = 0;
   Use["PDEFoam"]         = 0;
   Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
   Use["KNN"]             = 0; // k-nearest neighbour method
   //
   // --- Linear Discriminant Analysis
   Use["LD"]              = 0; // Linear Discriminant identical to Fisher
   Use["Fisher"]          = 0;
   Use["FisherG"]         = 0;
   Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
   Use["HMatrix"]         = 0;
   //
   // --- Function Discriminant analysis
   Use["FDA_GA"]          = 0; // minimisation of user-defined function using Genetics Algorithm
   Use["FDA_SA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   Use["FDA_MCMT"]        = 0;
   //
   // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
   Use["MLP"]             = 0; // Recommended ANN
   Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
   Use["MLPBNN"]          = 0; // Recommended ANN with BFGS training method and bayesian regulator
   Use["CFMlpANN"]        = 0; // Depreciated ANN from ALEPH
   Use["TMlpANN"]         = 0; // ROOT's own ANN
   Use["NN"]              = 1; // improved implementation of a NN
   //
   // --- Support Vector Machine 
   Use["SVM"]             = 0;
   // 
   // --- Boosted Decision Trees
   Use["BDT"]             = 1; // uses Adaptive Boost
   Use["BDTG"]            = 0; // uses Gradient Boost
   Use["BDTB"]            = 0; // uses Bagging
   Use["BDTD"]            = 0; // decorrelation + Adaptive Boost
   Use["BDTF"]            = 0; // allow usage of fisher discriminant for node splitting 
   // 
   // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
   Use["RuleFit"]         = 0;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAKaggleHiggs" << std::endl;

   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return 1;
         }
         Use[regMethod] = 1;
      }
   }

   // --------------------------------------------------------------------------------------------------

   // --- Here the preparation phase begins

   // Read training and test data
   // (it is also possible to use ASCII format as input -> see TMVA Users Guide)
   TString fname = "./training.root";
   
   if (gSystem->AccessPathName( fname ))  // file does not exist in local directory
      gSystem->Exec("curl -O http://root.cern.ch/files/tmva_class_example.root");
   
   TFile *input = TFile::Open( fname );
   
   std::cout << "--- TMVAClassification       : Using input file: " << input->GetName() << std::endl;
   
   // --- Register the training and test trees

   TTree *tree     = (TTree*)input->Get("data");
   
   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory is 
   // the only TMVA object you have to interact with
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification" );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   // factory->AddVariable( "myvar1 := var1+var2", 'F' );
   // factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' );
   // factory->AddVariable( "var3",                "Variable 3", "units", 'F' );
   // factory->AddVariable( "var4",                "Variable 4", "units", 'F' );

   TString limit ("-900.0");
   TString replacementValue ("0.0");
   std::vector<std::string> vars = {"DER_mass_MMC","DER_mass_transverse_met_lep","DER_mass_vis","DER_pt_h","DER_deltaeta_jet_jet","DER_mass_jet_jet","DER_prodeta_jet_jet","DER_deltar_tau_lep","DER_pt_tot","DER_sum_pt","DER_pt_ratio_lep_tau","DER_met_phi_centrality","DER_lep_eta_centrality","PRI_tau_pt","PRI_tau_eta","PRI_tau_phi","PRI_lep_pt","PRI_lep_eta","PRI_lep_phi","PRI_met","PRI_met_phi","PRI_met_sumet","PRI_jet_num","PRI_jet_leading_pt","PRI_jet_leading_eta","PRI_jet_leading_phi","PRI_jet_subleading_pt","PRI_jet_subleading_eta","PRI_jet_subleading_phi","PRI_jet_all_pt"};
   
   for (std::vector<std::string>::iterator it = vars.begin (), itEnd = vars.end (); it != itEnd; ++it)
   {
       std::string s = *it;
       TString current;
       current.Form ("%s:=(%s<%s?%s:%s)",s.c_str (), s.c_str (), limit.Data (), replacementValue.Data (), s.c_str ());
       factory->AddVariable (current, 'F');
   }

   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables

//   factory->AddSpectator( "spec1 := var1*2",  "Spectator 1", "units", 'F' );
//   factory->AddSpectator( "spec2 := var1*3",  "Spectator 2", "units", 'F' );

   
   // global event weights per tree (see below for setting event-wise weights)
   Double_t weight     = 1.0;
//   Double_t backgroundWeight = 1.0;
   
   // You can add an arbitrary number of signal or background trees
//   factory->AddBackgroundTree( background, backgroundWeight );

   factory->AddTree(tree, "Signal", 1., "Label == 1");
   factory->AddTree(tree, "Background", 1., "Label == 0");
   
   // To give different trees for training and testing, do as follows:
   //    factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
   //    factory->AddSignalTree( signalTestTree,     signalTestWeight,  "Test" );
   
   // Use the following code instead of the above two or four lines to add signal and background
   // training and test events "by hand"
   // NOTE that in this case one should not give expressions (such as "var1+var2") in the input
   //      variable definition, but simply compute the expression before adding the event
   //
   //     // --- begin ----------------------------------------------------------
   //     std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
   //     Float_t  treevars[4], weight;
   //     
   //     // Signal
   //     for (UInt_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   //     for (UInt_t i=0; i<signal->GetEntries(); i++) {
   //        signal->GetEntry(i);
   //        for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //        // add training and test events; here: first half is training, second is testing
   //        // note that the weight can also be event-wise
   //        if (i < signal->GetEntries()/2.0) factory->AddSignalTrainingEvent( vars, signalWeight );
   //        else                              factory->AddSignalTestEvent    ( vars, signalWeight );
   //     }
   //   
   //     // Background (has event weights)
   //     background->SetBranchAddress( "weight", &weight );
   //     for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   //     for (UInt_t i=0; i<background->GetEntries(); i++) {
   //        background->GetEntry(i);
   //        for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //        // add training and test events; here: first half is training, second is testing
   //        // note that the weight can also be event-wise
   //        if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight*weight );
   //        else                                factory->AddBackgroundTestEvent    ( vars, backgroundWeight*weight );
   //     }
         // --- end ------------------------------------------------------------
   //
   // --- end of tree registration 

   // Set individual event weights (the variables must exist in the original TTree)
   //    for signal    : factory->SetSignalWeightExpression    ("weight1*weight2");
   //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
   factory->SetSignalWeightExpression( "Weight" );
   factory->SetBackgroundWeightExpression( "Weight" );

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = "Label==1"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = "Label==0"; // for example: TCut mycutb = "abs(var1)<0.5";

   // Tell the factory how to use the training and testing events
   //
   // If no numbers of events are given, half of the events in the tree are used 
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut,
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=0:nTest_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
//   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
//                                        "nTrain_Signal=5000:nTrain_Background=5000:nTest_Signal=5000:nTest_Background=5000:SplitMode=Random:NormMode=NumEvents:!V" );

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

   if (Use["CutsSA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                           "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   // Likelihood ("naive Bayes estimator")
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // Decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   // PCA-transformed likelihood
   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 

   // Use a kernel density estimator to approximate the PDFs
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); 

   // Use a variable-dependent mix of splines and kernel density estimator
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

   // Test the multi-dimensional probability density estimator
   // here are the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["PDEFoamBoost"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
                           "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // H-Matrix (chi2-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" );

   // Linear discriminant (same as Fisher discriminant)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher discriminant (same as LD)
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", 
                           "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators


   // improved neural network implementation 
   if (Use["NN"])
   {
//       TString layoutString ("Layout=TANH|(N+100)*2,LINEAR");
//       TString layoutString ("Layout=SOFTSIGN|100,SOFTSIGN|50,SOFTSIGN|20,LINEAR");
//       TString layoutString ("Layout=RELU|300,RELU|100,RELU|30,RELU|10,LINEAR");
//       TString layoutString ("Layout=SOFTSIGN|50,SOFTSIGN|30,SOFTSIGN|20,SOFTSIGN|10,LINEAR");
//       TString layoutString ("Layout=TANH|50,TANH|30,TANH|20,TANH|10,LINEAR");
//       TString layoutString ("Layout=SOFTSIGN|50,SOFTSIGN|20,LINEAR");
       TString layoutString ("Layout=SOFTSIGN|70,SOFTSIGN|30,LINEAR");

       std::vector<TString> strategy;
       strategy.push_back (TString ("LearningRate=1e-2,Momentum=0.9,Repetitions=1,ConvergenceSteps=70,BatchSize=120,TestRepetitions=7,WeightDecay=0.001,Regularization=NONE,DropConfig=0.5+0.5+0.5+0.5,DropRepetitions=1,Multithreading=True"));
       strategy.push_back (TString ("LearningRate=1e-4,Momentum=0.5,Repetitions=1,ConvergenceSteps=70,BatchSize=80,TestRepetitions=7,WeightDecay=0.001,Regularization=L2,Multithreading=True,DropConfig=0.1+0.1+0.1+0.1,DropRepetitions=1"));
       strategy.push_back (TString ("LearningRate=1e-5,Momentum=0.3,Repetitions=1,ConvergenceSteps=70,BatchSize=60,TestRepetitions=7,WeightDecay=0.0001,Regularization=L2,Multithreading=True"));
       strategy.push_back (TString  ("LearningRate=1e-6,Momentum=0.0,Repetitions=1,ConvergenceSteps=70,BatchSize=40,TestRepetitions=7,WeightDecay=0.0001,Regularization=NONE,Multithreading=True"));
//       strategy.push_back (TString ("LearningRate=1e-6,Momentum=0.0,Repetitions=1,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0001,Regularization=NONE,Multithreading=True"));

       TString trainingStrategyString ("TrainingStrategy=");
       for (std::vector<TString>::const_iterator it = strategy.begin (), itEnd = strategy.end (); it != itEnd; ++it)
       {
           if (it != strategy.begin ())
               trainingStrategyString += "|";
           trainingStrategyString += *it;
       }
      
//       TString nnOptions ("!H:V:VarTransform=Normalize:ErrorStrategy=CROSSENTROPY");
       TString nnOptions ("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=G:WeightInitialization=XAVIERUNIFORM");
//       TString nnOptions ("!H:V:VarTransform=Normalize:ErrorStrategy=CHECKGRADIENTS");
       nnOptions.Append (":"); nnOptions.Append (layoutString);
       nnOptions.Append (":"); nnOptions.Append (trainingStrategyString);

       factory->BookMethod( TMVA::Types::kNN, "NN", nnOptions ); // NN
   }


   
   // CF(Clermont-Ferrand)ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...  

   // Tmlp(Root)ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDTG"]) // Gradient Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" );

   if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );

   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );

   if (Use["BDTF"])  // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables
      factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher",
                           "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

   // ---- STILL EXPERIMENTAL and only implemented for BDT's ! 
   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","FitGA");

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVA::TMVAGui( outfileName );

   return 0;
}
Пример #6
0
/// Train, test and evaluate a single TMVA rectangular-cut method
/// ("Cuts_FullsampleW_def", genetic-algorithm fit) on five input variables.
///
/// Inputs read from disk:
///  - "99per.txt": whitespace-separated rows of five numbers. The LAST complete
///    row is used; its values become CutRangeMax[0..4], i.e. the upper scan
///    limits for Sieie, ToE, isoC, isoN and isoP (in that order).
///  - "../../CutTMVATrees_Barrel.root": must contain the signal tree "t_S" and
///    the background tree "t_B", both carrying the branches used in the weight
///    expression "weightPT*weightXS".
///
/// Output: "Ex1out_FullW_def.root" with the TMVA training/test results.
///
/// The macro prints a message and returns early (without training) when the
/// limits file holds no complete row, or when a ROOT file or tree is missing.
void Reg(){

  TMVA::Tools::Instance();
  std::cout << "==> Start TMVARegression" << std::endl;

  // --- Read the upper cut limits.
  // Extraction itself is the loop condition: testing eof() before reading would
  // run one extra, failing iteration on a trailing newline (which zeroes the
  // first value) and would never terminate on a malformed token.
  double xS = 0., xH = 0., xC = 0., xN = 0., xP = 0.;
  bool haveLimits = false;
  {
    std::ifstream myfile("99per.txt");
    double rowS, rowH, rowC, rowN, rowP;
    while (myfile >> rowS >> rowH >> rowC >> rowN >> rowP) {
      // Only a fully parsed row may overwrite the limits.
      xS = rowS;
      xH = rowH;
      xC = rowC;
      xN = rowN;
      xP = rowP;
      haveLimits = true;
    }
  }
  if (!haveLimits) {
    std::cout << "==> ERROR: no complete row of 5 cut limits could be read from 99per.txt" << std::endl;
    return;
  }

  // Text form of the limits, as needed for the TMVA option string
  std::ostringstream xcS, xcH, xcC, xcN, xcP;
  xcS << xS;
  xcH << xH;
  xcC << xC;
  xcN << xN;
  xcP << xP;

  // --- Output file
  TString outfileName( "Ex1out_FullW_def.root" );
  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
  if (!outputFile || outputFile->IsZombie()) {
    std::cout << "==> ERROR: cannot create output file " << outfileName << std::endl;
    return;
  }

  // --- Declare the factory
  TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                              "!V:!Silent:Color:DrawProgressBar" );

  // --- Declare the input variables.
  // The order matters: index i below is the i in VarProp[i] / CutRangeMax[i].
  factory->AddVariable( "Sieie",'F');
  factory->AddVariable( "ToE", 'F' );
  factory->AddVariable( "isoC",'F' );
  factory->AddVariable( "isoN",'F' );
  factory->AddVariable( "isoP",'F' );

  // --- Open the input file (declared explicitly; the former bare assignment
  //     to an undeclared `input` is not valid C++)
  TString fname = "../../CutTMVATrees_Barrel.root";
  TFile* input = TFile::Open( fname );
  if (!input || input->IsZombie()) {
    std::cout << "==> ERROR: cannot open input file " << fname << std::endl;
    outputFile->Close();
    delete factory;
    return;
  }

  // --- Fetch the signal and background trees
  TTree *signal = (TTree*)input->Get("t_S");
  TTree *background = (TTree*)input->Get("t_B");
  if (!signal || !background) {
    std::cout << "==> ERROR: tree t_S and/or t_B not found in " << fname << std::endl;
    outputFile->Close();
    delete factory;
    return;
  }

  // Global (per-tree) weights
  Double_t signalWeight      = 1.0;
  Double_t backgroundWeight  = 1.0;

  // --- Register the trees with the factory
  factory->AddSignalTree( signal, signalWeight );
  factory->AddBackgroundTree( background , backgroundWeight );

  // No selection is applied; the cuts are only needed by the optional explicit
  // train/test split below, which is currently disabled.
  TCut mycuts ="";
  TCut mycutb ="";

  // factory->PrepareTrainingAndTestTree(mycuts,mycutb,"nTrain_Signal=9000:nTrain_Background=9000:nTest_Signal=10000:nTest_Background=10000");

  // Event-by-event weights (the branches must exist in both trees)
  factory->SetBackgroundWeightExpression("weightPT*weightXS");
  factory->SetSignalWeightExpression("weightPT*weightXS");

  // --- Configure the cut optimisation: same VarProp for every variable, and
  //     the upper scan limit of each variable taken from 99per.txt
  TString methodName = "Cuts_FullsampleW_def";
  TString methodOptions ="!H:!V:FitMethod=GA:EffMethod=EffSEl";
  methodOptions +=":VarProp[0]=FMin:VarProp[1]=FMin:VarProp[2]=FMin:VarProp[3]=FMin:VarProp[4]=FMin";

  methodOptions +=":CutRangeMax[0]="+xcS.str();
  methodOptions +=":CutRangeMax[1]="+xcH.str();
  methodOptions +=":CutRangeMax[2]="+xcC.str();
  methodOptions +=":CutRangeMax[3]="+xcN.str();
  methodOptions +=":CutRangeMax[4]="+xcP.str();

  // --- Book, train, test and evaluate
  factory->BookMethod(TMVA::Types::kCuts,methodName,methodOptions);
  factory->TrainAllMethods();
  factory->TestAllMethods();
  factory->EvaluateAllMethods();

  // --------------------------------------------------------------
  // Save the output
  outputFile->Close();

  std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
  std::cout << "==> TMVARegression is done!" << std::endl;
  delete factory;

}
Пример #7
0
//void TMVAClassification( TString myMethodList = "" )
void tmvaClassifier( TString myMethodList = "", TString inputDir="~/work/ewkzp2j_5311/ll/", bool minimalTrain=false, bool useQG=false)
{   
  gSystem->ExpandPathName(inputDir);
  TString pf("base_weights");
  if(!minimalTrain){
    if(useQG) pf="full_weights";
    else      pf="weights";
  }
  TMVA::gConfig().GetIONames().fWeightFileDir = inputDir + pf;
  
  // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
  // if you use your private .rootrc, or run from a different directory, please copy the
  // corresponding lines from .rootrc
  
  // methods to be processed can be given as an argument; use format:
  //
  // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
  //
  // if you like to use a method via the plugin mechanism, we recommend using
  //
  // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
  // (an example is given for using the BDT as plugin (see below),
  // but of course the real application is when you write your own
  // method based)
  
  //---------------------------------------------------------------
  // This loads the library
  TMVA::Tools::Instance();
  
  // Default MVA methods to be trained + tested
  std::map<std::string,int> Use;
  
  // --- Cut optimisation
  Use["Cuts"]            = 0;
  Use["CutsD"]           = 0;
  Use["CutsPCA"]         = 0;
  Use["CutsGA"]          = 0;
  Use["CutsSA"]          = 0;
  // 
  // --- 1-dimensional likelihood ("naive Bayes estimator")
  Use["Likelihood"]      = 0;
  Use["LikelihoodD"]     = 1; // the "D" extension indicates decorrelated input variables (see option strings)
  Use["LikelihoodPCA"]   = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
  Use["LikelihoodKDE"]   = 0;
  Use["LikelihoodMIX"]   = 0;
  //
  // --- Mutidimensional likelihood and Nearest-Neighbour methods
  Use["PDERS"]           = 0;
  Use["PDERSD"]          = 0;
  Use["PDERSPCA"]        = 0;
  Use["PDEFoam"]         = 0;
  Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
  Use["KNN"]             = 0; // k-nearest neighbour method
  //
  // --- Linear Discriminant Analysis
  Use["LD"]              = 0; // Linear Discriminant identical to Fisher
  Use["Fisher"]          = 1;
  Use["FisherCat"]       = 0;//added by loic
  Use["FisherG"]         = 0;
  Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
  Use["HMatrix"]         = 0;
  //
  // --- Function Discriminant analysis
  Use["FDA_GA"]          = 0; // minimisation of user-defined function using Genetics Algorithm
  Use["FDA_SA"]          = 0;
  Use["FDA_MC"]          = 0;
  Use["FDA_MT"]          = 0;
  Use["FDA_GAMT"]        = 0;
  Use["FDA_MCMT"]        = 0;
  //
  // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
  Use["MLP"]             = 0; // Recommended ANN
  Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
  Use["MLPBNN"]          = 0; // Recommended ANN with BFGS training method and bayesian regulator
  Use["CFMlpANN"]        = 0; // Depreciated ANN from ALEPH
  Use["TMlpANN"]         = 0; // ROOT's own ANN
  //
  // --- Support Vector Machine 
  Use["SVM"]             = 0;
  // 
  // --- Boosted Decision Trees
  Use["BDT"]             = 0; // uses Adaptive Boost
  Use["BDTG"]            = 0; // uses Gradient Boost
  Use["BDTB"]            = 0; // uses Bagging
  Use["BDTD"]            = 1; // decorrelation + Adaptive Boost
  Use["BDTF"]            = 0; // allow usage of fisher discriminant for node splitting 
  // 
  // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
  Use["RuleFit"]         = 0;
  // ---------------------------------------------------------------

  std::cout << std::endl;
  std::cout << "==> Start TMVAClassification" << std::endl;

  // Select methods (don't look at this code - not of interest)
  if (myMethodList != "") {
    for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

    std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
    for (UInt_t i=0; i<mlist.size(); i++) {
      std::string regMethod(mlist[i]);

      if (Use.find(regMethod) == Use.end()) {
	std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
	for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
	std::cout << std::endl;
	return;
      }
      Use[regMethod] = 1;
    }
  }

  // --------------------------------------------------------------------------------------------------

  // --- Here the preparation phase begins

  // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
  TString outfileName( "TMVA.root" );
  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

  // Create the factory object. Later you can choose the methods
  // whose performance you'd like to investigate. The factory is 
  // the only TMVA object you have to interact with
  //
  // The first argument is the base of the name of all the
  // weightfiles in the directory weight/
  //
  // The second argument is the output file for the training results
  // All TMVA output can be suppressed by removing the "!" (not) in
  // front of the "Silent" argument in the option string
  TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
					      "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

  // You can add so-called "Spectator variables", which are not used in the MVA training,
  // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
  // input variables, the response values of all trained MVAs, and the spectator variables
  //   factory->AddSpectator( "spec1 := var1*2",  "Spectator 1", "units", 'F' );
  // factory->AddSpectator( "spec2 := var1*3",  "Spectator 2", "units", 'F' );

  // Read training and test data
  // (it is also possible to use ASCII format as input -> see TMVA Users Guide)
  //   TString fname = "./tmva_class_example.root";
   
  //if (gSystem->AccessPathName( fname ))  // file does not exist in local directory
  //   gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root");
   
  
  //   std::cout << "--- TMVAClassification       : Using input file: " << input->GetName() << std::endl;
   
  // --- Register the training and test trees
  TChain *signal     = new TChain("ewkzp2j");
  TChain *background = new TChain("ewkzp2j");
  TSystemDirectory dir(inputDir,inputDir);
  TList *files = dir.GetListOfFiles();
  if (files) {
    TSystemFile *file;
    TString fname;
    TIter next(files);
    while ((file=(TSystemFile*)next())) {
      fname = file->GetName();
      if(!fname.EndsWith("_summary.root")) continue;
      if(fname.Contains("Data")) continue;
      if(!fname.Contains("DY")) continue;
      bool isSignal(false);
      if(fname.Contains("JJ")) { signal->Add(fname); isSignal=true; }
      else if(fname.Contains("50toInf") && fname.Contains("DY")) background->Add(fname);
      cout << fname << " added as " << (isSignal ? "signal" : "background") << endl;
    }
  }else{
    cout << "[Error] no files found in " << inputDir << endl;
  }
  cout << "Signal has " << signal->GetEntries() << " raw events" << endl
       << "Background has " << background->GetEntries() << " raw events"<< endl;

  // global event weights per tree
  Double_t signalWeight     = 1.0;
  Double_t backgroundWeight = 1.0;
  factory->AddSignalTree    ( signal,     signalWeight     );
  factory->AddBackgroundTree( background, backgroundWeight );
  // event-per-event weights per tree
  factory->SetBackgroundWeightExpression( "weight/cnorm" );
  factory->SetSignalWeightExpression( "weight/cnorm" );

  //define variables for the training
  if(minimalTrain)
    {
      factory->AddVariable( "mjj",     "M_{jj}"              "GeV", 'F' );
      factory->AddVariable( "detajj",  "#Delta#eta_{jj}",     "",    'F' );
      factory->AddVariable( "spt",     "#Delta_{rel}",        "GeV", 'F' );
    }
  else
    {
      factory->AddVariable( "mjj",     "M_{jj}"              "GeV",  'F' );
      factory->AddVariable( "detajj",  "#Delta#eta_{jj}",     "",    'F' );
      factory->AddVariable( "setajj",  "#Sigma#eta_{j}",      "",    'F' );
      factory->AddVariable( "eta1",    "#eta(1)",             "",    'F' );
      factory->AddVariable( "eta2",    "#eta(2)",             "",    'F' );
      factory->AddVariable( "pt1",     "p_{T}(1)",            "GeV", 'F' );
      factory->AddVariable( "pt2",     "p_{T}(2)",            "GeV", 'F' );
      factory->AddVariable( "spt",     "#Delta_{rel}",        "GeV", 'F' );
      if(useQG) factory->AddVariable( "qg1",   "q/g(1)",      "",    'F' );
      if(useQG) factory->AddVariable( "qg2",   "q/g(2)",      "",    'F' );
    }
  

  // Apply additional cuts on the signal and background samples (can be different)
  TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
  TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";
  factory->PrepareTrainingAndTestTree( mycuts, mycutb,
				       "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

  // ---- Book MVA methods
  //
  // Please lookup the various method configuration options in the corresponding cxx files, eg:
  // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
  // it is possible to preset ranges in the option string in which the cut optimisation should be done:
  // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

  // Cut optimisation
  if (Use["Cuts"])
    factory->BookMethod( TMVA::Types::kCuts, "Cuts",
			 "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

  if (Use["CutsD"])
    factory->BookMethod( TMVA::Types::kCuts, "CutsD",
			 "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

  if (Use["CutsPCA"])
    factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
			 "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

  if (Use["CutsGA"])
    factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
			 "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

  if (Use["CutsSA"])
    factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
			 "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

  // Likelihood ("naive Bayes estimator")
  if (Use["Likelihood"])
    factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
			 "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

  // Decorrelated likelihood
  if (Use["LikelihoodD"])
    factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
			 "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

  // PCA-transformed likelihood
  if (Use["LikelihoodPCA"])
    factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
			 "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 

  // Use a kernel density estimator to approximate the PDFs
  if (Use["LikelihoodKDE"])
    factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
			 "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); 

  // Use a variable-dependent mix of splines and kernel density estimator
  if (Use["LikelihoodMIX"])
    factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
			 "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

  // Test the multi-dimensional probability density estimator
  // here are the options strings for the MinMax and RMS methods, respectively:
  //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
  //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
  if (Use["PDERS"])
    factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
			 "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

  if (Use["PDERSD"])
    factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
			 "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

  if (Use["PDERSPCA"])
    factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
			 "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

  // Multi-dimensional likelihood estimator using self-adapting phase-space binning
  if (Use["PDEFoam"])
    factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
			 "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

  if (Use["PDEFoamBoost"])
    factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
			 "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

  // K-Nearest Neighbour classifier (KNN)
  if (Use["KNN"])
    factory->BookMethod( TMVA::Types::kKNN, "KNN",
			 "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

  // H-Matrix (chi2-squared) method
  if (Use["HMatrix"])
    factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" );

  // Linear discriminant (same as Fisher discriminant)
  if (Use["LD"])
    factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

  // Fisher discriminant (same as LD)
  if (Use["Fisher"])
    factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
   
  if (Use["FisherCat"]){
    TMVA::MethodBase* fiCat = factory->BookMethod( TMVA::Types::kCategory, "FisherCat","" );
    TMVA::MethodCategory* mcategory = dynamic_cast<TMVA::MethodCategory*>(fiCat);
    mcategory->AddMethod( "mjj<250", "mjj:detajj:spt:", TMVA::Types::kFisher, "Fisher_Cat1", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
    mcategory->AddMethod( "mjj>=250&&mjj<350" , "mjj:detajj:spt:", TMVA::Types::kFisher, "Fisher_Cat0000", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
    mcategory->AddMethod( "mjj>=350&&mjj<450" , "mjj:detajj:spt:", TMVA::Types::kFisher, "Fisher_Cat0350", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
    mcategory->AddMethod( "mjj>=450&&mjj<550" , "mjj:detajj:spt:", TMVA::Types::kFisher, "Fisher_Cat0450", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
    mcategory->AddMethod( "mjj>=550&&mjj<750" , "mjj:detajj:spt:", TMVA::Types::kFisher, "Fisher_Cat0550", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
    mcategory->AddMethod( "mjj>=750&&mjj<1000", "mjj:detajj:spt:", TMVA::Types::kFisher, "Fisher_Cat0750", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
    mcategory->AddMethod( "mjj>=1000"         , "mjj:detajj:spt:", TMVA::Types::kFisher, "Fisher_Cat1000", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
  }


  // Fisher with Gauss-transformed input variables
  if (Use["FisherG"])
    factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

  // Composite classifier: ensemble (tree) of boosted Fisher classifiers
  if (Use["BoostedFisher"])
    factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", 
			 "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" );

  // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
  if (Use["FDA_MC"])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
			 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

  if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
			 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

  if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
			 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

  if (Use["FDA_MT"])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
			 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

  if (Use["FDA_GAMT"])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
			 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

  if (Use["FDA_MCMT"])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
			 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

  // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
  if (Use["MLP"])
    factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );

  if (Use["MLPBFGS"])
    factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

  if (Use["MLPBNN"])
    factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators

  // CF(Clermont-Ferrand)ANN
  if (Use["CFMlpANN"])
    factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...  

  // Tmlp(Root)ANN
  if (Use["TMlpANN"])
    factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...

  // Support Vector Machine
  if (Use["SVM"])
    factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

  // Boosted Decision Trees
  if (Use["BDTG"]) // Gradient Boost
    factory->BookMethod( TMVA::Types::kBDT, "BDTG",
			 "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:MaxDepth=2" );

  if (Use["BDT"])  // Adaptive Boost
    factory->BookMethod( TMVA::Types::kBDT, "BDT",
			 "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );

  if (Use["BDTB"]) // Bagging
    factory->BookMethod( TMVA::Types::kBDT, "BDTB",
			 "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" );

  if (Use["BDTD"]) // Decorrelation + Adaptive Boost
    factory->BookMethod( TMVA::Types::kBDT, "BDTD",
			 "!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=25:PruneMethod=CostComplexity:PruneStrength=25.0:VarTransform=Decorrelate");
  //"!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );

  if (Use["BDTF"])  // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables
    factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher",
			 "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );

  // RuleFit -- TMVA implementation of Friedman's method
  if (Use["RuleFit"])
    factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
			 "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

  // For an example of the category classifier usage, see: TMVAClassificationCategory




  // --------------------------------------------------------------------------------------------------

  // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

  // factory->OptimizeAllMethods("SigEffAt001","Scan");
  // factory->OptimizeAllMethods("ROCIntegral","FitGA");

  // --------------------------------------------------------------------------------------------------

  // ---- Now you can tell the factory to train, test, and evaluate the MVAs

  // Train MVAs using the set of training events
  factory->TrainAllMethods();

  // ---- Evaluate all MVAs using the set of test events
  factory->TestAllMethods();

  // ----- Evaluate and compare performance of all configured MVAs
  factory->EvaluateAllMethods();

  // --------------------------------------------------------------

  // Save the output
  outputFile->Close();

  std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
  std::cout << "==> TMVAClassification is done!" << std::endl;
  std::cout << " ==> Weights are stored in " << TMVA::gConfig().GetIONames().fWeightFileDir << std::endl;
  delete factory;



  // Launch the GUI for the root macros
  //   if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
/// Train a TMVA BDT classifier on the samples listed in `signallist` against
/// the samples listed in `bckglist`.
///
/// Each sample is read from `<folder><sample>_mt_2012.root` (tree "ntuple").
/// The input variables are read, one expression per line, from `paramfile2`;
/// per-sample weights are xs/evt as stored in `sample_info_` after
/// ParseParamFile(paramfile) has run.
///
/// @return 0 on success, 1 if an input file, tree, variable list or sample
///         entry is missing.
int main(int argc, char* argv[]){

	// Configurable parameters (set from the command line, defaults below)
	string folder;                  // Prefix prepended to every input file name
	string output_name;             // Name of the output ROOT file
	string output_folder;           // Folder to write the output in
	string paramfile;               // File passed to ParseParamFile()
	string paramfile2;              // Text file with one input variable expression per line
	string classname;               // Job name handed to the TMVA factory
	bool twotag;                    // Use the cut requiring prebjetbcsv_1 and prebjetbcsv_2 > 0.679
	bool onetag;                    // Use the cut requiring prebjetbcsv_1 > 0.679 and prebjetbcsv_2 < 0.679 (only if !twotag)

	po::options_description config("Configuration");
	po::variables_map vm;

	config.add_options()    
		("folder",              po::value<string>(&folder)->default_value("output/Paper_2012/"))
		("output_name",         po::value<string>(&output_name)->default_value("test_tmva.root"))
		("output_folder",       po::value<string>(&output_folder)->default_value(""))
		("paramfile",						po::value<string>(&paramfile)->default_value("./scripts/Paper_params_2012.dat"))
		("paramfile2", 					po::value<string>(&paramfile2)->default_value("./scripts/TMVAinputshad.dat"))
		("classname",						po::value<string>(&classname)->default_value("HhhMVA"))
		("twotag",								po::value<bool>(&twotag)->default_value(true))
		("onetag",              po::value<bool>(&onetag)->default_value(false))
		;
	po::store(po::command_line_parser(argc, argv).
			options(config).allow_unregistered().run(), vm);
	po::notify(vm);


	std::cout << "-------------------------------------" << std::endl;
	std::cout << "Train MVA" << std::endl;
	std::cout << "-------------------------------------" << std::endl;

	// Background samples used in the training
	std::vector<string> bckglist;
	bckglist.push_back("TTJetsFullLept");
	bckglist.push_back("TTJetsSemiLept");
	bckglist.push_back("TTJetsHadronicExt");
//	bckglist.push_back("WWJetsTo2L2Nu");
//	bckglist.push_back("WZJetsTo2L2Q");
//	bckglist.push_back("WZJetsTo3LNu");
//	bckglist.push_back("ZZJetsTo2L2Nu");
//	bckglist.push_back("ZZJetsTo2L2Q");
//	bckglist.push_back("ZZJetsTo4L");
//	bckglist.push_back("DYJetsToTauTauSoup");
//	bckglist.push_back("DYJetsToLLSoup");
//	bckglist.push_back("DYJetsToTauTau");
//	bckglist.push_back("DYJetsToLL");
//	bckglist.push_back("T-tW");
//	bckglist.push_back("Tbar-tW");

	// Signal samples used in the training
	std::vector<string> signallist;
	signallist.push_back("GluGluToHTohhTo2Tau2B_mH-300");

	sample_names_.reserve(bckglist.size()+signallist.size());
	sample_names_.insert(sample_names_.end(),bckglist.begin(),bckglist.end());
	sample_names_.insert(sample_names_.end(),signallist.begin(),signallist.end());

	// Opens the ROOT file of one sample and returns its "ntuple" tree, or a
	// null pointer (after printing the reason) if the file or tree is missing.
	auto loadTree = [&folder](const string& sample) -> TTree* {
		const string path = folder+sample+"_mt_2012.root";
		TFile* file = TFile::Open(path.c_str());
		if(!file || file->IsZombie()){
			std::cerr<<"ERROR: could not open input file "<<path<<std::endl;
			return nullptr;
		}
		TTree* tree = dynamic_cast<TTree*>(file->Get("ntuple"));
		if(!tree){
			std::cerr<<"ERROR: no TTree named \"ntuple\" in "<<path<<std::endl;
		}
		return tree;
	};

	std::vector<TTree*> backgroundTrees;
	for(unsigned int iter=0;iter<bckglist.size();++iter){
		TTree* tree = loadTree(bckglist.at(iter));
		if(!tree) return 1;
		backgroundTrees.push_back(tree);
	}

	std::vector<TTree*> signalTrees;
	for(unsigned int sigIter=0;sigIter<signallist.size();++sigIter){
		TTree* tree = loadTree(signallist.at(sigIter));
		if(!tree) return 1;
		signalTrees.push_back(tree);
	}

	// Read the list of training variables before the output file is
	// (re)created, so a bad variable list does not truncate earlier results.
	std::vector<std::string> vars;
	std::ifstream parafile(paramfile2.c_str());
	std::cout<<paramfile2.c_str()<<std::endl;
	if(!parafile.is_open()){
		std::cerr<<"ERROR: could not open variable list "<<paramfile2<<std::endl;
		return 1;
	}
	string line;
	while(getline(parafile,line)){
		if(line.empty()) continue;    // an empty expression is not a valid TMVA variable
		vars.push_back(line);
	}
	parafile.close();

	if(vars.empty()){
		std::cerr<<"ERROR: no variables found in "<<paramfile2<<std::endl;
		return 1;
	}
	std::cout<<vars.front().c_str()<<std::endl;

	TFile *outfile = new TFile((output_folder+output_name).c_str(),"RECREATE");
	if(outfile->IsZombie()){
		std::cerr<<"ERROR: could not create output file "<<output_folder+output_name<<std::endl;
		delete outfile;
		return 1;
	}

	TMVA::Factory *factory = new TMVA::Factory(classname,outfile,"!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification");

	for(unsigned int variter=0;variter<vars.size();++variter){
		factory->AddVariable((vars.at(variter)).c_str(),(vars.at(variter)).c_str(),"",'F');
	}

	// Spectators are needed because the selection cuts below refer to them.
	factory->AddSpectator("mt_1","mt_1","",'F');
	factory->AddSpectator("n_prebjets","n_prebjets","",'I');
	factory->AddSpectator("prebjetbcsv_1","prebjetbcsv_1","",'F');
	factory->AddSpectator("prebjetbcsv_2","prebjetbcsv_2","",'F');

	ParseParamFile(paramfile);

	// Computes the per-tree weight xs/evt of a sample from sample_info_.
	// Returns false (leaving `weight` untouched) when the sample is unknown or
	// its event count is not positive, instead of silently reusing the weight
	// of a previously processed sample.
	auto lookupWeight = [](const string& sample, double& weight) -> bool {
		auto it = sample_info_.find(sample.c_str());
		if(it==sample_info_.end()){
			std::cerr<<"ERROR: sample "<<sample<<" not found in parameter file"<<std::endl;
			return false;
		}
		const double evt = it->second.first;
		const double xs = it->second.second;
		if(!(evt>0)){
			std::cerr<<"ERROR: sample "<<sample<<" has a non-positive event count"<<std::endl;
			return false;
		}
		weight = xs/evt;
		return true;
	};

	// Resolve every weight first so that all missing samples are reported at once.
	bool weightsOk = true;
	std::vector<double> backgroundWeights;
	for(unsigned int bckgit=0;bckgit<backgroundTrees.size();++bckgit){
		double weight = 0;
		weightsOk = lookupWeight(bckglist.at(bckgit),weight) && weightsOk;
		backgroundWeights.push_back(weight);
	}
	std::vector<double> signalWeights;
	for(unsigned int sgit=0;sgit<signalTrees.size();++sgit){
		double weight = 0;
		weightsOk = lookupWeight(signallist.at(sgit),weight) && weightsOk;
		signalWeights.push_back(weight);
	}
	if(!weightsOk){
		outfile->Close();
		delete factory;
		return 1;
	}

	for(unsigned int bckgit=0;bckgit<backgroundTrees.size();++bckgit){
		std::cout<<backgroundWeights.at(bckgit)<<std::endl;
		factory->AddBackgroundTree(backgroundTrees.at(bckgit),backgroundWeights.at(bckgit));
	}
	for(unsigned int sgit=0;sgit<signalTrees.size();++sgit){
		std::cout<<signalWeights.at(sgit)<<std::endl;
		factory->AddSignalTree(signalTrees.at(sgit),signalWeights.at(sgit));
	}

	// Per-event weight branch, identical for signal and background
	factory->SetBackgroundWeightExpression("wt");
	factory->SetSignalWeightExpression("wt");

	// Selection applied to both signal and background; twotag takes
	// precedence over onetag when both are set.
	TCut mycutb, mycuts;
	if(twotag){
		mycutb="n_prebjets>1&&mt_1<30&&prebjetbcsv_1>0.679&&prebjetbcsv_2>0.679";
		mycuts="n_prebjets>1&&mt_1<30&&prebjetbcsv_1>0.679&&prebjetbcsv_2>0.679";
	}
	else if(onetag){
		mycutb="n_prebjets>1&&mt_1<30&&prebjetbcsv_1>0.679&&prebjetbcsv_2<0.679";
		mycuts="n_prebjets>1&&mt_1<30&&prebjetbcsv_1>0.679&&prebjetbcsv_2<0.679";
	}
	else{
		mycutb="n_prebjets>1&&mt_1<30";
		mycuts="n_prebjets>1&&mt_1<30";
	}
//TCut mycutb="";
//TCut mycuts="";
	factory->PrepareTrainingAndTestTree( mycuts, mycutb,"SplitMode=Random:!V");

	factory->BookMethod( TMVA::Types::kBDT, "BDT","!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

	factory->TrainAllMethods();
	factory->TestAllMethods();
	factory->EvaluateAllMethods();

	outfile->Close();
	delete factory;

	return 0;
}
Пример #9
0
void test2(){
  //---------------------------------------------------------------
  // This loads the library
  TMVA::Tools::Instance();
  TString outfileName( "trainingBDT_tZq.root" );
  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
  TMVA::Factory *factory = new TMVA::Factory( "BDT_trainning_tzq", outputFile,"!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );
  
  
  
  TFile *input_sig      = TFile::Open( "../TreeReader/outputroot/histofile_tZq.root" );
  TFile *input_wz       = TFile::Open( "../TreeReader/outputroot/histofile_WZ.root" );
  
  
  TTree *signal            = (TTree*)input_sig->Get("Ttree_tZq");
  TTree *background     = (TTree*)input_wz->Get("Ttree_WZ");
  
  factory->AddSignalTree    ( signal,	   1.);
  factory->AddBackgroundTree( background,  1.);
  
  
  std::vector<TString > varList;
  varList.push_back("tree_cosThetaStar");;
  varList.push_back("tree_topMass");     
  varList.push_back("tree_totMass");     
  varList.push_back("tree_deltaPhilb");  
  varList.push_back("tree_deltaRlb");    
  varList.push_back("tree_deltaRTopZ");  
  varList.push_back("tree_asym");        
  varList.push_back("tree_Zpt");         
  varList.push_back("tree_ZEta");        
  varList.push_back("tree_topPt");       
  varList.push_back("tree_topEta");      
  varList.push_back("tree_NJets");       
  varList.push_back("tree_NBJets");	 
  varList.push_back("tree_deltaRZl");	 
  varList.push_back("tree_deltaPhiZmet");
  varList.push_back("tree_btagDiscri");  
  
  varList.push_back("tree_totPt");	
  varList.push_back("tree_totEta");	
  
  
  varList.push_back("tree_leptWPt");	 
  varList.push_back("tree_leptWEta");	 
  varList.push_back("tree_leadJetPt");   
  varList.push_back("tree_leadJetEta");  
  varList.push_back("tree_deltaRZleptW");
  varList.push_back("tree_deltaPhiZleptW");
  
  
  varList.push_back("tree_met" );
  varList.push_back("tree_mTW" );
  
  
  for(unsigned int i=0; i< varList.size() ; i++) factory->AddVariable( varList[i].Data(),    'F');
  
  factory->SetSignalWeightExpression    ("tree_EvtWeight");
  factory->SetBackgroundWeightExpression("tree_EvtWeight");
   
  
  // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";

   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
   
   
   
   //factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );
//   factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=100:nEventsMin=100:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );
   factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=100:nEventsMin=100:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );

 


   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );

  
  
  
}
Пример #10
0
void TMVAClassification( TString myMethodList = "", int isMC=1, int useSvtx=1)
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   // if you like to use a method via the plugin mechanism, we recommend using
   //
   // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
   // (an example is given for using the BDT as plugin (see below),
   // but of course the real application is when you write your own
   // method based)

   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // to get access to the GUI and all tmva macros
   TString thisdir = gSystem->DirName(gInterpreter->GetCurrentMacroName());
   gROOT->SetMacroPath(thisdir + ":" + gROOT->GetMacroPath());
   //gROOT->ProcessLine(".L /Users/kjung/root5-34-23/tmva/test/TMVAGui.C");

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Cut optimisation
   Use["Cuts"]            = 0;
   Use["CutsD"]           = 0;
   Use["CutsPCA"]         = 0;
   Use["CutsGA"]          = 0;
   Use["CutsSA"]          = 0;
   // 
   // --- 1-dimensional likelihood ("naive Bayes estimator")
   Use["Likelihood"]      = 0;
   Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"]   = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"]   = 0;
   Use["LikelihoodMIX"]   = 0;
   //
   // --- Mutidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]           = 0;
   Use["PDERSD"]          = 0;
   Use["PDERSPCA"]        = 0;
   Use["PDEFoam"]         = 0;
   Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
   Use["KNN"]             = 0; // k-nearest neighbour method
   //
   // --- Linear Discriminant Analysis
   Use["LD"]              = 0; // Linear Discriminant identical to Fisher
   Use["Fisher"]          = 0;
   Use["FisherG"]         = 0;
   Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
   Use["HMatrix"]         = 0;
   //
   // --- Function Discriminant analysis
   Use["FDA_GA"]          = 0; // minimisation of user-defined function using Genetics Algorithm
   Use["FDA_SA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   Use["FDA_MCMT"]        = 0;
   //
   // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
   Use["MLP"]             = 0; // Recommended ANN
   Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
   Use["MLPBNN"]          = 0; // Recommended ANN with BFGS training method and bayesian regulator
   Use["CFMlpANN"]        = 0; // Depreciated ANN from ALEPH
   Use["TMlpANN"]         = 0; // ROOT's own ANN
   //
   // --- Support Vector Machine 
   Use["SVM"]             = 0;
   // 
   // --- Boosted Decision Trees
   Use["BDT"]             = 0; // uses Adaptive Boost
   Use["BDTG"]            = 1; // uses Gradient Boost
   Use["BDTB"]            = 0; // uses Bagging
   Use["BDTD"]            = 0; // decorrelation + Adaptive Boost
   Use["BDTF"]            = 0; // allow usage of fisher discriminant for node splitting 
   // 
   // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
   Use["RuleFit"]         = 0;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // --------------------------------------------------------------------------------------------------

   // --- Here the preparation phase begins

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName;
   string svtxExt = "noSvtx";
   if(useSvtx) svtxExt = "withSvtx";
   if(!isMC) outfileName = "TMVA_trained_data.root";
   else outfileName = Form("TMVA_trained_cJet_medDCuts_BvC_%s.root",svtxExt.c_str());
   cout << "fn: "<< outfileName << endl;
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
   outputFile->cd();

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory is 
   // the only TMVA object you have to interact with
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   //factory->AddVariable( "myvar1 := var1+var2", 'F' );

   if(useSvtx) factory->AddVariable("svtxptFrac","svtx pt fraction","units",'F');
   //factory->AddVariable( "djetR", "Closest Meson to Jet dr", "", 'F' );
   factory->AddVariable( "nIP","number of IP trks","units",'I');
   if(useSvtx) factory->AddVariable( "svtxm", "svtx mass", "units", 'F');
   if(useSvtx) factory->AddVariable( "svtxmEnergyFrac","svtxmEnergyFrac","units",'F');
   if(useSvtx) factory->AddVariable( "svtxpt", "svtx pt", "units", 'F');
   if(useSvtx) factory->AddVariable( "svtxmcorr", "corrected svtx mass", "units", 'F');
   if(useSvtx) factory->AddVariable( "svtxdl", "svtx displacement", "units", 'F');
   if(useSvtx) factory->AddVariable( "svtxdls", "svtx displacement significance", "units", 'F');
   if(useSvtx) factory->AddVariable( "svtxntrk", "svtx ntracks", "units", 'F');
   if(useSvtx) factory->AddVariable( "sv2Trkdl", "2trk svtx close2PV dl", "units", 'F');
   if(useSvtx) factory->AddVariable( "svtxTrkSumChi2", "svtx trk sum chi2", "units", 'F');
   if(useSvtx) factory->AddVariable( "svtxTrkNetCharge", "svtx trk net chg", "units", 'F');
   if(useSvtx) factory->AddVariable( "svtxNtrkInCone", "svtx ntrk in cone", "units", 'F');
   //factory->AddVariable( "jteta",  "Jet eta", "units", 'F' );
   factory->AddVariable( "closestDMass","Closest DMass", "units", 'F' );
   factory->AddVariable( "closestDType","Closest Type","units",'F');
   factory->AddVariable( "closestDPt","Closest DpT", "units", 'F' );
   /*factory->AddVariable( "chargedMax","chargedMax","units",'F');
   factory->AddVariable( "chargedSum","chargedSum","units",'F');
   factory->AddVariable( "neutralMax","neutralMax","units",'F');
   factory->AddVariable( "neutralSum","neutralSum","units",'F');
   factory->AddVariable( "photonMax","photonMax","units",'F');
   factory->AddVariable( "photonSum","photonSum","units",'F');
   factory->AddVariable( "eSum","eSum","units",'F');
   factory->AddVariable( "muSum","muSum","units",'F');*/
   for(int i=0; i<3; i++){
     //factory->AddVariable( Form("ipProb0_%d",i),Form("prob0 IP part %d",i),"units",'F');
     //factory->AddVariable( Form("ipPt_%d",i),Form("IP pt part %d",i),"units",'F');
     factory->AddVariable( Form("trackIP2dSig_%d",i),Form("IP trk 2d sig part %d",i),"units",'F');
     factory->AddVariable( Form("trackIP3dSig_%d",i),Form("IP trk 3d sig part %d",i),"units",'F');
     factory->AddVariable( Form("trackIP2d_%d",i),Form("IP trk 2d part %d",i),"units",'F');
     factory->AddVariable( Form("trackIP3d_%d",i),Form("IP trk 3d part %d",i),"units",'F');
  }
  for(int i=0; i<1; i++){
     //factory->AddVariable( Form("trackPtRel_%d",i),Form("pt rel part %d",i),"units",'F');
     //factory->AddVariable( Form("trackPPar_%d",i),Form("track ppar part %d",i),"units",'F');
     //factory->AddVariable( Form("trackPParRatio_%d",i),Form("track ppar part %d",i),"units",'F');
     factory->AddVariable( Form("trackJetDist_%d",i),Form("dist to jet part %d",i),"units",'F');
     factory->AddVariable( Form("trackDecayLenVal_%d",i),Form("trk decay len part %d",i),"units",'F');
     //factory->AddVariable( Form("trackDeltaR_%d",i),Form("trk dr to jet part %d",i),"units",'F');
     //factory->AddVariable( Form("trackPtRatio_%d",i),Form("trk pt ratio part %d",i),"units",'F');
   }
   //factory->AddVariable( "trackSip2dSigAboveCharm","trackSip2dSigAboveCharm","units",'F');
   //factory->AddVariable( "trackSip3dSigAboveCharm","trackSip3dSigAboveCharm","units",'F');
   //factory->AddVariable( "trackSip2dValAboveCharm","trackSip2dValAboveCharm","units",'F');
   //factory->AddVariable( "trackSip3dValAboveCharm","trackSip3dValAboveCharm","units",'F');
   //factory->AddVariable( "svJetDeltaR","svJetDeltaR","units",'F');
   factory->AddVariable( "trackSumJetDeltaR","trackSumJetDeltaR","units",'F');
     
   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
   //factory->AddSpectator( "spec1 := var1*2",  "Spectator 1", "units", 'F' );
   
   if(isMC) factory->AddSpectator( "refpt",  "ref pT", "units", 'F' );
   factory->AddSpectator( "rawpt",  "raw pT", "units", 'F' );
   //factory->AddSpectator( "dCandPt", "D-Meson pT", "units" , 'F' );
   if(isMC) factory->AddSpectator( "refparton_flavorForB", "jet flavor", "units" , 'F' );
   //factory->AddSpectator( "evtSelection", "event selection", "units" , 'F' );
   //factory->AddSpectator( "vz", "z-vertex", "units" , 'F' );
   //if(isMC) factory->AddSpectator( "subid", "subid", "units" , 'F' );
   //factory->AddSpectator( "pthat", "pthat", "units" , 'F' );
   //factory->AddSpectator( "run", "run", "units" , 'I' );
   //factory->AddSpectator( "bin", "centrality", "units" , 'I' );
   factory->AddSpectator( "jtpt",  "Jet pT", "units", 'F' );
   
   // Read training and test data
   // (it is also possible to use ASCII format as input -> see TMVA Users Guide)
   TString fname;
   if(!isMC) fname = "/Users/kjung/charmJets/pPb/input/DMesonCJet_pPbData_ppReco_akPu3PF_convertToJetTree_withLHCbVars_medDCuts.root";
   else fname = "/Users/kjung/charmJets/pPb/input/DMesonCJet_QCDJetOnly_pPbMC_ppReco_akPu3PF_convertToJetTree_medDCuts.root";
   
   //if (gSystem->AccessPathName( fname ))  // file does not exist in local directory
   //   gSystem->Exec("curl -O http://root.cern.ch/files/tmva_class_example.root");
   
   TFile *input = TFile::Open( fname );
   
   std::cout << "--- TMVAClassification       : Using input file: " << input->GetName() << std::endl;
   
   // --- Register the training and test trees

   TTree *signal, *background;
   if(useSvtx){
      signal = (TTree*)input->Get("jets");
      background = (TTree*)input->Get("jets");
   }
   else{
      signal = (TTree*)input->Get("jetsNoSvtx");
      background = (TTree*)input->Get("jetsNoSvtx");
   }

   TTree *signal_2 = (TTree*)input->Get("dMesons");
   TTree *background_2 = (TTree*)input->Get("dMesons");

   signal->AddFriend(signal_2);
   background->AddFriend(background_2);
   
   // global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;
   
   // You can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( signal,     signalWeight     );
   factory->AddBackgroundTree( background, backgroundWeight );
   
   // To give different trees for training and testing, do as follows:
   //    factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
   //    factory->AddSignalTree( signalTestTree,     signalTestWeight,  "Test" );
   
   // Use the following code instead of the above two or four lines to add signal and background
   // training and test events "by hand"
   // NOTE that in this case one should not give expressions (such as "var1+var2") in the input
   //      variable definition, but simply compute the expression before adding the event
   //
   //     // --- begin ----------------------------------------------------------
   //     std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
   //     Float_t  treevars[4], weight;
   //     
   //     // Signal
   const int nvars = 35; //67;
   const int nvarsWithInt = 2;
   double weight;
   std::vector<double> vars(nvars);
   double treevars[nvars-nvarsWithInt];
   int treevars2[nvarsWithInt];
   //std::string variables[nvars] = {"djetR","closestDPt","svtxm","svtxdl","jtpt","refpt","rawpt","refparton_flavorForB","svtxdls","trackIP2dSig_0","trackIP3dSig_0","trackIP2d_0","trackIP3d_0","ipProb0_0","trackPtRel_0","trackPPar_0","trackPParRatio_0","trackJetDist_0","trackDecayLenVal_0","trackDeltaR_0","trackPtRatio_0","ipPt_0","trackIP2dSig_1","trackIP3dSig_1","trackIP2d_1","trackIP3d_1","ipProb0_1","trackPtRel_1","trackPPar_1","trackPParRatio_1","trackJetDist_1","trackDecayLenVal_1","trackDeltaR_1","trackPtRatio_1","ipPt_1","trackIP2dSig_2","trackIP3dSig_2","trackIP2d_2","trackIP3d_2","ipProb0_2","trackPtRel_2","trackPPar_2","trackPParRatio_2","trackJetDist_2","trackDecayLenVal_2","trackDeltaR_2","trackPtRatio_2","ipPt_2","trackIP2dSig_3","trackIP3dSig_3","trackIP2d_3","trackIP3d_3","ipProb0_3","trackPtRel_3","trackPPar_3","trackPParRatio_3","trackJetDist_3","trackDecayLenVal_3","trackDeltaR_3","trackPtRatio_3","ipPt_3","trackSip2dValAboveCharm","trackSip3dValAboveCharm","svJetDeltaR","trackSumJetDeltaR","nIP","svtxntrk"};
   
   std::string variables[nvars] = {"jtpt","refpt","rawpt","refparton_flavorForB","svtxptFrac","svtxmEnergyFrac","svtxpt","svtxm",
   "svtxdl","svtxdls","svtxTrkSumChi2","svtxTrkNetCharge","sv2Trkdl", "closestDMass","closestDType","closestDPt","trackIP2dSig_0","trackIP2dSig_1",
"trackIP2dSig_2","trackIP3dSig_0","trackIP3dSig_1","trackIP3dSig_2","trackIP2d_0","trackIP2d_1",
"trackIP2d_2","trackIP3d_0","trackIP3d_1","trackIP3d_2","trackJetDist_0","trackDecayLenVal_0","svJetDeltaR",
"trackSumJetDeltaR","svtxNtrkInCone","svtxntrk","nIP"};

   //std::string variables[nvars] = {"jtpt","refpt","rawpt","refparton_flavorForB","svtxptFrac","svtxdl","svtxdls","closestDPt","closestDType","closestDMass","svtxm","svtxmcorr","svJetDeltaR","trackSumJetDeltaR","svtxpt","sv2Trkdl","svtxTrkSumChi2","svtxTrkNetCharge","svtxNtrkInCone","svtxntrk"};
   signal->SetBranchAddress("weight", &weight);
	
   for (UInt_t ivar=0; ivar<nvars-nvarsWithInt; ivar++) signal->SetBranchAddress( variables[ivar].c_str(), &(treevars[ivar]) );
   for (UInt_t ivar=nvars-nvarsWithInt; ivar<nvars; ivar++) signal->SetBranchAddress( variables[ivar].c_str(), &(treevars2[ivar]) );
   for (UInt_t i=0; i<signal->GetEntries(); i++) {
     signal->GetEntry(i);
     for (UInt_t ivar=0; ivar<nvars-nvarsWithInt; ivar++) vars[ivar] = treevars[ivar];
      for (UInt_t ivar=nvars-nvarsWithInt; ivar<nvars; ivar++) vars[ivar] = treevars2[ivar];
     // add training and test events; here: even-numbered entries are used for training, odd-numbered for testing
     // note that the weight can also be event-wise
     //for(int ij=0; ij<nvars; ij++) cout << ij << " " << vars[ij] << endl;
     if(isMC && (abs(vars[3])==4)) {
       if (i%2==0)  factory->AddSignalTrainingEvent( vars, weight );
       else                              factory->AddSignalTestEvent    ( vars, weight );
     }
   }
   //   
   //     // Background (has event weights)
   background->SetBranchAddress( "weight", &weight );
   for (UInt_t ivar=0; ivar<nvars-nvarsWithInt; ivar++) background->SetBranchAddress( variables[ivar].c_str(), &(treevars[ivar]) );
   for (UInt_t ivar=nvars-nvarsWithInt; ivar<nvars; ivar++) background->SetBranchAddress( variables[ivar].c_str(), &(treevars2[ivar]) );
   for (UInt_t i=0; i<background->GetEntries(); i++) {
     background->GetEntry(i);
     for (UInt_t ivar=0; ivar<nvars-nvarsWithInt; ivar++) vars[ivar] = treevars[ivar];
      for (UInt_t ivar=nvars-nvarsWithInt; ivar<nvars; ivar++) vars[ivar] = treevars2[ivar];
     // add training and test events; here: even-numbered entries are used for training, odd-numbered for testing
     // note that the weight can also be event-wise
     if(isMC && (abs(vars[3])==5)) {
       if (i%2==0) factory->AddBackgroundTrainingEvent( vars, weight );
       else                                factory->AddBackgroundTestEvent    ( vars, weight );
     }
   }
   // --- end ------------------------------------------------------------
   //
   // --- end of tree registration 

   // Set individual event weights (the variables must exist in the original TTree)
   //    for signal    : factory->SetSignalWeightExpression    ("weight1*weight2");
   //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
   factory->SetSignalWeightExpression("weight");
   factory->SetBackgroundWeightExpression( "weight" );

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";
   
   // Tell the factory how to use the training and testing events
   //
   // If no numbers of events are given, half of the events in the tree are used 
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut,
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

   if (Use["CutsSA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                           "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   // Likelihood ("naive Bayes estimator")
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // Decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   // PCA-transformed likelihood
   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 

   // Use a kernel density estimator to approximate the PDFs
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); 

   // Use a variable-dependent mix of splines and kernel density estimator
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

   // Test the multi-dimensional probability density estimator
   // here are the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["PDEFoamBoost"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
                           "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // H-Matrix (chi2-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" );

   // Linear discriminant (same as Fisher discriminant)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher discriminant (same as LD)
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", 
                           "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators

   // CF(Clermont-Ferrand)ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...  

   // Tmlp(Root)ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDTG"]) // Gradient Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           //"!H:!V:NTrees=850:MinNodeSize=2%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" );
                           "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.9:SeparationType=GiniIndex:nCuts=500:MaxDepth=2" );
   if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );

   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );

   if (Use["BDTF"])  // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables
      factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher",
                           "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

   // ---- STILL EXPERIMENTAL and only implemented for BDT's ! 
   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","FitGA");

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;
   
   delete factory;
   
   // Launch the GUI for the root macros
   //if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
Пример #11
0
void TMVAClassification( TString myMethodList = "" )
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   // if you like to use a method via the plugin mechanism, we recommend using
   //
   // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
   // (an example is given for using the BDT as plugin (see below),
   // but of course the real application is when you write your own
   // method based)

   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // to get access to the GUI and all tmva macros
   TString tmva_dir(TString(gRootDir) + "/tmva");
   if(gSystem->Getenv("TMVASYS"))
      tmva_dir = TString(gSystem->Getenv("TMVASYS"));
   gROOT->SetMacroPath(tmva_dir + "/test/:" + gROOT->GetMacroPath() );
   gROOT->ProcessLine(".L TMVAGui.C");
   gROOT->ProcessLine(".L BDT.C");
   gROOT->ProcessLine(".L BDTControlPlots.C");
   gROOT->ProcessLine(".L BDT_Reg.C");

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   //
   // Turn on-off the MVA to run
   //


   // --- Cut optimisation
   Use["Cuts"]            = 0;
   Use["CutsD"]           = 0;
   Use["CutsPCA"]         = 0;
   Use["CutsGA"]          = 1;
   Use["CutsSA"]          = 0;
   // 
   // --- 1-dimensional likelihood ("naive Bayes estimator")
   Use["Likelihood"]      = 0;
   Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"]   = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"]   = 0;
   Use["LikelihoodMIX"]   = 0;
   //
   // --- Multidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]           = 0;
   Use["PDERSD"]          = 0;
   Use["PDERSPCA"]        = 0;
   Use["PDEFoam"]         = 0;
   Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
   Use["KNN"]             = 0; // k-nearest neighbour method
   //
   // --- Linear Discriminant Analysis
   Use["LD"]              = 0; // Linear Discriminant identical to Fisher
   Use["Fisher"]          = 0;
   Use["FisherG"]         = 0;
   Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
   Use["HMatrix"]         = 0;
   //
   // --- Function Discriminant analysis
   Use["FDA_GA"]          = 0; // minimisation of user-defined function using Genetics Algorithm
   Use["FDA_SA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   Use["FDA_MCMT"]        = 0;
   //
   // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
   Use["MLP"]             = 1; // Recommended ANN
   Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
   Use["MLPBNN"]          = 0; // Recommended ANN with BFGS training method and bayesian regulator
   Use["CFMlpANN"]        = 0; // Depreciated ANN from ALEPH
   Use["TMlpANN"]         = 0; // ROOT's own ANN
   //
   // --- Support Vector Machine 
   Use["SVM"]             = 0;
   // 
   // --- Boosted Decision Trees
   Use["BDT"]             = 0; // uses Adaptive Boost
   Use["BDTG"]            = 1; // uses Gradient Boost
   Use["BDTB"]            = 0; // uses Bagging
   Use["BDTD"]            = 0; // decorrelation + Adaptive Boost
   Use["BDTF"]            = 0; // allow usage of fisher discriminant for node splitting 
   // 
   // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
   Use["RuleFit"]         = 0;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // --------------------------------------------------------------------------------------------------

   // --- Here the preparation phase begins

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   string outFileName = "TMVA_" + sSelection[mode-1] + ".root";
   TString outfileName( outFileName.c_str());
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory is 
   // the only TMVA object you have to interact with
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   /*
   factory->AddVariable( "myvar1 := var1+var2", 'F' );
   factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' );
   factory->AddVariable( "var3",                "Variable 3", "units", 'F' );
   factory->AddVariable( "var4",                "Variable 4", "units", 'F' );
   */

   //Not order matter for the CutRange declarations
   if(mode==1){
     //SRZjets inside Z use mT2, pTll, mjj, dR_ll, mEff, mWWT
     factory->AddVariable( "mT2", 'F');     
     factory->AddVariable( "pTll", 'F');
     factory->AddVariable( "mjj", 'F');
     factory->AddVariable( "dR_ll", 'F');
     factory->AddVariable( "mEff", 'F');
     factory->AddVariable( "mWWT", 'F');    
     factory->AddVariable( "mctPerp", 'F');    
     factory->AddVariable( "metrel", 'F');   
     factory->AddVariable( "met/mEff", 'F');
     factory->AddVariable( "mT2jj", 'F');        
     factory->AddVariable( "sphericity", 'F');     
     factory->AddVariable( "sphericityTrans", 'F');     
     factory->AddVariable( "abs(llAcoplanarity+3.1415)", 'F');     
     factory->AddVariable( "abs(jjAcoplanarity+3.1415)", 'F');   
     factory->AddVariable( "mTl[0]", 'F');
     factory->AddVariable( "mTl[1]", 'F');  

     //factory->AddVariable( "mctPara", 'F');   //issue with var content NAN
   }
   else if(mode==2){
     factory->AddVariable( "mT2", 'F');     
     factory->AddVariable( "pTll", 'F');
     factory->AddVariable( "mjj", 'F');
     factory->AddVariable( "dR_ll", 'F');
     factory->AddVariable( "mEff", 'F');
     factory->AddVariable( "mWWT", 'F');    
     factory->AddVariable( "mctPerp", 'F');    
     factory->AddVariable( "metrel", 'F');   
     factory->AddVariable( "met/mEff", 'F');
     factory->AddVariable( "mT2jj", 'F');        
     factory->AddVariable( "sphericity", 'F');     
     factory->AddVariable( "sphericityTrans", 'F');     
     factory->AddVariable( "abs(llAcoplanarity+3.1415)", 'F');     
     factory->AddVariable( "abs(jjAcoplanarity+3.1415)", 'F');   
     factory->AddVariable( "mTl[0]", 'F');
     factory->AddVariable( "mTl[1]", 'F'); 
   }
   else if(mode==3){
     factory->AddVariable( "mT2", 'F');     
     factory->AddVariable( "pTll", 'F');
     factory->AddVariable( "mjj", 'F');
     factory->AddVariable( "dR_ll", 'F');
     factory->AddVariable( "mEff", 'F');
     factory->AddVariable( "mWWT", 'F');    
     factory->AddVariable( "mctPerp", 'F');    
     factory->AddVariable( "metrel", 'F');   
     factory->AddVariable( "met/mEff", 'F');
     factory->AddVariable( "mT2jj", 'F');        
     factory->AddVariable( "sphericity", 'F');     
     factory->AddVariable( "sphericityTrans", 'F');     
     factory->AddVariable( "abs(llAcoplanarity+3.1415)", 'F');     
     factory->AddVariable( "abs(jjAcoplanarity+3.1415)", 'F');   
     factory->AddVariable( "mTl[0]", 'F');
     factory->AddVariable( "mTl[1]", 'F'); 
   }
   else if(mode==4){
     factory->AddVariable( "mT2", 'F');     
     factory->AddVariable( "pTll", 'F');
     factory->AddVariable( "dR_ll", 'F');
     factory->AddVariable( "mWWT", 'F');    
     factory->AddVariable( "metrel", 'F');   
     factory->AddVariable( "mTl[0]", 'F');
     factory->AddVariable( "mTl[1]", 'F'); 
   }

   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
   //factory->AddSpectator( "spec1 := var1*2",  "Spectator 1", "units", 'F' );
   //factory->AddSpectator( "spec2 := var1*3",  "Spectator 2", "units", 'F' );

   // Read training and test data
   // (it is also possible to use ASCII format as input -> see TMVA Users Guide)
   //TString fname = "./tmva_class_example.root";
   TString fname_bkg = string(getenv("HISTOANA")) + "/SusyAna/" + ver +
     "toyNt_Bkg_Zjets_SherpaAlpgen_WZ_ZZ_PowHeg_WW_PowHeg_TopMCNLO_" + toySkim + "_rlep.root";
   if(toySkim == "DIL_optimSRSS")
     fname_bkg = string(getenv("HISTOANA")) + "/SusyAna/" + ver +
     "toyNt_Bkg_Zjets_SherpaAlpgen_WZ_ZZ_PowHeg_WW_PowHeg_TopMCNLO_FAKE_" + toySkim + ".root";

   std::cout << "Bkg file: " << fname_bkg << std::endl;
   TFile* fBkg = TFile::Open( fname_bkg.Data() );

   TString fname_sig;
   fname_sig = string(getenv("HISTOANA")) + "/SusyAna/" + ver + "ToyNtOutputs/";
   if(mode==1)        fname_sig += "164339_" + toySkim + ".root"; //wA_noslep_300_50
   else if(mode==2)   fname_sig += "164326_" + toySkim + ".root"; //wA_noslep_150_0
   else if(mode==3)   fname_sig += "157955_" + toySkim + ".root"; //wA_slep_142_107
   //else if(mode==4)   fname_sig += "176559_" + toySkim + ".root"; //wC_slep_117_47
   else if(mode==4)   fname_sig += "144907_" + toySkim + ".root"; //wC_slep_150_50
     
   std::cout << "Signal file: " << fname_sig << std::endl;
   TFile* fSig = TFile::Open( fname_sig.Data() );

   //if (gSystem->AccessPathName( fname ))  // file does not exist in local directory
   //   gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root");
   
   
   std::cout << "--- TMVAClassification       : Using input Bkg file: " << fBkg->GetName() << std::endl;
   std::cout << "--- TMVAClassification       : Using input Sig file: " << fSig->GetName() << std::endl;
   
   // --- Register the training and test trees

   TTree *signal     = (TTree*)fSig->Get("ToyNt");
   TTree *background = (TTree*)fBkg->Get("ToyNt");
   
   // global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;
   
   // You can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( signal,     signalWeight     );
   factory->AddBackgroundTree( background, backgroundWeight );
   
   // To give different trees for training and testing, do as follows:
   //    factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
   //    factory->AddSignalTree( signalTestTree,     signalTestWeight,  "Test" );
   
   // Use the following code instead of the above two or four lines to add signal and background
   // training and test events "by hand"
   // NOTE that in this case one should not give expressions (such as "var1+var2") in the input
   //      variable definition, but simply compute the expression before adding the event
   //
   //     // --- begin ----------------------------------------------------------
   //     std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
   //     Float_t  treevars[4], weight;
   //     
   //     // Signal
   //     for (UInt_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   //     for (UInt_t i=0; i<signal->GetEntries(); i++) {
   //        signal->GetEntry(i);
   //        for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //        // add training and test events; here: first half is training, second is testing
   //        // note that the weight can also be event-wise
   //        if (i < signal->GetEntries()/2.0) factory->AddSignalTrainingEvent( vars, signalWeight );
   //        else                              factory->AddSignalTestEvent    ( vars, signalWeight );
   //     }
   //   
   //     // Background (has event weights)
   //     background->SetBranchAddress( "weight", &weight );
   //     for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   //     for (UInt_t i=0; i<background->GetEntries(); i++) {
   //        background->GetEntry(i);
   //        for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //        // add training and test events; here: first half is training, second is testing
   //        // note that the weight can also be event-wise
   //        if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight*weight );
   //        else                                factory->AddBackgroundTestEvent    ( vars, backgroundWeight*weight );
   //     }
         // --- end ------------------------------------------------------------
   //
   // --- end of tree registration 

   // Set individual event weights (the variables must exist in the original TTree)
   //    for signal    : factory->SetSignalWeightExpression    ("weight1*weight2");
   //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
   factory->SetBackgroundWeightExpression( "w" );
   factory->SetSignalWeightExpression( "w");

   // Apply additional cuts on the signal and background samples (can be different)
   // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";

   TCut mycuts = "";
   if(mode==1){  //Optimisation inside Z peak
     mycuts = "(llType==0 || llType==1) && abs(mll-91.2)<10 && nCJets>=2 \
&& j_pt[0]>20 && j_isC20[0] && Alt$(j_pt[1],0) && Alt$(j_isC20[1],0)";
     /* \&& mEff>250 && met/mEff>0.3"; */
   }
   else if(mode==2){
     mycuts = "(llType==0 || llType==1) && abs(mll-91.2)<10 && nCJets>=2 \
&& j_pt[0]>20 && j_isC20[0] && Alt$(j_pt[1],0) && Alt$(j_isC20[1],0)	\
&& mT2jj>60";
   }
   else if(mode==3){
     mycuts = "llType==1 && nBJets==0 &&nFJets==0 ";
   }
   else if(mode==4){
     mycuts = "llType==2";
   }
   
   /*
   //Optimation close to diagonal
   TCut mycuts = "(llType==0 || llType==1) && mll<50 && nCJets>=2 \
   && j_pt[0]>20 && j_isC20[0] && Alt$(j_pt[1],0) && Alt$(j_isC20[1],0)"; 
   */

   // for example: TCut mycutb = "abs(var1)<0.5";
   TCut mycutb = mycuts; 
   
   // Tell the factory how to use the training and testing events
   //
   // If no numbers of events are given, half of the events in the tree are used 
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut,
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
   if(mode==1)
     factory->PrepareTrainingAndTestTree( mycuts, mycutb,
					  "nTrain_Signal=4000:nTrain_Background=100000:SplitMode=Random:NormMode=NumEvents:!V" );
   else if(mode==2)
     factory->PrepareTrainingAndTestTree( mycuts, mycutb,
					  "nTrain_Signal=4000:nTrain_Background=100000:SplitMode=Random:NormMode=NumEvents:!V" );
   else if(mode==3)
     factory->PrepareTrainingAndTestTree( mycuts, mycutb,
					  "nTrain_Signal=1000:nTrain_Background=100000:SplitMode=Random:NormMode=NumEvents:!V" );
   else if(mode==4)
     factory->PrepareTrainingAndTestTree( mycuts, mycutb,
					  "nTrain_Signal=1000:nTrain_Background=100000:SplitMode=Random:NormMode=NumEvents:!V" );

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:V:FitMethod=MC:CutRangeMin[0]=0:CutRangeMax[0]=1000:CutRangeMin[1]=0:CutRangeMax[1]=1000:CutRangeMin[2]=0:CutRangeMax[2]=2000:CutRangeMin[3]=0:CutRangeMax[3]=5:CutRangeMin[4]=0:CutRangeMax[4]=3000:CutRangeMin[5]=0:CutRangeMax[5]=2000:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"]){
     if(mode==1)
       factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
			    "!H:V:FitMethod=GA:CutRangeMin[0]=0:CutRangeMax[0]=1000:CutRangeMin[1]=0:CutRangeMax[1]=1000:CutRangeMin[2]=0:CutRangeMax[2]=2000:CutRangeMin[3]=0:CutRangeMax[3]=5:CutRangeMin[4]=0:CutRangeMax[4]=3000:CutRangeMin[5]=0:CutRangeMax[5]=2000:VarProp=FSmart:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );
     else if(mode==2)
       factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
			    "!H:V:FitMethod=GA:CutRangeMin[0]=0:CutRangeMax[0]=1000:CutRangeMin[1]=0:CutRangeMax[1]=3000:CutRangeMin[2]=0:CutRangeMax[2]=3000:CutRangeMin[3]=0:CutRangeMax[3]=1000:CutRangeMin[4]=0:CutRangeMax[4]=1000:CutRangeMin[5]=0:CutRangeMax[5]=3000:VarProp=FSmart:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );
     else if(mode==3)
       factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
			    "!H:V:FitMethod=GA:CutRangeMin[0]=0:CutRangeMax[0]=1000:CutRangeMin[1]=0:CutRangeMax[1]=2000:CutRangeMin[2]=0:CutRangeMax[2]=1000:CutRangeMin[3]=0:CutRangeMax[3]=5:VarProp=FSmart:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

   }


   if (Use["CutsSA"]){
     if(mode==1)
       factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
			    "!H:!V:FitMethod=SA:CutRangeMin[0]=0:CutRangeMax[0]=1000:CutRangeMin[1]=0:CutRangeMax[1]=1000:CutRangeMin[2]=0:CutRangeMax[2]=2000:CutRangeMin[3]=0:CutRangeMax[3]=5:CutRangeMin[4]=0:CutRangeMax[4]=3000:CutRangeMin[5]=0:CutRangeMax[5]=2000:VarProp=FSmart:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );
     else if(mode==2) 
       factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
			    "!H:!V:FitMethod=SA:CutRangeMin[0]=0:CutRangeMax[0]=1000:CutRangeMin[1]=0:CutRangeMax[1]=3000:CutRangeMin[2]=0:CutRangeMax[2]=3000:CutRangeMin[3]=0:CutRangeMax[3]=1000:CutRangeMin[4]=0:CutRangeMax[4]=1000:CutRangeMin[5]=0:CutRangeMax[5]=3000:VarProp=FSmart:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );
     else if(mode==3)
       factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
			    "!H:!V:FitMethod=SA:CutRangeMin[0]=0:CutRangeMax[0]=1000:CutRangeMin[1]=0:CutRangeMax[1]=2000:CutRangeMin[2]=0:CutRangeMax[2]=1000:CutRangeMin[3]=0:CutRangeMax[3]=5:VarProp=FSmart:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );
   }
   
   // Likelihood ("naive Bayes estimator")
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // Decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   // PCA-transformed likelihood
   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 

   // Use a kernel density estimator to approximate the PDFs
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); 

   // Use a variable-dependent mix of splines and kernel density estimator
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

   // Test the multi-dimensional probability density estimator
   // here are the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["PDEFoamBoost"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
                           "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // H-Matrix (chi2-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" );

   // Linear discriminant (same as Fisher discriminant)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "!H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher discriminant (same as LD)
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", 
                           "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=N+1:TestRate=5:!UseRegulator" );

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators

   // CF(Clermont-Ferrand)ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...  

   // Tmlp(Root)ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3:IgnoreNegWeightsInTraining=True"  ); // n_cycles:#nodes:#nodes:...

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees [MY DEFAULT]
   if (Use["BDTG"]){ // Gradient Boost
     factory->BookMethod( TMVA::Types::kBDT, "BDTG",
			  "!H:!V:NTrees=300:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=10:NNodesMax=5:IgnoreNegWeightsInTraining=True" );
   }
   
   if (Use["BDT"]){  // Adaptive Boost
     if(mode<3){
     factory->BookMethod( TMVA::Types::kBDT, "BDT",
			  "!H:V:NTrees=100:nEventsMin=2000:MaxDepth=4:UseRandomisedTrees=True:UseNVars=4:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:PruneStrength=-1:IgnoreNegWeightsInTraining=True" );
     }
     else if(mode==3){
       factory->BookMethod( TMVA::Types::kBDT, "BDT",
			    "!H:V:NTrees=50:nEventsMin=2000:MaxDepth=4:UseRandomisedTrees=True:UseNVars=4:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:PruneStrength=-1:IgnoreNegWeightsInTraining=True" );
     }
   }


   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );

   if (Use["BDTF"])  // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables
      factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher",
                           "!H:!V:NTrees=50:nEventsMin=150:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","GA");

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
Пример #12
0
// Train, test and evaluate a TMVA classifier on one input tree.
//
// Parameters:
//   infilename             For AnalysisType::TRANSFORMED this is used verbatim
//                          as the input file path; otherwise the path is built
//                          as pathToData + infilename + ".root".
//   analysisType           Selects the tree name ("transformed" for
//                          TRANSFORMED, "data" otherwise), the signal and
//                          background cuts, the event weights and the naming
//                          of the output file / method (see below).
//   additionalRootFileName Optional file attached as a friend chain (same tree
//                          name, alias "p"); skipped when empty.
//
// Output file name:
//   BACKGROUND : "BACK_" + infilename + <timestamp> + ".root"
//   otherwise  : "TMVA__" + <timestamp> + ".root"
//
// Returns (output file name, booked method name). With the switches below
// only the "NNPG" network is booked. If the output file cannot be opened an
// error is printed and a pair of empty strings is returned.
//
// Relies on names defined elsewhere in this file: now(), pathToData,
// variableNames, spectatorNames and baseCut.
std::pair<TString,TString> TMVAClassification (
    TString infilename,
    AnalysisType analysisType = AnalysisType::DIRECT,
    TString additionalRootFileName = "")
{
    TMVA::Tools::Instance();

    // Timestamp used to make output file and method names unique per call.
    std::string tmstr (now ());
    TString tmstmp (tmstr.c_str ());

    const bool isTransformed = (analysisType == AnalysisType::TRANSFORMED);
    const bool isBackground  = (analysisType == AnalysisType::BACKGROUND);

    std::cout << "==> Start TMVAClassification" << std::endl;
    std::cout << "-------------------- open input file ---------------- " << std::endl;
    TString fname = infilename;
    if (!isTransformed)
        fname = pathToData + infilename + TString (".root");
    std::cout << "open file " << std::endl << fname.Data () << std::endl;

    std::cout << "-------------------- get tree ---------------- " << std::endl;
    TString treeName = "data";
    if (isTransformed)
        treeName = "transformed";

    std::cout << "-------------------- create tchain with treeName ---------------- " << std::endl;
    std::cout << treeName << std::endl;
    TChain* tree = new TChain (treeName);
    std::cout << "add file" << std::endl;
    std::cout << fname << std::endl;
    tree->Add (fname);

    // Optional friend chain; it is accessible in expressions through alias "p".
    TChain* treeFriend = nullptr;
    if (additionalRootFileName.Length () > 0)
    {
        std::cout << "-------------------- add additional input file ---------------- " << std::endl;
        std::cout << additionalRootFileName << std::endl;
        treeFriend = new TChain (treeName);
        treeFriend->Add (additionalRootFileName);
        tree->AddFriend (treeFriend,"p");
    }

    TString outfileName;
    if (isBackground)
        outfileName = TString ("BACK_" + infilename) + tmstmp + TString (".root");
    else
        outfileName = TString ( "TMVA__" ) + tmstmp + TString (".root");

    std::cout << "-------------------- open output file ---------------- " << std::endl;
    TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
    if (!outputFile || outputFile->IsZombie ())
    {
        // Without a writable output file the factory cannot store anything;
        // bail out instead of dereferencing a null pointer later on.
        std::cerr << "TMVAClassification: cannot open output file " << outfileName << std::endl;
        delete outputFile;
        delete tree;
        delete treeFriend;
        return std::make_pair (TString (""), TString (""));
    }

    std::cout << "-------------------- prepare factory ---------------- " << std::endl;
    TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                                "AnalysisType=Classification:Transformations=I:!V" );
    std::cout << "-------------------- add variables ---------------- " << std::endl;

    // Bind by const reference: the names are only read, no need to copy them.
    for (const auto& varname : variableNames)
    {
        factory->AddVariable (varname.c_str (), 'F');
    }

    for (const auto& varname : spectatorNames)
    {
        factory->AddSpectator (varname.c_str (), 'F');
    }

    std::cout << "-------------------- add trees ---------------- " << std::endl;
    // Signal and background are taken from the same chain and separated by cuts.
    TCut signalCut ("signal==1");
    TCut backgroundCut ("signal==0");
    if (isTransformed)
    {
        signalCut = "(signal_original==1 && signal_in==0)";
        backgroundCut = "(signal_original==0 && signal_in==0)";
    }
    if (isBackground)
    {
        // Both classes are drawn from signal==0 events, split on "prediction".
        signalCut     = TString("(signal==0) * (prediction > 0.7)");
        backgroundCut = TString("(signal==0) * (prediction < 0.4)");
    }
    factory->AddTree(tree, "Signal", 1.0, baseCut + signalCut, "TrainingTesting");
    factory->AddTree(tree, "Background", 1.0, baseCut + backgroundCut, "TrainingTesting");

    // No additional preselection here; the class cuts were passed to AddTree.
    TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
    TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";

    // Set individual event weights (the variables must exist in the original TTree)
    if (isBackground)
    {
        factory->SetSignalWeightExpression ("prediction");
        factory->SetBackgroundWeightExpression ("1");
    }

    std::cout << "-------------------- prepare ---------------- " << std::endl;
    factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                         "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=0:nTest_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

    // For BACKGROUND runs the method name is prefixed so it stays unique.
    TString methodName ("");
    if (isBackground)
        methodName = TString ("TONBKG_") + tmstmp;

    // The blocks below are compile-time switches: flip the condition to book
    // the corresponding method. Only the last one (NNPG) is currently active.

    if (false)
    {
        // gradient boosting training
        methodName += TString("GBDT");
        factory->BookMethod(TMVA::Types::kBDT, methodName,
                            "NTrees=40:BoostType=Grad:Shrinkage=0.01:MaxDepth=7:UseNvars=6:nCuts=20:MinNodeSize=10");
    }

    if (false)
    {
        methodName += TString("Likelihood");
        factory->BookMethod( TMVA::Types::kLikelihood, methodName,
                             "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );
    }

    if (false)
    {
        // Neural network with Gauss-transformed inputs (VarTransform=G).
        TString layoutString ("Layout=TANH|100,LINEAR");

        TString training0 ("LearningRate=1e-1,Momentum=0.0,Repetitions=1,ConvergenceSteps=300,BatchSize=20,TestRepetitions=15,WeightDecay=0.001,Regularization=NONE,DropConfig=0.0+0.5+0.5+0.5,DropRepetitions=1,Multithreading=True");
        TString training1 ("LearningRate=1e-2,Momentum=0.5,Repetitions=1,ConvergenceSteps=300,BatchSize=30,TestRepetitions=7,WeightDecay=0.001,Regularization=L2,Multithreading=True,DropConfig=0.0+0.1+0.1+0.1,DropRepetitions=1");
        TString training2 ("LearningRate=1e-2,Momentum=0.3,Repetitions=1,ConvergenceSteps=300,BatchSize=40,TestRepetitions=7,WeightDecay=0.0001,Regularization=L2,Multithreading=True");
        TString training3 ("LearningRate=1e-3,Momentum=0.1,Repetitions=1,ConvergenceSteps=200,BatchSize=70,TestRepetitions=7,WeightDecay=0.0001,Regularization=NONE,Multithreading=True");

        // Training phases are chained with '|' and run in the order given.
        TString trainingStrategyString ("TrainingStrategy=");
        trainingStrategyString += training0 + "|" + training1 + "|" + training2 + "|" + training3;

        TString nnOptions ("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=G:WeightInitialization=XAVIERUNIFORM");
        nnOptions.Append (":"); nnOptions.Append (layoutString);
        nnOptions.Append (":"); nnOptions.Append (trainingStrategyString);

        methodName += TString("NNgauss");
        factory->BookMethod( TMVA::Types::kNN, methodName, nnOptions ); // NN
    }

    if (false)
    {
        // Neural network with normalised inputs (VarTransform=N).
        TString layoutString ("Layout=TANH|200,TANH|70,LINEAR");

        TString training0 ("LearningRate=1e-2,Momentum=0.0,Repetitions=1,ConvergenceSteps=300,BatchSize=20,TestRepetitions=15,WeightDecay=0.001,Regularization=NONE,DropConfig=0.0+0.5+0.5+0.5,DropRepetitions=1,Multithreading=True");
        TString training1 ("LearningRate=1e-3,Momentum=0.5,Repetitions=1,ConvergenceSteps=300,BatchSize=30,TestRepetitions=7,WeightDecay=0.001,Regularization=L2,Multithreading=True,DropConfig=0.0+0.1+0.1+0.1,DropRepetitions=1");
        TString training2 ("LearningRate=1e-4,Momentum=0.3,Repetitions=1,ConvergenceSteps=300,BatchSize=40,TestRepetitions=7,WeightDecay=0.0001,Regularization=L2,Multithreading=True");
        TString training3 ("LearningRate=1e-5,Momentum=0.1,Repetitions=1,ConvergenceSteps=200,BatchSize=70,TestRepetitions=7,WeightDecay=0.0001,Regularization=NONE,Multithreading=True");

        TString trainingStrategyString ("TrainingStrategy=");
        trainingStrategyString += training0 + "|" + training1 + "|" + training2 + "|" + training3;

        TString nnOptions ("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=N:WeightInitialization=XAVIERUNIFORM");
        nnOptions.Append (":"); nnOptions.Append (layoutString);
        nnOptions.Append (":"); nnOptions.Append (trainingStrategyString);

        // NOTE(review): this branch assigns instead of appending, so the
        // "TONBKG_" prefix would be dropped if it were enabled - confirm
        // whether that is intended before switching it on.
        methodName = TString("NNnormalized");
        factory->BookMethod( TMVA::Types::kNN, methodName, nnOptions ); // NN
    }

    if (true)
    {
        // Active configuration: network with PCA + Gauss transformed inputs
        // (VarTransform=P+G).
        TString layoutString ("Layout=TANH|100,TANH|50,LINEAR");

        TString training0 ("LearningRate=1e-2,Momentum=0.0,Repetitions=1,ConvergenceSteps=100,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Regularization=NONE,DropConfig=0.0+0.5+0.5+0.5,DropRepetitions=1,Multithreading=True");
        TString training1 ("LearningRate=1e-3,Momentum=0.0,Repetitions=1,ConvergenceSteps=20,BatchSize=30,TestRepetitions=7,WeightDecay=0.001,Regularization=L2,Multithreading=True,DropConfig=0.0+0.1+0.1+0.1,DropRepetitions=1");
        TString training2 ("LearningRate=1e-4,Momentum=0.0,Repetitions=1,ConvergenceSteps=20,BatchSize=40,TestRepetitions=7,WeightDecay=0.0001,Regularization=L2,Multithreading=True");
        TString training3 ("LearningRate=1e-5,Momentum=0.0,Repetitions=1,ConvergenceSteps=30,BatchSize=70,TestRepetitions=7,WeightDecay=0.0001,Regularization=NONE,Multithreading=True");

        TString trainingStrategyString ("TrainingStrategy=");
        trainingStrategyString += training0 + "|" + training1 + "|" + training2 + "|" + training3;

        TString nnOptions ("!H:!V:ErrorStrategy=CROSSENTROPY:VarTransform=P+G:WeightInitialization=XAVIERUNIFORM");
        nnOptions.Append (":"); nnOptions.Append (layoutString);
        nnOptions.Append (":"); nnOptions.Append (trainingStrategyString);

        methodName += TString("NNPG");
        factory->BookMethod( TMVA::Types::kNN, methodName, nnOptions ); // NN
    }

    factory->TrainAllMethods();
    factory->TestAllMethods();
    factory->EvaluateAllMethods();

    outputFile->Close();

    delete factory;
    // Release the main chain first, then the friend chain: a friend added by
    // pointer is not owned by the chain it is attached to, so it has to be
    // freed explicitly (deleting a null pointer is a no-op).
    delete tree;
    delete treeFriend;

    switch (analysisType)
    {
    case AnalysisType::BACKGROUND:
        std::cout << "DONE BACKGROUND" << std::endl;
        break;
    case AnalysisType::DIRECT:
        std::cout << "DONE DIRECT" << std::endl;
        break;
    case AnalysisType::TRANSFORMED:
        std::cout << "DONE TRANSFORMED" << std::endl;
        break;
    }
    std::cout << "classification, return : " << outfileName << "  ,  " << methodName << std::endl;
    return std::make_pair (outfileName, methodName);
}