示例#1
0
文件: tmva.c 项目: dbarge/tauTo3mu
void TMVAClassification()
{
   TMVA::Tools::Instance();

   std::cout << "==> Start TMVAClassification" << std::endl;
   TString outfileName( "TMVA.root" );

   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "AnalysisType=Classification" );
   factory->AddVariable("LifeTime", 'F');
   factory->AddVariable("FlightDistance", 'F');
   factory->AddVariable("pt", 'F');
   
   TString fname = "../tau_data/training.root";
   TFile *input = TFile::Open( fname );
   
   TTree *tree     = (TTree*)input->Get("data");
   
   factory->AddTree(tree, "Signal", 1., "signal == 1", "Training");
   factory->AddTree(tree, "Signal", 1., "signal == 1", "Test");
   factory->AddTree(tree, "Background", 1., "signal == 0", "Training");
   factory->AddTree(tree, "Background", 1., "signal == 0", "Test");
   
   // gradient boosting training
   factory->BookMethod(TMVA::Types::kBDT, "GBDT",
                       "NTrees=40:BoostType=Grad:Shrinkage=0.01:MaxDepth=7:UseNvars=6:nCuts=20:MinNodeSize=10");
   factory->TrainAllMethods();
   input->Close();
   outputFile->Close();

   delete factory;
}
void TMVAClassification( TString myMethodList = "" )
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   // if you like to use a method via the plugin mechanism, we recommend using
   //
   // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
   // (an example is given for using the BDT as plugin (see below),
   // but of course the real application is when you write your own
   // method based)

   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Cut optimisation
   Use["Cuts"]            = 1;
   Use["CutsD"]           = 1;
   Use["CutsPCA"]         = 0;
   Use["CutsGA"]          = 0;
   Use["CutsSA"]          = 0;
   // 
   // --- 1-dimensional likelihood ("naive Bayes estimator")
   Use["Likelihood"]      = 1;
   Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"]   = 1; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"]   = 0;
   Use["LikelihoodMIX"]   = 0;
   //
   // --- Mutidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]           = 1;
   Use["PDERSD"]          = 0;
   Use["PDERSPCA"]        = 0;
   Use["PDEFoam"]         = 0;
   Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
   Use["KNN"]             = 1; // k-nearest neighbour method
   //
   // --- Linear Discriminant Analysis
   Use["LD"]              = 1; // Linear Discriminant identical to Fisher
   Use["Fisher"]          = 0;
   Use["FisherG"]         = 0;
   Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
   Use["HMatrix"]         = 0;
   //
   // --- Function Discriminant analysis
   Use["FDA_GA"]          = 1; // minimisation of user-defined function using Genetics Algorithm
   Use["FDA_SA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   Use["FDA_MCMT"]        = 0;
   //
   // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
   Use["MLP"]             = 0; // Recommended ANN
   Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
   Use["MLPBNN"]          = 1; // Recommended ANN with BFGS training method and bayesian regulator
   Use["CFMlpANN"]        = 0; // Depreciated ANN from ALEPH
   Use["TMlpANN"]         = 0; // ROOT's own ANN
   //
   // --- Support Vector Machine 
   Use["SVM"]             = 1;
   // 
   // --- Boosted Decision Trees
   Use["BDT"]             = 1; // uses Adaptive Boost
   Use["BDTG"]            = 0; // uses Gradient Boost
   Use["BDTB"]            = 0; // uses Bagging
   Use["BDTD"]            = 0; // decorrelation + Adaptive Boost
   Use["BDTF"]            = 0; // allow usage of fisher discriminant for node splitting 
   // 
   // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
   Use["RuleFit"]         = 1;
   // ---------------------------------------------------------------
(TMVA::gConfig().GetVariablePlotting()).fMaxNumOfAllowedVariablesForScatterPlots = 20;
   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // --------------------------------------------------------------------------------------------------

   // --- Here the preparation phase begins
//FILEOUT
   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName( "TMVA_2GeV_barrel2.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory is 
   // the only TMVA object you have to interact with
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
//   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
 //                                              "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );
   TMVA::Factory* factory = new TMVA::Factory("TMVAMulticlass_2GeV_barrel2",outputFile,
		                           "!V:!Silent:Color:!DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=multiclass" );
   	TFile* file = TFile::Open("/scratchfs/higgs/yudan/SingleParticle/Reco//all_2GeV/PID_2GeV_E30L_E10mm_H48L_H10mm_2GeV.root");
	if (!file->IsOpen()) std::cout << "could not open file" <<std::endl;

	TTree* treePi = (TTree*)file->Get("Pion");
	if (treePi == 0) std::cout << "could not open tree" <<std::endl;


	TTree* treeMu = (TTree*)file->Get("Muon");
	if (treeMu == 0) std::cout << "could not open tree" <<std::endl;


	TTree* treeE = (TTree*)file->Get("Electron");
	if (treeE == 0) std::cout << "could not open tree" <<std::endl;


   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
//   TCut mycuts = "NTrk==1 && MCPP[2]/MCPEn<0.7"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";


   factory->AddTree( treePi, TString("Pi"), 1. ,"abs(cosTheta)>0.3&&abs(cosTheta)<0.55");
   factory->AddTree( treeMu, TString("Mu"), 1.,"abs(cosTheta)>0.3&&abs(cosTheta)<0.55");
   factory->AddTree( treeE, TString("E"), 1.,"abs(cosTheta)>0.3&&abs(cosTheta)<0.55");



	factory->AddVariable("EcalNHit","EcalNHit", "units", 'I');
	factory->AddVariable("HcalNHit","HcalNHit", "units", 'I');
	factory->AddVariable("NLEcal","NLEcal", "units", 'I');
	factory->AddVariable("NLHcal","NLHcal", "units", 'I');
	factory->AddVariable("maxDisHtoL","maxDisHtoL", "units", 'F');
	factory->AddVariable("avDisHtoL","avDisHtoL", "units", 'F');
	factory->AddVariable("EE := EcalEn/EClu","EcalEn", "units", 'F');
	factory->AddVariable("graDepth","graDepth", "units", 'F');
	factory->AddVariable("cluDepth","cluDepth", "units", 'F');
	factory->AddVariable("minDepth","minDepth", "units", 'F');
	factory->AddVariable("MaxDisHel","MaxDisHel", "units", 'F');
	factory->AddVariable("FD_all","FD_all", "units", 'F');
	factory->AddVariable("FD_ECAL","FD_ECAL", "units", 'F');
	factory->AddVariable("FD_HCAL","FD_HCAL", "units", 'F');
	factory->AddVariable("E_10 := EEClu_L10/EcalEn","EEClu_L10", "units", 'F');
	factory->AddVariable("E_R := EEClu_R/EcalEn","EEClu_R", "units", 'F');
	factory->AddVariable("E_r := EEClu_r/EcalEn","EEClu_r", "units", 'F');
	factory->AddVariable("rms_Hcal","rms_Hcal", "units", 'F');
	factory->AddVariable("av_NHH","av_NHH", "units", 'F');
	factory->AddVariable("AL_Ecal","AL_Ecal", "units", 'I');
	factory->AddVariable("FD_ECALF10","FD_ECALF10", "units", 'F');
	factory->AddVariable("FD_ECALL20","FD_ECALL20", "units", 'F');
	factory->AddVariable("NH_ECALF10","NH_ECALF10", "units", 'I');
	factory->AddVariable("dEdx","dEdx", "units", 'F');
	  
  
   std::cout << "--- TMVAClassification       : Using input file: " << std::endl;
   
   // --- Register the training and test trees

   
   // global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;
   
   // You can add an arbitrary number of signal or background trees
   
   // To give different trees for training and testing, do as follows:
   //    factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
   //    factory->AddSignalTree( signalTestTree,     signalTestWeight,  "Test" );
   
   // Use the following code instead of the above two or four lines to add signal and background
   // training and test events "by hand"
   // NOTE that in this case one should not give expressions (such as "var1+var2") in the input
   //      variable definition, but simply compute the expression before adding the event
   //
   //     // --- begin ----------------------------------------------------------
   //     std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
   //     Float_t  treevars[4], weight;
   //     
   //     // Signal
   //     for (UInt_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   //     for (UInt_t i=0; i<signal->GetEntries(); i++) {
   //        signal->GetEntry(i);
   //        for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //        // add training and test events; here: first half is training, second is testing
   //        // note that the weight can also be event-wise
   //        if (i < signal->GetEntries()/2.0) factory->AddSignalTrainingEvent( vars, signalWeight );
   //        else                              factory->AddSignalTestEvent    ( vars, signalWeight );
   //     }
   //   
   //     // Background (has event weights)
   //     background->SetBranchAddress( "weight", &weight );
   //     for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   //     for (UInt_t i=0; i<background->GetEntries(); i++) {
   //        background->GetEntry(i);
   //        for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //        // add training and test events; here: first half is training, second is testing
   //        // note that the weight can also be event-wise
   //        if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight*weight );
   //        else                                factory->AddBackgroundTestEvent    ( vars, backgroundWeight*weight );
   //     }
         // --- end ------------------------------------------------------------
   //
   // --- end of tree registration 

   // Set individual event weights (the variables must exist in the original TTree)
   //    for signal    : factory->SetSignalWeightExpression    ("weight1*weight2");
   //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
   //factory->SetBackgroundWeightExpression( "weight" );

   // Apply additional cuts on the signal and background samples (can be different)
      // Tell the factory how to use the training and testing events
   //
   // If no numbers of events are given, half of the events in the tree are used 
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut,
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );

   factory->PrepareTrainingAndTestTree( "", "SplitMode=Random:NormMode=NumEvents:!V" );
   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

   if (Use["CutsSA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                           "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   // Likelihood ("naive Bayes estimator")
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // Decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   // PCA-transformed likelihood
   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 

   // Use a kernel density estimator to approximate the PDFs
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); 

   // Use a variable-dependent mix of splines and kernel density estimator
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

   // Test the multi-dimensional probability density estimator
   // here are the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["PDEFoamBoost"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
                           "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // H-Matrix (chi2-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" );

   // Linear discriminant (same as Fisher discriminant)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher discriminant (same as LD)
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", 
                           "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators

   // CF(Clermont-Ferrand)ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...  

   // Tmlp(Root)ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDTG"]) // Gradient Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
			"!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.50:nCuts=20:NNodesMax=8" );
                         //  "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:MaxDepth=2" );

   if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );

   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );

   if (Use["BDTF"])  // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables
      factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher",
                           "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","FitGA");

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();
//
//   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();
//
//   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
//   if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
int main(int argc, char** argv) {

    if(argc != 2)
    {
        std::cerr << ">>>>> analysis.cpp::usage: " << argv[0] << " configFileName      MVAconfigFileName" << std::endl ;
        return 1;
    }

    // Parse the config file
    parseConfigFile (argv[1]) ;

    std::string treeName  = gConfigParser -> readStringOption("Input::treeName");
    std::string fileSamples = gConfigParser -> readStringOption("Input::fileSamples");
    std::string inputDirectory = gConfigParser -> readStringOption("Input::inputDirectory");

    std::string inputBeginningFile = "out_NtupleProducer_";
    try {
        inputBeginningFile = gConfigParser -> readStringOption("Input::inputBeginningFile");
    }
    catch (char const* exceptionString) {
        std::cerr << " exception = " << exceptionString << std::endl;
    }
    std::cout << ">>>>> Input::inputBeginningFile  " << inputBeginningFile  << std::endl;



    double LUMI = gConfigParser -> readDoubleOption("Options::Lumi");

    std::vector<std::string> SignalName;
    SignalName = gConfigParser -> readStringListOption("Options::SignalName");

    for (int iSignalSample=0; iSignalSample<SignalName.size(); iSignalSample++) {
        std::cout << " Signal[" << iSignalSample << "] = " << SignalName.at(iSignalSample) << std::endl;
    }

    std::string nameWeight = "1";
    try {
        nameWeight = gConfigParser -> readStringOption("Options::nameWeight");
    }
    catch (char const* exceptionString) {
        std::cerr << " exception = " << exceptionString << std::endl;
    }
    std::cout << ">>>>> Input::nameWeight  " << nameWeight  << std::endl;



    TTree *treeJetLepVect[200];

    char *nameSample[1000];
    char *nameHumanReadable[1000];
    char* xsectionName[1000];

    char nameFileIn[1000];
    sprintf(nameFileIn,"%s",fileSamples.c_str());

    int numberOfSamples = ReadFile(nameFileIn, nameSample, nameHumanReadable, xsectionName);

    double Normalization[1000];
    double xsection[1000];

    for (int iSample=0; iSample<numberOfSamples; iSample++) {
        xsection[iSample] = atof(xsectionName[iSample]);
    }

    for (int iSample=0; iSample<numberOfSamples; iSample++) {
        char nameFile[20000];
        sprintf(nameFile,"%s/%s%s.root",inputDirectory.c_str(),inputBeginningFile.c_str(),nameSample[iSample]);

        TFile* f = new TFile(nameFile, "READ");

        treeJetLepVect[iSample] = (TTree*) f->Get(treeName.c_str());
        char nameTreeJetLep[100];
        sprintf(nameTreeJetLep,"treeJetLep_%d",iSample);
        treeJetLepVect[iSample]->SetName(nameTreeJetLep);

        double XSection;
        XSection = xsection[iSample];
        Normalization[iSample] = XSection * LUMI / 1000.;
    }

    //==== cut
    std::string CutFile = gConfigParser -> readStringOption("Selections::CutFile");
    std::vector<std::string> vCut;
    std::cout << " nCuts   = " << ReadFileCut(CutFile, vCut) << std::endl;

    std::string Cut;
    if (vCut.size() != 0) {
        Cut = vCut.at(0);
    }
    else {
        Cut = "1";
    }

    //==== HiggsMass
    std::string HiggsMass = gConfigParser -> readStringOption("Options::HiggsMass");

    //==== list of methods
    std::vector<std::string> vectorMyMethodList = gConfigParser -> readStringListOption("Options::MVAmethods");
    TString myMethodList;
    for (int iMVA = 0; iMVA < vectorMyMethodList.size(); iMVA++) {
        if (iMVA == 0) myMethodList = Form ("%s",vectorMyMethodList.at(iMVA).c_str());
        else           myMethodList = Form ("%s,%s",myMethodList.Data(),vectorMyMethodList.at(iMVA).c_str());
    }

    //==== output
    TString outfileName = gConfigParser -> readStringOption("Output::outFileName");


    // This loads the library
    TMVA::Tools::Instance();

    // Default MVA methods to be trained + tested
    std::map<std::string,int> Use;

    Use["MLP"]             = 1;
    Use["BDTG"]            = 1;
    Use["FDA_GA"]          = 0;
    Use["PDEFoam"]         = 0;


    std::cout << std::endl;
    std::cout << "==> Start TMVAClassification" << std::endl;

    // Select methods (don't look at this code - not of interest)
    if (myMethodList != "") {
        for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

        std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
        for (UInt_t i=0; i<mlist.size(); i++) {
            std::string regMethod(mlist[i]);

            if (Use.find(regMethod) == Use.end()) {
                std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
                for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
                std::cout << std::endl;
                return 0;
            }
            Use[regMethod] = 1;
        }
    }

    // --------------------------------------------------------------------------------------------------
    // --- Here the preparation phase begins

    // Create a new root output file
    TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

//   TMVA::Factory *factory = new TMVA::Factory( "TMVAMulticlass",     outputFile, "AnalysisType=multiclass:!V:!Silent:!V:Transformations=I;D" );
    TMVA::Factory *factory = new TMVA::Factory( "TMVAMulticlass",     outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=multiclass" );

    factory->AddVariable( "jetpt1" , 'F');
    factory->AddVariable( "jetpt2" , 'F');
    factory->AddVariable( "mjj" , 'F');
    factory->AddVariable( "detajj" , 'F');
    factory->AddVariable( "dphilljetjet" , 'F');

    factory->AddVariable( "pt1" , 'F');
    factory->AddVariable( "pt2" , 'F');
    factory->AddVariable( "mll" , 'F');
    factory->AddVariable( "dphill" , 'F');
    factory->AddVariable( "mth" , 'F');

    factory->AddVariable( "dphillmet" , 'F');
    factory->AddVariable( "mpmet" , 'F');

    factory->AddSpectator( "channel" , 'F');

    for (int iSample=0; iSample<numberOfSamples; iSample++) {
        int numEnt = treeJetLepVect[iSample]->GetEntries(Cut.c_str());
        std::cout << " Sample = " << nameSample[iSample] << " ~ " << nameHumanReadable[iSample] << " --> " << numEnt << std::endl;
        if (numEnt != 0) {
            if (iSample == 0) factory->AddTree( treeJetLepVect[iSample], "Signal", Normalization[iSample] );
            else if (iSample == 1) factory->AddTree( treeJetLepVect[iSample], "Background", Normalization[iSample] );
            else factory->AddTree( treeJetLepVect[iSample], TString(nameHumanReadable[iSample]), Normalization[iSample] );

//     factory->AddTree( treeJetLepVect[iSample], TString(nameHumanReadable[iSample]), Normalization[iSample] );
//     factory->AddTree( treeJetLepVect[iSample], TString(nameHumanReadable[iSample]), Normalization[iSample] , nameWeight.c_str());
            //     factory->AddTree( treeJetLepVect[iSample], TString(nameHumanReadable[iSample]));
        }
    }

//   for (int iSample=0; iSample<numberOfSamples; iSample++){
//    int numEnt = treeJetLepVect[iSample]->GetEntries(Cut.c_str());
//    std::cout << " Sample = " << nameSample[iSample] << " ~ " << nameHumanReadable[iSample] << " --> " << numEnt << std::endl;
//    if (numEnt != 0) {
//     bool isSig = false;
//     for (std::vector<std::string>::const_iterator itSig = SignalName.begin(); itSig != SignalName.end(); itSig++){
//      if (nameHumanReadable[iSample] == *itSig) isSig = true;
//     }
//     if (isSig) {
//      factory->AddTree( treeJetLepVect[iSample], TString("Signal"), Normalization[iSample] ); //---> ci deve essere uno chiamato Signal!
//     }
//     else {
//      factory->AddTree( treeJetLepVect[iSample], TString(nameHumanReadable[iSample]), Normalization[iSample] );
//     }
//    }
//   }
//
//   for (int iSample=0; iSample<numberOfSamples; iSample++){
//    int numEnt = treeJetLepVect[iSample]->GetEntries(Cut.c_str());
//    std::cout << " Sample = " << nameSample[iSample] << " ~ " << nameHumanReadable[iSample] << " --> " << numEnt << std::endl;
//    if (numEnt != 0) {
//     bool isSig = false;
//     for (std::vector<std::string>::const_iterator itSig = SignalName.begin(); itSig != SignalName.end(); itSig++){
//      if (nameHumanReadable[iSample] == *itSig) isSig = true;
//     }
//     if (isSig) {
// //      factory->AddTree( treeJetLepVect[iSample], TString("Signal"), Normalization[iSample] ); //---> ci deve essere uno chiamato Signal!
//     }
//     else {
//      factory->AddTree( treeJetLepVect[iSample], TString(nameHumanReadable[iSample]), Normalization[iSample] );
//     }
//    }
//   }


    std::cerr << " AAAAAAAAAAAAAAAAAAAAAAAAAAAAA " << std::endl;

    TCut mycuts = Cut.c_str();

//   factory->SetWeightExpression( nameWeight.c_str() );
//   factory->SetBackgroundWeightExpression( nameWeight.c_str() );
//   factory->SetSignalWeightExpression    ( nameWeight.c_str() );

    std::cerr << " BBBBBBBBBBBBBBBBBBBBBBBBBBBBB " << std::endl;

    factory->PrepareTrainingAndTestTree( mycuts ,"SplitMode=Random:NormMode=None:!V");
//   factory->PrepareTrainingAndTestTree( "" ,"SplitMode=Random:NormMode=None:!V");

    std::cerr << " CCCCCCCCCCCCCCCCCCCCCCCCCCCCC " << std::endl;



    // gradient boosted decision trees
//   if (Use["BDTG"])    factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.50:nCuts=20:NNodesMax=8");
    if (Use["BDTG"])    factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=600:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.50:nCuts=20:NNodesMax=8");
    // neural network
    if (Use["MLP"])     factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:NCycles=1000:HiddenLayers=N+5,5:TestRate=5:EstimatorType=MSE");
    // functional discriminant with GA minimizer
    if (Use["FDA_GA"])  factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );
    // PDE-Foam approach
    if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );



    //==== Optimize parameters in MVA methods
//   factory->OptimizeAllMethods();
//   factory->OptimizeAllMethods("ROCIntegral","Scan");
    //==== Train MVAs using the set of training events ====
    factory->TrainAllMethods();

    //==== Evaluate all MVAs using the set of test events ====
    factory->TestAllMethods();

    //==== Evaluate and compare performance of all configured MVAs ====
    factory->EvaluateAllMethods();

    // --------------------------------------------------------------

    // Save the output
    outputFile->Close();

    std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
    std::cout << "==> TMVAnalysis is done!" << std::endl;

    delete factory;

    //==== change position of weights file
    std::string toDo;

    toDo = "rm -r Weights-MVA-MultiClass/weights_" + HiggsMass + "_testVariables";
    std::cerr << "toDo = " << toDo << std::endl;
    system (toDo.c_str());

    toDo = "mv weights Weights-MVA-MultiClass/weights_" + HiggsMass + "_testVariables";
    std::cerr << "toDo = " << toDo << std::endl;
    system (toDo.c_str());

    // Launch the GUI for the root macros
    //   if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
示例#4
0
void TMVA_stop( TString signal_name = "T2tt", int train_region = 1, float x_parameter = 0.25)
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVA_stop.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   // if you like to use a method via the plugin mechanism, we recommend using
   //
   // mylinux~> root -l TMVA_stop.C\(\"P_myMethod\"\)
   // (an example is given for using the BDT as plugin (see below),
   // but of course the real application is when you write your own
   // method based)

  //-----------------------------------------------------
  // define event selection (store in TCut sel)
  //-----------------------------------------------------

  TCut njets4("mini_njets>=4");
  TCut met100("mini_met>=100");
  TCut mt120("mini_mt>=120");
  TCut nb1("mini_nb>=1");
  TCut isotrk("mini_passisotrk==1");
  TCut lep_pt30("mini_nlep>=1 && mini_lep1pt>30.0");
  TCut sig("mini_sig==1");
   
  TCut  sel0  = njets4 + met100 + mt120 + nb1 + isotrk + lep_pt30 + sig;

  cout << "Using selection      : " << sel0.GetTitle() << endl;
  cout << "Doing signal point   : " << train_region       << endl;

  //-----------------------------------------------------
  // choose which variables to include in MVA training
  //-----------------------------------------------------
  
  std::map<std::string,int> mvaVar;
  mvaVar[ "met" ]			= 1;
  mvaVar[ "lep1pt" ]  	    = 0;
  mvaVar[ "mt2w" ]	  		= 1;
  mvaVar[ "htratiom" ]	    = 1;
  mvaVar[ "chi2" ]	        = 1;
  mvaVar[ "dphimjmin" ]		= 1;
  mvaVar[ "pt_b" ]			= 0;
  mvaVar[ "nb" ]			= 0;
  mvaVar[ "pt_J1" ]			= 0;
  mvaVar[ "pt_J2" ]			= 0;
  mvaVar[ "rand" ]			= 0;

  mvaVar[ "mt" ]			= 0;
  mvaVar[ "mt2bl" ]			= 0;
  mvaVar[ "mt2b" ]			= 0;
  mvaVar[ "lep1eta" ]			= 0;
  mvaVar[ "thrjetlm" ]			= 0;
  mvaVar[ "apljetlm" ]			= 0;
  mvaVar[ "sphjetlm" ]			= 0;
  mvaVar[ "cirjetlm" ]			= 0;
  mvaVar[ "chi2min" ]			= 0;
  mvaVar[ "chi2min_mt2b" ]		= 0;
  mvaVar[ "chi2min_mt2bl" ]		= 0;
  mvaVar[ "chi2min_mt2w" ]		= 0;
  mvaVar[ "mt2bmin" ]			= 0;
  mvaVar[ "mt2blmin" ]			= 0;
  mvaVar[ "mt2wmin_chi2" ]		= 0;
  mvaVar[ "mt2bmin_chi2" ]		= 0;
  mvaVar[ "mt2blmin_chi2" ]		= 0;
  mvaVar[ "mt2wmin_chi2prob" ]		= 0;
  mvaVar[ "mt2bmin_chi2prob" ]		= 0;
  mvaVar[ "mt2blmin_chi2prob" ]		= 0;
  mvaVar[ "htratiol" ]              	= 0;
  mvaVar[ "dphimj1" ]			= 0;
  mvaVar[ "dphimj2" ]			= 0;
  mvaVar[ "metsig" ]			= 0;

  //---------------------------------
  //choose bkg samples to include
  //---------------------------------
  cout << "Background trees: " << endl;
  int n_backgrounds = 8;

  TString backgrounds[] = {"ttdl_powheg", "ttsl_powheg", "w1to4jets", "tW_lep", "triboson", "diboson", "ttV", "DY1to4Jtot" };

  TString bkgPath = "/nfs-3/userdata/stop/Train/V00-02-18__V00-03-00_4jetsMET100_bkg/";

  TChain* chBackground = new TChain("t");
 
  for (int i = 0; i < n_backgrounds; i++) {
     TString backgroundChain = bkgPath + "/" + backgrounds[i] + ".root";
     cout << "    " << backgroundChain << endl;
     chBackground ->Add(backgroundChain );
  }

  //---------------------------------
  //choose signal sample to include
  //---------------------------------
  cout << "Signal trees: " << endl;
  TString s_train_region = "";
  s_train_region += train_region;
  TString s_x_parameter = "";
  s_x_parameter = Form("%.2f",x_parameter);

  TString signalPath = "/nfs-3/userdata/stop/Train/";
  TString signalVersion = "V00-02-18__V00-03-00_4jetsMET100_";

  TChain *chSignal = new TChain("t");

  TString base_name = signalPath + "/" + signalVersion + signal_name + "/" + signal_name + "_" + s_train_region;
  if (signal_name == "T2bw") base_name = base_name + "_" + s_x_parameter;
  TString signalChain  = base_name + ".root" ;

  cout << "    " << signalChain << endl;

  chSignal->Add(signalChain);

  //-----------------------------------------------------
  // choose backgrounds to include for multiple outputs
  //-----------------------------------------------------
  
  // bool doMultipleOutputs = false;

  // TChain *chww = new TChain("Events");
  // chww->Add(Form("%s/WWTo2L2Nu_PU_testFinal_baby.root",babyPath));
  // chww->Add(Form("%s/GluGluToWWTo4L_PU_testFinal_baby.root",babyPath));
  
  // TChain *chwjets = new TChain("Events");
  // chwjets->Add(Form("%s/WJetsToLNu_PU_testFinal_baby.root",babyPath));
  
  // TChain *chtt = new TChain("Events");
  // chtt->Add(Form("%s/TTJets_PU_testFinal_baby.root",babyPath));
  
  // std::map<std::string,int> includeBkg;
  // includeBkg["ww"]      = 1;
  // includeBkg["wjets"]   = 0;
  // includeBkg["tt"]      = 0;

   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Cut optimisation
   Use["Cuts"]            = 0;
   Use["CutsD"]           = 0;
   Use["CutsPCA"]         = 0;
   Use["CutsGA"]          = 0;
   Use["CutsSA"]          = 0;
   // 
   // --- 1-dimensional likelihood ("naive Bayes estimator")
   Use["Likelihood"]      = 0;
   Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"]   = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"]   = 0;
   Use["LikelihoodMIX"]   = 0;
   //
   // --- Mutidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]           = 0;
   Use["PDERSD"]          = 0;
   Use["PDERSPCA"]        = 0;
   Use["PDEFoam"]         = 0;
   Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
   Use["KNN"]             = 0; // k-nearest neighbour method
   //
   // --- Linear Discriminant Analysis
   Use["LD"]              = 0; // Linear Discriminant identical to Fisher
   Use["Fisher"]          = 0;
   Use["FisherG"]         = 0;
   Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
   Use["HMatrix"]         = 0;
   //
   // --- Function Discriminant analysis
   Use["FDA_GA"]          = 0; // minimisation of user-defined function using Genetics Algorithm
   Use["FDA_SA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   Use["FDA_MCMT"]        = 0;
   //
   // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
   Use["MLP"]             = 0; // Recommended ANN
   Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
   Use["MLPBNN"]          = 0; // Recommended ANN with BFGS training method and bayesian regulator
   Use["CFMlpANN"]        = 0; // Depreciated ANN from ALEPH
   Use["TMlpANN"]         = 0; // ROOT's own ANN
   //
   // --- Support Vector Machine 
   Use["SVM"]             = 0;
   // 
   // --- Boosted Decision Trees
   Use["BDT"]             = 1; // uses Adaptive Boost
   Use["BDT1"]            = 0; // uses Adaptive Boost
   Use["BDTG"]            = 0; // uses Gradient Boost
   Use["BDTB"]            = 0; // uses Bagging
   Use["BDTD"]            = 0; // decorrelation + Adaptive Boost
   // 
   // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
   Use["RuleFit"]         = 0;
   //
   // --- multi-output MVA's
   Use["multi_BDTG"]      = 0;
   Use["multi_MLP"]       = 0;
   Use["multi_FDA_GA"]    = 0;

   //
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   // --- Here the preparation phase begins

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName = "TMVA_" + signal_name + "_" + s_train_region;
   if (signal_name == "T2bw") outfileName = outfileName +"_" + s_x_parameter;
   outfileName += ".root";
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   TString classification_name = "classification_" + signal_name + "_" + s_train_region;
   if (signal_name == "T2bw") classification_name = classification_name +"_" + s_x_parameter;

   /*
   TString multioutfileName( "TMVA_HWW_multi.root" );
   TFile* multioutputFile;

   if( doMultipleOutputs )
     multioutputFile = TFile::Open( multioutfileName, "RECREATE" );
   */

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory is 
   // the only TMVA object you have to interact with
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( classification_name, outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );
   /*
   TMVA::Factory *multifactory;
   if( doMultipleOutputs )
     multifactory= new TMVA::Factory( "TMVAMulticlass", multioutputFile,
                                      "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=multiclass" );
   */
   
   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   //factory->AddVariable( "myvar1 := var1+var2", 'F' );
   //factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' );
   //factory->AddVariable( "var3",                "Variable 3", "units", 'F' );
   //factory->AddVariable( "var4",                "Variable 4", "units", 'F' );

   //--------------------------------------------------------
   // choose which variables to include in training
   //--------------------------------------------------------

   if( mvaVar[ "met"           ]  == 1 ) factory->AddVariable( "mini_met"                    ,  "E_{T}^{miss}"               ,       "GeV", 'F' );
   if( mvaVar[ "mt"            ]  == 1 ) factory->AddVariable( "mini_mt"                     ,  "M_{T}"                      ,       "GeV", 'F' );
   if( mvaVar[ "mt2w"          ]  == 1 ) factory->AddVariable( "mini_mt2w"                   ,  "MT2W"                       ,       "GeV", 'F' );
   if( mvaVar[ "mt2bl"         ]  == 1 ) factory->AddVariable( "mini_mt2bl"                  ,  "MT2bl"                      ,       "GeV", 'F' );
   if( mvaVar[ "mt2b"          ]  == 1 ) factory->AddVariable( "mini_mt2b"                   ,  "MT2b"                       ,       "GeV", 'F' );
   if( mvaVar[ "chi2"          ]  == 1 ) factory->AddVariable( "mini_chi2"                   ,  "chi2"                       ,       ""   , 'F' );
   if( mvaVar[ "lep1pt"        ]  == 1 ) factory->AddVariable( "mini_lep1pt"                 ,  "lepton pt"                  ,       ""   , 'F' );
   if( mvaVar[ "lep1eta"       ]  == 1 ) factory->AddVariable( "mini_lep1eta"                ,  "lepton eta"                 ,       ""   , 'F' );
   if( mvaVar[ "thrjetlm"      ]  == 1 ) factory->AddVariable( "mini_thrjetlm"               ,  "thrust"                     ,       ""   , 'F' );
   if( mvaVar[ "apljetlm"      ]  == 1 ) factory->AddVariable( "mini_apljetlm"               ,  "aplanarity"                 ,       ""   , 'F' );
   if( mvaVar[ "sphjetlm"      ]  == 1 ) factory->AddVariable( "mini_sphjetlm"               ,  "sphericity"                 ,       ""   , 'F' );
   if( mvaVar[ "cirjetlm"      ]  == 1 ) factory->AddVariable( "mini_cirjetlm"               ,  "circularity"                ,       ""   , 'F' );
   if( mvaVar[ "chi2min"       ]  == 1 ) factory->AddVariable( "mini_min(chi2min,100)"       ,  "#chi^{2}_{min}"             ,       ""   , 'F' );
   if( mvaVar[ "chi2minprob"   ]  == 1 ) factory->AddVariable( "mini_chi2minprob"            ,  "Prob(#chi^{2}_{min})"       ,       ""   , 'F' );
   if( mvaVar[ "chi2min_mt2b"  ]  == 1 ) factory->AddVariable( "mini_chi2min_mt2b"           ,  "MT2b(#chi^{2}_{min})"       ,       ""   , 'F' );
   if( mvaVar[ "chi2min_mt2bl" ]  == 1 ) factory->AddVariable( "mini_chi2min_mt2bl"          ,  "MT2bl(#chi^{2}_{min})"      ,       ""   , 'F' );
   if( mvaVar[ "chi2min_mt2w"  ]  == 1 ) factory->AddVariable( "mini_chi2min_mt2w"           ,  "MT2W(#chi^{2}_{min})"       ,       ""   , 'F' );
   if( mvaVar[ "mt2bmin"       ]  == 1 ) factory->AddVariable( "mini_mt2bmin"                ,  "MT2b_{min}"                 ,       ""   , 'F' );
   if( mvaVar[ "mt2blmin"      ]  == 1 ) factory->AddVariable( "mini_mt2blmin"               ,  "MT2bl_{min}"                ,       ""   , 'F' );
   if( mvaVar[ "mt2wmin"       ]  == 1 ) factory->AddVariable( "mini_mt2wmin"                ,  "MT2W_{min}"                 ,       ""   , 'F' );
   if( mvaVar[ "mt2bmin_chi2"  ]  == 1 ) factory->AddVariable( "min(mt2bmin_chi2,100)"  ,  "#chi^{2}(MT2b_{min})"       ,       ""   , 'F' );
   if( mvaVar[ "mt2blmin_chi2" ]  == 1 ) factory->AddVariable( "min(mt2blmin_chi2,100)" ,  "#chi^{2}(MT2bl_{min})"      ,       ""   , 'F' );
   if( mvaVar[ "mt2wmin_chi2"  ]  == 1 ) factory->AddVariable( "min(mt2wmin_chi2,100)"  ,  "#chi^{2}(MT2W_{min})"       ,       ""   , 'F' );
   if( mvaVar[ "mt2bmin_chi2prob"  ]  == 1 ) factory->AddVariable( "mt2bmin_chi2prob"   ,  "Prob(#chi^{2}(MT2b_{min}))"       ,       ""   , 'F' );
   if( mvaVar[ "mt2blmin_chi2prob" ]  == 1 ) factory->AddVariable( "mt2blmin_chi2prob"  ,  "Prob(#chi^{2}(MT2bl_{min}))"      ,       ""   , 'F' );
   if( mvaVar[ "mt2wmin_chi2prob"  ]  == 1 ) factory->AddVariable( "mt2wmin_chi2prob"   ,  "Prob(#chi^{2}(MT2W_{min}))"       ,       ""   , 'F' );
   if( mvaVar[ "htratiol"      ]  == 1 ) factory->AddVariable( "mini_htssl/(mini_htosl+mini_htssl)"    ,  "H_{T}^{SSL}/H_{T}"          ,       ""   , 'F' );
   if( mvaVar[ "htratiom"      ]  == 1 ) factory->AddVariable( "mini_htssm/(mini_htosm+mini_htssm)"    ,  "H_{T}^{SSM}/H_{T}"          ,       ""   , 'F' );
   if( mvaVar[ "dphimj1"       ]  == 1 ) factory->AddVariable( "mini_dphimj1"                ,  "#Delta#phi(j1,E_{T}^{miss})",       ""   , 'F' );
   if( mvaVar[ "dphimj2"       ]  == 1 ) factory->AddVariable( "mini_dphimj2"                ,  "#Delta#phi(j2,E_{T}^{miss})",       ""   , 'F' );
   if( mvaVar[ "dphimjmin"     ]  == 1 ) factory->AddVariable( "mini_dphimjmin"              ,  "min(#Delta#phi(j_{1,2},E_{T}^{miss}))",       ""   , 'F' );
   if( mvaVar[ "rand"          ]  == 1 ) factory->AddVariable( "mini_rand"                   ,  "random(0,1)"                ,       ""   , 'F' );
   if( mvaVar[ "metsig"        ]  == 1 ) factory->AddVariable( "met/sqrt(htosl+htssl)"  ,  "E_{T}^{miss}/#sqrt{H_{T}}"  ,       "#sqrt{GeV}"   , 'F' )
;
   if( mvaVar[ "pt_b"          ]  == 1 ) factory->AddVariable( "mini_pt_b"  ,       "P_T(b) GeV"   , 'F' );
   if( mvaVar[ "nb"            ]  == 1 ) factory->AddVariable( "mini_nb"  ,       "P_T(b) GeV"   , 'F' );
   if( mvaVar[ "pt_J1"          ]  == 1 ) factory->AddVariable( "pt_J1"  ,       "P_T(J1) GeV"   , 'F' );
   if( mvaVar[ "pt_J2"          ]  == 1 ) factory->AddVariable( "pt_J2"  ,       "P_T(J2) GeV"   , 'F' );
   
   /*
   if( doMultipleOutputs ){
     if (mvaVar["lephard_pt"])       multifactory->AddVariable( "lephard_pt",                 "1st lepton pt",                "GeV", 'F' );
     if (mvaVar["lepsoft_pt"])       multifactory->AddVariable( "lepsoft_pt",                 "2nd lepton pt",                "GeV", 'F' );
     if (mvaVar["dil_dphi"])         multifactory->AddVariable( "dil_dphi",                   "dphi(ll)",                     "",    'F' );
     if (mvaVar["dil_mass"])         multifactory->AddVariable( "dil_mass",                   "M(ll)",                        "GeV", 'F' );
     if (mvaVar["event_type"])       multifactory->AddVariable( "event_type",                 "Dil Flavor Type",              "",    'F' );
     if (mvaVar["met_projpt"])       multifactory->AddVariable( "met_projpt",                 "Proj. MET",                    "GeV", 'F' );
     if (mvaVar["met_pt"])           multifactory->AddVariable( "met_pt",                     "MET",                          "GeV", 'F' );
     if (mvaVar["mt_lephardmet"])    multifactory->AddVariable( "mt_lephardmet",              "MT(lep1,MET)",                 "GeV", 'F' );
     if (mvaVar["mt_lepsoftmet"])    multifactory->AddVariable( "mt_lepsoftmet",              "MT(lep2,MET)",                 "GeV", 'F' );
     if (mvaVar["mthiggs"])          multifactory->AddVariable( "mthiggs",                    "MT(Higgs)",                    "GeV", 'F' );
     if (mvaVar["dphi_lephardmet"])  multifactory->AddVariable( "dphi_lephardmet",            "dphi(lep1,MET)",               "GeV", 'F' );
     if (mvaVar["dphi_lepsoftmet"])  multifactory->AddVariable( "dphi_lepsoftmet",            "dphi(lep2,MET)",               "GeV", 'F' );
     if (mvaVar["lepsoft_fbrem"])    multifactory->AddVariable( "lepsoft_fbrem",              "2nd lepton f_{brem}",          "",    'F' );
     if (mvaVar["lepsoft_eOverPIn"]) multifactory->AddVariable( "lepsoft_eOverPIn",           "2nd lepton E/p",               "",    'F' );
     if (mvaVar["lepsoft_qdphi"])    multifactory->AddVariable( "lepsoft_q * lepsoft_dPhiIn", "2nd lepton q#times#Delta#phi", "",    'F' );
   }
   */

   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
   //factory->AddSpectator( "spec1 := var1*2",  "Spectator 1", "units", 'F' );
   //factory->AddSpectator( "spec2 := var1*3",  "Spectator 2", "units", 'F' );

//   TTree* signalTrainingTree =  (TTree*) chSignalTrain;
//   TTree* signalTestTree =  (TTree*) chSignalTest;
//
//   TTree* bkgTrainingTree =  (TTree*) chBkgTrain;
//   TTree* bkgTestTree =  (TTree*) chBkgTest;
   
//    std::cout << "--- TMVAClassification       : Using bkg input files: -------------------" <<  std::endl;
// 
//    TObjArray *listOfBkgFiles = chbackground->GetListOfFiles();
//    TIter bkgFileIter(listOfBkgFiles);
//    TChainElement* currentBkgFile = 0;
// 
//    while((currentBkgFile = (TChainElement*)bkgFileIter.Next())) {
//      std::cout << currentBkgFile->GetTitle() << std::endl;
//    }
// 
//    std::cout << "--- TMVAClassification       : Using sig input files: -------------------" <<  std::endl;
//    
//    TObjArray *listOfSigFiles = chsignal->GetListOfFiles();
//    TIter sigFileIter(listOfSigFiles);
//    TChainElement* currentSigFile = 0;
// 
//    while((currentSigFile = (TChainElement*)sigFileIter.Next())) {
//      std::cout << currentSigFile->GetTitle() << std::endl;
//    }

   // global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;

   // You can add an arbitrary number of signal or background trees
//   factory->AddSignalTree    ( chSignal,     signalWeight     );
//   factory->AddBackgroundTree( chBackground, backgroundWeight );

   factory->AddTree(chSignal, "Signal", signalWeight, sel0+"mini_rand < 0.5", "train");
   factory->AddTree(chSignal, "Signal", signalWeight, sel0+"mini_rand >= 0.5", "test");
   factory->AddTree(chBackground, "Background", backgroundWeight, sel0+"mini_rand < 0.5", "train");
   factory->AddTree(chBackground, "Background", backgroundWeight, sel0+"mini_rand >= 0.5", "test");
   
   // To give different trees for training and testing, do as follows:
   //factory->AddSignalTree( signalTrainingTree, signalWeight, "Training" );
   //factory->AddSignalTree( signalTestTree,     signalWeight,  "Test" );

   //factory->AddBackgroundTree( bkgTrainingTree, backgroundWeight, "Training" );
   //factory->AddBackgroundTree( bkgTestTree,     backgroundWeight,  "Test" );
   
   // Use the following code instead of the above two or four lines to add signal and background
   // training and test events "by hand"
   // NOTE that in this case one should not give expressions (such as "var1+var2") in the input
   //      variable definition, but simply compute the expression before adding the event
   //
   //     // --- begin ----------------------------------------------------------
   //     std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
   //     Float_t  treevars[4], weight;
   //     
   //     // Signal
   //     for (UInt_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   //     for (UInt_t i=0; i<signal->GetEntries(); i++) {
   //        signal->GetEntry(i);
   //        for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //        // add training and test events; here: first half is training, second is testing
   //        // note that the weight can also be event-wise
   //        if (i < signal->GetEntries()/2.0) factory->AddSignalTrainingEvent( vars, signalWeight );
   //        else                              factory->AddSignalTestEvent    ( vars, signalWeight );
   //     }
   //   
   //     // Background (has event weights)
   //     background->SetBranchAddress( "weight", &weight );
   //     for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   //     for (UInt_t i=0; i<background->GetEntries(); i++) {
   //        background->GetEntry(i);
   //        for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //        // add training and test events; here: first half is training, second is testing
   //        // note that the weight can also be event-wise
   //        if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight*weight );
   //        else                                factory->AddBackgroundTestEvent    ( vars, backgroundWeight*weight );
   //     }
   //      // --- end ------------------------------------------------------------
   //
   // --- end of tree registration 
   
   // Set individual event weights (the variables must exist in the original TTree)
   factory->SetSignalWeightExpression    ("mini_weight");
   factory->SetBackgroundWeightExpression("mini_weight");

   /*
   if( doMultipleOutputs ){
     multifactory->AddTree(signal,"Signal");
     multifactory->SetSignalWeightExpression    ("event_scale1fb");
     multifactory->SetBackgroundWeightExpression("event_scale1fb");
     multifactory->SetWeightExpression("event_scale1fb");
     
     if( includeBkg["ww"] ){
       TTree* ww = (TTree*) chww;
       multifactory->AddTree(ww,"WW");
       cout << "Added WW to multi-MVA" << endl;
     }
     if( includeBkg["wjets"] ){
       TTree* wjets = (TTree*) chwjets;
       multifactory->AddTree(wjets,"WJets");
       cout << "Added W+jets to multi-MVA" << endl;
     }
     if( includeBkg["tt"] ){
       TTree* tt = (TTree*) chtt;
       multifactory->AddTree(tt,"tt");
       cout << "Added ttbar multi-MVA" << endl;
     }
   }
   */

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = sel0; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = sel0; // for example: TCut mycutb = "abs(var1)<0.5";

   // Tell the factory how to use the training and testing events
   //
   // If no numbers of events are given, half of the events in the tree are used 
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut,
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
   
   //Use random splitting
//   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
//                                        "nTrain_Signal=100000:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
   factory->PrepareTrainingAndTestTree( "", "",
                                        "nTrain_Signal=0:nTrain_Background=0:NormMode=None:!V" );

   // if( doMultipleOutputs ){
   //   multifactory->PrepareTrainingAndTestTree( mycuts, mycutb,
   //                                             "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
   // }

   //Use alternate splitting 
   //(this is preferable since its easier to track which events were used for training, but the job crashes! need to fix this...)
   //factory->PrepareTrainingAndTestTree( mycuts, mycutb,
   //                                     "nTrain_Signal=0:nTrain_Background=0:SplitMode=Alternate:NormMode=NumEvents:!V" );

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

   if (Use["CutsSA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                           "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   // Likelihood ("naive Bayes estimator")
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // Decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   // PCA-transformed likelihood
   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 

   // Use a kernel density estimator to approximate the PDFs
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); 

   // Use a variable-dependent mix of splines and kernel density estimator
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

   // Test the multi-dimensional probability density estimator
   // here are the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["PDEFoamBoost"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
                           "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // H-Matrix (chi2-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" );

   // Linear discriminant (same as Fisher discriminant)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher discriminant (same as LD)
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", 
                           "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );

//      factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:VarTransform=N:NCycles=1000:HiddenLayers=N+N:TestRate=5:!UseRegulator:LearningRate=0.2:DecayRate=0.001:BPMode=batch:BatchSize=500"); 

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators

   // CF(Clermont-Ferrand)ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...  

   // Tmlp(Root)ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDTG"]) // Gradient Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5" );

   if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   if (Use["BDT1"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT1",
                             "!H:!V:NTrees=200:nEventsMin=300:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=4:PruneMethod=NoPruning" );

   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // if( doMultipleOutputs ){
   //   if (Use["multi_BDTG"]) // gradient boosted decision trees
   //     multifactory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.50:nCuts=20:NNodesMax=8");
   //   if (Use["multi_MLP"]) // neural network
   //     multifactory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:NCycles=1000:HiddenLayers=N+5,5:TestRate=5:EstimatorType=MSE");
   //   if (Use["multi_FDA_GA"]) // functional discriminant with GA minimizer
   //     multifactory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );
   // }
   
   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","GA");

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs
  
   // Train MVAs using the set of training events
   factory->TrainAllMethods();
  
   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();
  
   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();
  
   // if( doMultipleOutputs ){
   //   // Train nulti-MVAs using the set of training events
   //   multifactory->TrainAllMethods();
     
   //   // ---- Evaluate all multi-MVAs using the set of test events
   //   multifactory->TestAllMethods();
     
   //   // ----- Evaluate and compare performance of all configured multi-MVAs
   //   multifactory->EvaluateAllMethods();
   // }
   
   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();
   //if( doMultipleOutputs )  multioutputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;
  
   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
示例#5
0
void TMVAMulticlass(){
   TString outfileName = "TMVAMulticlass.root";
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=multiclass" );
   factory->AddVariable( "var0", 'F' );
   factory->AddVariable( "var1", 'F' );
   TFile *input(0);
   TString fname = "./data.root";
   if (!gSystem->AccessPathName( fname )) {
      // first we try to find data.root in the local directory
      std::cout << "--- TMVAMulticlass   : Accessing " << fname << std::endl;
      input = TFile::Open( fname );
   }
   else {
      gROOT->LoadMacro( "./createData.C");
      create_multiclassdata(20000);
      cout << " created data.root for tests of the multiclass features"<<endl;
      input = TFile::Open( fname );
   }
   if (!input) {
      std::cout << "ERROR: could not open data file" << std::endl;
      exit(1);
   }
   TTree *tree     = (TTree*)input->Get("TreeR");
   
   gROOT->cd( outfileName+TString(":/") );
   factory->AddTree    ( tree, "Signal1",    1. , "cls==0"   );
   factory->AddTree    ( tree, "Signal2",    1. , "cls==1"   );
   factory->AddTree    ( tree, "Background",    1., "cls==2" );
   factory->PrepareTrainingAndTestTree( "", "SplitMode=Random:NormMode=NumEvents:!V" );

   factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5");
   factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:VarTransform=N:NCycles=100:HiddenLayers=N+5,3:TestRate=5"); // testing vartransforms
   factory->BookMethod( TMVA::Types::kMLP, "MLP2", "!H:!V:NeuronType=tanh:NCycles=100:HiddenLayers=N+5,3:TestRate=5");
   factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                        "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x0*x1:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );
   
  // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------
   
   // Save the output
   outputFile->Close();
   
   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;
   
   delete factory;
   
   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );
   
   
}
示例#6
0
int TMVAKaggleHiggs ( TString myMethodList = "" )
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   // if you like to use a method via the plugin mechanism, we recommend using
   //
   // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
   // (an example is given for using the BDT as plugin (see below),
   // but of course the real application is when you write your own
   // method based)

   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Cut optimisation
   Use["Cuts"]            = 0;
   Use["CutsD"]           = 0;
   Use["CutsPCA"]         = 0;
   Use["CutsGA"]          = 0;
   Use["CutsSA"]          = 0;
   // 
   // --- 1-dimensional likelihood ("naive Bayes estimator")
   Use["Likelihood"]      = 0;
   Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"]   = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"]   = 0;
   Use["LikelihoodMIX"]   = 0;
   //
   // --- Mutidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]           = 0;
   Use["PDERSD"]          = 0;
   Use["PDERSPCA"]        = 0;
   Use["PDEFoam"]         = 0;
   Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
   Use["KNN"]             = 0; // k-nearest neighbour method
   //
   // --- Linear Discriminant Analysis
   Use["LD"]              = 0; // Linear Discriminant identical to Fisher
   Use["Fisher"]          = 0;
   Use["FisherG"]         = 0;
   Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
   Use["HMatrix"]         = 0;
   //
   // --- Function Discriminant analysis
   Use["FDA_GA"]          = 0; // minimisation of user-defined function using Genetics Algorithm
   Use["FDA_SA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   Use["FDA_MCMT"]        = 0;
   //
   // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
   Use["MLP"]             = 0; // Recommended ANN
   Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
   Use["MLPBNN"]          = 0; // Recommended ANN with BFGS training method and bayesian regulator
   Use["CFMlpANN"]        = 0; // Depreciated ANN from ALEPH
   Use["TMlpANN"]         = 0; // ROOT's own ANN
   Use["NN"]              = 1; // improved implementation of a NN
   //
   // --- Support Vector Machine 
   Use["SVM"]             = 0;
   // 
   // --- Boosted Decision Trees
   Use["BDT"]             = 1; // uses Adaptive Boost
   Use["BDTG"]            = 0; // uses Gradient Boost
   Use["BDTB"]            = 0; // uses Bagging
   Use["BDTD"]            = 0; // decorrelation + Adaptive Boost
   Use["BDTF"]            = 0; // allow usage of fisher discriminant for node splitting 
   // 
   // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
   Use["RuleFit"]         = 0;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAKaggleHiggs" << std::endl;

   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return 1;
         }
         Use[regMethod] = 1;
      }
   }

   // --------------------------------------------------------------------------------------------------

   // --- Here the preparation phase begins

   // Read training and test data
   // (it is also possible to use ASCII format as input -> see TMVA Users Guide)
   TString fname = "./training.root";
   
   if (gSystem->AccessPathName( fname ))  // file does not exist in local directory
      gSystem->Exec("curl -O http://root.cern.ch/files/tmva_class_example.root");
   
   TFile *input = TFile::Open( fname );
   
   std::cout << "--- TMVAClassification       : Using input file: " << input->GetName() << std::endl;
   
   // --- Register the training and test trees

   TTree *tree     = (TTree*)input->Get("data");
   
   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory is 
   // the only TMVA object you have to interact with
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification" );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   // factory->AddVariable( "myvar1 := var1+var2", 'F' );
   // factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' );
   // factory->AddVariable( "var3",                "Variable 3", "units", 'F' );
   // factory->AddVariable( "var4",                "Variable 4", "units", 'F' );

   TString limit ("-900.0");
   TString replacementValue ("0.0");
   std::vector<std::string> vars = {"DER_mass_MMC","DER_mass_transverse_met_lep","DER_mass_vis","DER_pt_h","DER_deltaeta_jet_jet","DER_mass_jet_jet","DER_prodeta_jet_jet","DER_deltar_tau_lep","DER_pt_tot","DER_sum_pt","DER_pt_ratio_lep_tau","DER_met_phi_centrality","DER_lep_eta_centrality","PRI_tau_pt","PRI_tau_eta","PRI_tau_phi","PRI_lep_pt","PRI_lep_eta","PRI_lep_phi","PRI_met","PRI_met_phi","PRI_met_sumet","PRI_jet_num","PRI_jet_leading_pt","PRI_jet_leading_eta","PRI_jet_leading_phi","PRI_jet_subleading_pt","PRI_jet_subleading_eta","PRI_jet_subleading_phi","PRI_jet_all_pt"};
   
   for (std::vector<std::string>::iterator it = vars.begin (), itEnd = vars.end (); it != itEnd; ++it)
   {
       std::string s = *it;
       TString current;
       current.Form ("%s:=(%s<%s?%s:%s)",s.c_str (), s.c_str (), limit.Data (), replacementValue.Data (), s.c_str ());
       factory->AddVariable (current, 'F');
   }

   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables

//   factory->AddSpectator( "spec1 := var1*2",  "Spectator 1", "units", 'F' );
//   factory->AddSpectator( "spec2 := var1*3",  "Spectator 2", "units", 'F' );

   
   // global event weights per tree (see below for setting event-wise weights)
   Double_t weight     = 1.0;
//   Double_t backgroundWeight = 1.0;
   
   // You can add an arbitrary number of signal or background trees
//   factory->AddBackgroundTree( background, backgroundWeight );

   factory->AddTree(tree, "Signal", 1., "Label == 1");
   factory->AddTree(tree, "Background", 1., "Label == 0");
   
   // To give different trees for training and testing, do as follows:
   //    factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
   //    factory->AddSignalTree( signalTestTree,     signalTestWeight,  "Test" );
   
   // Use the following code instead of the above two or four lines to add signal and background
   // training and test events "by hand"
   // NOTE that in this case one should not give expressions (such as "var1+var2") in the input
   //      variable definition, but simply compute the expression before adding the event
   //
   //     // --- begin ----------------------------------------------------------
   //     std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
   //     Float_t  treevars[4], weight;
   //     
   //     // Signal
   //     for (UInt_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   //     for (UInt_t i=0; i<signal->GetEntries(); i++) {
   //        signal->GetEntry(i);
   //        for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //        // add training and test events; here: first half is training, second is testing
   //        // note that the weight can also be event-wise
   //        if (i < signal->GetEntries()/2.0) factory->AddSignalTrainingEvent( vars, signalWeight );
   //        else                              factory->AddSignalTestEvent    ( vars, signalWeight );
   //     }
   //   
   //     // Background (has event weights)
   //     background->SetBranchAddress( "weight", &weight );
   //     for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   //     for (UInt_t i=0; i<background->GetEntries(); i++) {
   //        background->GetEntry(i);
   //        for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //        // add training and test events; here: first half is training, second is testing
   //        // note that the weight can also be event-wise
   //        if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight*weight );
   //        else                                factory->AddBackgroundTestEvent    ( vars, backgroundWeight*weight );
   //     }
         // --- end ------------------------------------------------------------
   //
   // --- end of tree registration 

   // Set individual event weights (the variables must exist in the original TTree)
   //    for signal    : factory->SetSignalWeightExpression    ("weight1*weight2");
   //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
   factory->SetSignalWeightExpression( "Weight" );
   factory->SetBackgroundWeightExpression( "Weight" );

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = "Label==1"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = "Label==0"; // for example: TCut mycutb = "abs(var1)<0.5";

   // Tell the factory how to use the training and testing events
   //
   // If no numbers of events are given, half of the events in the tree are used 
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut,
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=0:nTest_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
//   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
//                                        "nTrain_Signal=5000:nTrain_Background=5000:nTest_Signal=5000:nTest_Background=5000:SplitMode=Random:NormMode=NumEvents:!V" );

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

   if (Use["CutsSA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                           "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   // Likelihood ("naive Bayes estimator")
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // Decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   // PCA-transformed likelihood
   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 

   // Use a kernel density estimator to approximate the PDFs
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); 

   // Use a variable-dependent mix of splines and kernel density estimator
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

   // Test the multi-dimensional probability density estimator
   // here are the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["PDEFoamBoost"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
                           "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // H-Matrix (chi2-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" );

   // Linear discriminant (same as Fisher discriminant)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher discriminant (same as LD)
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", 
                           "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators


   // improved neural network implementation 
   if (Use["NN"])
   {
//       TString layoutString ("Layout=TANH|(N+100)*2,LINEAR");
//       TString layoutString ("Layout=SOFTSIGN|100,SOFTSIGN|50,SOFTSIGN|20,LINEAR");
//       TString layoutString ("Layout=RELU|300,RELU|100,RELU|30,RELU|10,LINEAR");
//       TString layoutString ("Layout=SOFTSIGN|50,SOFTSIGN|30,SOFTSIGN|20,SOFTSIGN|10,LINEAR");
//       TString layoutString ("Layout=TANH|50,TANH|30,TANH|20,TANH|10,LINEAR");
//       TString layoutString ("Layout=SOFTSIGN|50,SOFTSIGN|20,LINEAR");
       TString layoutString ("Layout=SOFTSIGN|70,SOFTSIGN|30,LINEAR");

       std::vector<TString> strategy;
       strategy.push_back (TString ("LearningRate=1e-2,Momentum=0.9,Repetitions=1,ConvergenceSteps=70,BatchSize=120,TestRepetitions=7,WeightDecay=0.001,Regularization=NONE,DropConfig=0.5+0.5+0.5+0.5,DropRepetitions=1,Multithreading=True"));
       strategy.push_back (TString ("LearningRate=1e-4,Momentum=0.5,Repetitions=1,ConvergenceSteps=70,BatchSize=80,TestRepetitions=7,WeightDecay=0.001,Regularization=L2,Multithreading=True,DropConfig=0.1+0.1+0.1+0.1,DropRepetitions=1"));
       strategy.push_back (TString ("LearningRate=1e-5,Momentum=0.3,Repetitions=1,ConvergenceSteps=70,BatchSize=60,TestRepetitions=7,WeightDecay=0.0001,Regularization=L2,Multithreading=True"));
       strategy.push_back (TString  ("LearningRate=1e-6,Momentum=0.0,Repetitions=1,ConvergenceSteps=70,BatchSize=40,TestRepetitions=7,WeightDecay=0.0001,Regularization=NONE,Multithreading=True"));
//       strategy.push_back (TString ("LearningRate=1e-6,Momentum=0.0,Repetitions=1,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0001,Regularization=NONE,Multithreading=True"));

       TString trainingStrategyString ("TrainingStrategy=");
       for (std::vector<TString>::const_iterator it = strategy.begin (), itEnd = strategy.end (); it != itEnd; ++it)
       {
           if (it != strategy.begin ())
               trainingStrategyString += "|";
           trainingStrategyString += *it;
       }
      
//       TString nnOptions ("!H:V:VarTransform=Normalize:ErrorStrategy=CROSSENTROPY");
       TString nnOptions ("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=G:WeightInitialization=XAVIERUNIFORM");
//       TString nnOptions ("!H:V:VarTransform=Normalize:ErrorStrategy=CHECKGRADIENTS");
       nnOptions.Append (":"); nnOptions.Append (layoutString);
       nnOptions.Append (":"); nnOptions.Append (trainingStrategyString);

       factory->BookMethod( TMVA::Types::kNN, "NN", nnOptions ); // NN
   }


   
   // CF(Clermont-Ferrand)ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...  

   // Tmlp(Root)ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDTG"]) // Gradient Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" );

   if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );

   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );

   if (Use["BDTF"])  // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables
      factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher",
                           "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

   // ---- STILL EXPERIMENTAL and only implemented for BDT's ! 
   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","FitGA");

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVA::TMVAGui( outfileName );

   return 0;
}
示例#7
0
void TMVARegression( int optimIndex, int Cat=0, TString myMethodList = "" ) 
{

   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the 
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVARegression.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //

   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Mutidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]           = 0;
   Use["PDEFoam"]         = 0; 
   Use["KNN"]             = 0;
   // 
   // --- Linear Discriminant Analysis
   Use["LD"]		        = 0;
   // 
   // --- Function Discriminant analysis
   Use["FDA_GA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   // 
   // --- Neural Network
   Use["MLP"]             = 0; 
   // 
   // --- Support Vector Machine 
   Use["SVM"]             = 0;
   // 
   // --- Boosted Decision Trees
   Use["BDT"]             = 0;
   Use["BDTG"]            = 1;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVARegression" << std::endl;

   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // --------------------------------------------------------------------------------------------------

   // --- Here the preparation phase begins

   // Create a new root output file
   TString outfileName( Form("TMVAoutput/TMVAReg_%i.root",optimIndex) );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory will
   // then run the performance analysis for you.
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/ 
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in 
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( Form("TMVARegression_%i_Cat%i",optimIndex,Cat), outputFile, 
                                               "!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=Regression" );

   // If you wish to modify default settings 
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]

   factory->AddVariable( "jet_eta", "jet_eta", "units", 'F' );
   factory->AddVariable( "jet_emfrac", "jet_emfrac", "units", 'F' );
   factory->AddVariable( "jet_hadfrac", "jet_hadfrac", "units", 'F' );
   factory->AddVariable( "jet_nconstituents", "jet_nconst", "units", 'F' );
   factory->AddVariable( "jet_vtx3dL", "jet_vtx3dL", "units", 'F' );
   factory->AddVariable( "MET", "MET", "units", 'F' );
   factory->AddVariable( "jet_dPhiMETJet", "jet_dPhiMETJet", "units", 'F' );
   
   //factory->AddVariable( "hJet_vtxPt", "hJet_vtxPt", "units", 'F' );
   //factory->AddVariable( "hJet_JECUnc", "hJet_JECUnc", "units", 'F' );
   //factory->AddVariable( "hJet_ptLeadTrack", "hJet_ptLeadTrack", "units", 'F' );
   //factory->AddVariable( "hJet_SoftLeptPtCut:=(hJet_SoftLeptIdlooseMu>0. || hJet_SoftLeptId95>0.) ? (hJet_SoftLeptPt) : (-99)", "hJet_SoftLeptPt", "units", 'F' );
   //factory->AddVariable( "hJet_En", "hJet_En", "units", 'F' );
   //factory->AddVariable( "hJet_Et", "hJet_Et", "units", 'F' );
   //factory->AddVariable( "hJet_Mt", "hJet_Mt", "units", 'F' );
   //factory->AddVariable( "hJet_nch", "hJet_nch", "units", 'F' );
   //factory->AddVariable( "hJet_vtx3deL", "hJet_vtx3deL", "units", 'F' );
   //factory->AddVariable( "hJet_vtxMass", "hJet_vtxMass", "units", 'F' );
   //factory->AddVariable( "hJet_ptRaw", "hJet_ptRaw", "units", 'F' );
   //factory->AddVariable( "hJet_EnRaw", "hJet_EnRaw", "units", 'F' );
   //factory->AddVariable( "hJet_SoftLeptptRelCut:=(hJet_SoftLeptIdlooseMu>0. || hJet_SoftLeptId95>0.) ? (hJet_SoftLeptptRel) : (-99)", "hJet_SoftLeptptRel", "units", 'F' );
   //factory->AddVariable( "hJet_SoftLeptdRCut:=(hJet_SoftLeptIdlooseMu>0. || hJet_SoftLeptId95>0.) ? (hJet_SoftLeptdR) : (-99)", "hJet_SoftLeptdR", "units", 'F' );
   //factory->AddVariable( "rho25", "rho25", "units", 'F' );
   //factory->AddVariable( "dPhiMETJet", "dPhiMETJet", "units", 'F' );


   // You can add so-called "Spectator variables", which are not used in the MVA training, 
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
   // input variables, the response values of all trained MVAs, and the spectator variables

   // Add the variable carrying the regression target
   //factory->AddTarget( "jet_genPt" ); 
   factory->AddTarget( "jet_genJetPt/jet_pt" ); 

   // It is also possible to declare additional targets for multi-dimensional regression, ie:
   // -- factory->AddTarget( "fvalue2" );
   // BUT: this is currently ONLY implemented for MLP

   // Read training and test data (see TMVAClassification for reading ASCII files)
   // load the signal and background event samples from ROOT trees
/*   TFile *input(0);
   TString fname = "./tmva_reg_example.root";
   if (!gSystem->AccessPathName( fname )) 
      input = TFile::Open( fname ); // check if file in local directory exists
   else 
      input = TFile::Open( "http://root.cern.ch/files/tmva_reg_example.root" ); // if not: download from ROOT server
   
   if (!input) {
      std::cout << "ERROR: could not open data file" << std::endl;
      exit(1);
   }
   std::cout << "--- TMVARegression           : Using input file: " << input->GetName() << std::endl;

   // --- Register the regression tree

   TTree *regTree = (TTree*)input->Get("TreeR");
*/

   TChain chainTraining("Events");
   chainTraining.Add("TrainingFiles/training.root");
   TTree *regTreeTraining = (TTree*) chainTraining;

   TChain chainTesting("Events");
   chainTesting.Add("TrainingFiles/testing.root");
   TTree *regTreeTesting = (TTree*) chainTesting;

   // global event weights per tree (see below for setting event-wise weights)
   Double_t regWeight  = 1.0;   

   // You can add an arbitrary number of regression trees
   factory->AddTree( regTreeTraining, "Regression", regWeight, "", "training" );
   factory->AddTree( regTreeTesting, "Regression", regWeight, "", "test" );

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycut = "hJet_pt[0]>20. && hJet_pt[1]>20. && fabs(hJet_eta[0])<2.5 && fabs(hJet_eta[1])<2.5 && hJet_csv[0]>0. && hJet_csv[1]>0. && hJet_ptLeadTrack[0]<1500. && hJet_ptLeadTrack[1]<1500. && hJet_genJetPt[0]>0. && hJet_genJetPt[1]>0. && hJet_puJetIdL[0]>0.0 && hJet_puJetIdL[1]>0.0";
  TCut testingCut = "hJet_pt[0]>20. && hJet_pt[1]>20. && abs(hJet_eta[0])<2.5 && abs(hJet_eta[1])<2.5";
  TCut mjjCut = "sqrt(pow(hJet_pt[0]*cos(hJet_phi[0])+hJet_pt[1]*cos(hJet_phi[1]),2)+pow(hJet_pt[0]*sin(hJet_phi[0])+hJet_pt[1]*sin(hJet_phi[1]),2)) < 110";

  if(Cat==1) mjjCut = "sqrt(pow(hJet_pt[0]*cos(hJet_phi[0])+hJet_pt[1]*cos(hJet_phi[1]),2)+pow(hJet_pt[0]*sin(hJet_phi[0])+hJet_pt[1]*sin(hJet_phi[1]),2)) > 110";
  TCut jetPtCut="jet_pt>90";
  if(Cat==1) jetPtCut="jet_pt>90";
  //TCut trainingCut = "hJet_pt[0]>20. && hJet_pt[1]>20. && abs(hJet_eta[0])<2.5 && abs(hJet_eta[1])<2.5 && hJet_genJetPt[0]>0. && hJet_genJetPt[1]>0. && hJet_csv[0]>0.0 && hJet_csv[1]>0.0 && hJet_ptLeadTrack[0]<1500. && hJet_ptLeadTrack[1]<1500.";
  TCut trainingCut = "jet_pt>20. && abs(jet_eta)<2.5 && jet_genJetPt>0. && jet_dRJetGenJet < 0.4 && (jet_partonID)==5";

// for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1";
// for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1";

   // tell the factory to use all remaining events in the trees after training for testing:
   // factory->PrepareTrainingAndTestTree(mycut, "nTrain_Regression=600000:nTest_Regression=600000:SplitMode=Random:NormMode=NumEvents:!V");
   //factory->PrepareTrainingAndTestTree(mycut, "nTrain_Regression=337500:nTest_Regression=337500:SplitMode=Random:NormMode=NumEvents:!V");
   //   factory->PrepareTrainingAndTestTree(mycut, "nTrain_Regression=158393:nTest_Regression=158393:SplitMode=Random:NormMode=NumEvents:!V");
   //factory->PrepareTrainingAndTestTree(mycut, "nTrain_Regression=14197:nTest_Regression=14197:SplitMode=Random:NormMode=NumEvents:!V");
   factory->PrepareTrainingAndTestTree(trainingCut+jetPtCut, "!V");

   // If no numbers of events are given, half of the events in the tree are used 
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );  

   // ---- Book MVA methods
   //
   // please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // PDE - RS method
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS", 
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" );
   // And the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );   
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );   

   if (Use["PDEFoam"])
       factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", 
			    "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN", 
                           "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // Linear discriminant
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", 
                           "!H:!V:VarTransform=None" );

	// Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"]) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                          "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" );
   
   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. the formula of this example is good for parabolas
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" );

   if (Use["FDA_MT"]) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"]) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   // Neural network (MLP)
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" );

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );
//100, 5
   // Boosted Decision Tree   //100,5 nCuts=-1
   if (Use["BDT"])
     factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=100:nEventsMin=4:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=-1:PruneMethod=CostComplexity:PruneStrength=30" );

   const bool doScan1=0;

   int ntreesArray[8] = {100,200,300,400,500,600,700,800};
   float shrinkageArray[3] = {0.1,0.2,0.3};
   float gradbaggingfracArray[3] = {0.7,0.8,0.9};
   int maxdepthArray[3] = {2,3,4};
   int nnodesmaxArray[3] = {5,10,15};
   if(!doScan1) shrinkageArray[2]=1.0;
   int nnodesmaxIndex=-1,maxdepthIndex=-1,gradbaggingfracIndex=-1,shrinkageIndex=-1,ntreesIndex=-1;

   if(doScan1){
     nnodesmaxIndex = optimIndex/216;
     maxdepthIndex = (optimIndex-nnodesmaxIndex*216)/72;
     gradbaggingfracIndex = (optimIndex-nnodesmaxIndex*216-maxdepthIndex*72)/24;
     shrinkageIndex = (optimIndex-nnodesmaxIndex*216-maxdepthIndex*72-gradbaggingfracIndex*24)/8;
     ntreesIndex = (optimIndex-nnodesmaxIndex*216-maxdepthIndex*72-gradbaggingfracIndex*24-shrinkageIndex*8);
   }
   else{
     nnodesmaxIndex = optimIndex/72;
     maxdepthIndex = (optimIndex-nnodesmaxIndex*72)/24;
     shrinkageIndex = (optimIndex-nnodesmaxIndex*72-maxdepthIndex*24)/8;
     ntreesIndex = (optimIndex-nnodesmaxIndex*72-maxdepthIndex*24-shrinkageIndex*8);
   }

   if (Use["BDTG"]){
     if(doScan1)
       factory->BookMethod( TMVA::Types::kBDT, "BDTG", 
			    Form("!H:!V:NTrees=%i::BoostType=Grad:Shrinkage=%.1f:UseBaggedGrad:GradBaggingFraction=%.1f:nCuts=200:MaxDepth=%i:NNodesMax=%i",ntreesArray[ntreesIndex],shrinkageArray[shrinkageIndex],gradbaggingfracArray[gradbaggingfracIndex],maxdepthArray[maxdepthIndex],nnodesmaxArray[nnodesmaxIndex]) );
     else
       factory->BookMethod( TMVA::Types::kBDT, "BDTG", 
			    Form("!H:!V:IgnoreNegWeights:NTrees=%i::BoostType=Grad:Shrinkage=%.1f:nCuts=200:MaxDepth=%i:NNodesMax=%i",ntreesArray[ntreesIndex],shrinkageArray[shrinkageIndex],maxdepthArray[maxdepthIndex],nnodesmaxArray[nnodesmaxIndex]) );
   }

   if(doScan1)
     cout << Form("!H:!V:NTrees=%i::BoostType=Grad:Shrinkage=%.2f:UseBaggedGrad:GradBaggingFraction=%.2f:nCuts=200:MaxDepth=%i:NNodesMax=%i",ntreesArray[ntreesIndex],shrinkageArray[shrinkageIndex],gradbaggingfracArray[gradbaggingfracIndex],maxdepthArray[maxdepthIndex],nnodesmaxArray[nnodesmaxIndex])<<endl;
   else
     cout << Form("!H:!V:IgnoreNegWeights:NTrees=%i::BoostType=Grad:Shrinkage=%.1f:nCuts=200:MaxDepth=%i:NNodesMax=%i",ntreesArray[ntreesIndex],shrinkageArray[shrinkageIndex],maxdepthArray[maxdepthIndex],nnodesmaxArray[nnodesmaxIndex]) << endl;

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();    

   // --------------------------------------------------------------
   
   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVARegression is done!" << std::endl;      

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVARegGui( outfileName );
}
示例#8
0
int main(int argc, char * argv[])
{
    //Processing input options
    int c;
    std::string outFname;
    outFname = std::string("QualityNaF.root");

    // Open  input files, get the trees
    TChain *mc = InputFileReader("FileListNtuples_ext.txt","parametri_geo");
    // Preparing options for the TMVA::Factory
    std::string options( 
        "!V:" 
        "!Silent:"
        "Color:"
        "DrawProgressBar:"
        "Transformations=I;D;P;G,D:"
        "AnalysisType=Classification"
    );

    //Creating the factory
    TFile *   ldFile = new TFile(outFname.c_str(),"RECREATE");
    TMVA::Factory * factory = new TMVA::Factory("QualityNaF", ldFile, options.c_str());

    //Preparing variables 
    //general
    /*factory->AddVariable("Chisquare", 'F');
    factory->AddVariable("Layernonusati", 'I');
    factory->AddVariable("NTofUsed", 'I');
    factory->AddVariable("diffR", 'F');
    factory->AddVariable("TOF_Up_Down", 'F');*/
    //Tof	
    //factory->AddVariable("TOFchisq_s", 'F');
    //factory->AddVariable("TOFchisq_t", 'F');

    //RICH	
    factory->AddVariable("Richtotused", 'F');	
    factory->AddVariable("RichPhEl", 'F');
    factory->AddVariable("RICHprob", 'F');
    factory->AddVariable("RICHcollovertotal");
    factory->AddVariable("RICHLipBetaConsistency");  
    factory->AddVariable("RICHTOFBetaConsistency");  
    factory->AddVariable("RICHChargeConsistency");
    
    factory->AddVariable("RICHPmts");
    factory->AddVariable("RICHgetExpected");		
    factory->AddVariable("tot_hyp_p_uncorr");
    factory->AddVariable("Bad_ClusteringRICH");
    factory->AddVariable("NSecondariesRICHrich");

    //factory->AddVariable("HitHValldir"); 
    //factory->AddVariable("HitHVallrefl");  	
    
    //factory->AddVariable("HVBranchCheck:= (HitHValldir - HitHVoutdir) - (HitHVallrefl - HitHVoutrefl)");    

    factory->AddVariable("HitHVoutdir"); 
    factory->AddVariable("HitHVoutrefl");

    //Spectator Variables
    factory->AddSpectator("R", 'F');
    factory->AddSpectator("BetaRICH_new", 'F');	

    //Preselection cuts
    std::string PreSelection    = "qL1>0&&(joinCutmask&187)==187&&qL1<1.75&&R>0";
    std::string ChargeCut 	= "qUtof>0.8&&qUtof<1.3&&qLtof>0.8&&qLtof<1.3";
    std::string VelocityCut 	= /*"Beta<0.8";*/"((joinCutmask>>11))==1024&&BetaRICH_new>0&&BetaRICH_new<0.975";
    std::string signalCut 	= /*"(R/Beta)*(1-Beta^2)^0.5>1.65&&GenMass>1&&GenMass<2";*/"(R/BetaRICH_new)*(1-BetaRICH_new^2)^0.5>0.5&&(R/BetaRICH_new)*(1-BetaRICH_new^2)^0.5<1.5";	
    std::string bkgndCut 	= /*"(R/Beta)*(1-Beta^2)^0.5>1.65&&GenMass>0&&GenMass<1";*/"(R/BetaRICH_new)*(1-BetaRICH_new^2)^0.5>3";		 

    factory->AddTree(mc,"Signal"    ,1,(PreSelection +"&&"+ ChargeCut + "&&" + VelocityCut + "&&"+ signalCut).c_str());
    factory->AddTree(mc,"Background",1,(PreSelection +"&&"+ ChargeCut + "&&" + VelocityCut + "&&"+ bkgndCut).c_str());

    // Preparing
    std::string preselection = "";
    std::string inputparams(
        "SplitMode=Random:"
        "NormMode=NumEvents:"
        "!V"
    );
    factory->PrepareTrainingAndTestTree(preselection.c_str(),inputparams.c_str());

    // Training
    std::string trainparams ="!H:!V:MaxDepth=3";
    factory->BookMethod(TMVA::Types::kBDT, "BDT", trainparams.c_str());

    trainparams ="!H:!V";
    factory->BookMethod(TMVA::Types::kLikelihood, "Likelihood", trainparams.c_str());

    trainparams ="!H:!V:VarTransform=Decorrelate";
    //factory->BookMethod(TMVA::Types::kLikelihood, "LikelihoodD", trainparams.c_str());

    trainparams ="!H:!V";
    //factory->BookMethod(TMVA::Types::kCuts, "Cuts", trainparams.c_str());



    factory->TrainAllMethods();
    factory->TestAllMethods();
    factory->EvaluateAllMethods();
}
示例#9
0
void TMVAMulticlass( TString myMethodList = "" )
{
   
   // This loads the library
   TMVA::Tools::Instance();

   // to get access to the GUI and all tmva macros
   TString tmva_dir(TString(gRootDir) + "/tmva");
   if(gSystem->Getenv("TMVASYS"))
      tmva_dir = TString(gSystem->Getenv("TMVASYS"));
   gROOT->SetMacroPath(tmva_dir + "/test/:" + gROOT->GetMacroPath() );
   gROOT->ProcessLine(".L TMVAMultiClassGui.C");

   
   //---------------------------------------------------------------
   // default MVA methods to be trained + tested
   std::map<std::string,int> Use;
   Use["MLP"]             = 1;
   Use["BDTG"]            = 1;
   Use["FDA_GA"]          = 0;
   Use["PDEFoam"]         = 0;
   //---------------------------------------------------------------
   
   std::cout << std::endl;
   std::cout << "==> Start TMVAMulticlass" << std::endl;
   
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;
      
      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // Create a new root output file.
   TString outfileName = "TMVAMulticlass.root";
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
   
   TMVA::Factory *factory = new TMVA::Factory( "TMVAMulticlass", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=multiclass" );
   factory->AddVariable( "var1", 'F' );
   factory->AddVariable( "var2", "Variable 2", "", 'F' );
   factory->AddVariable( "var3", "Variable 3", "units", 'F' );
   factory->AddVariable( "var4", "Variable 4", "units", 'F' );

   TFile *input(0);
   TString fname = "./tmva_example_multiple_background.root";
   if (!gSystem->AccessPathName( fname )) {
      // first we try to find the file in the local directory
      std::cout << "--- TMVAMulticlass   : Accessing " << fname << std::endl;
      input = TFile::Open( fname );
   }
   else {
      cout << "Creating testdata...." << std::endl;
      gROOT->ProcessLine(".L createData.C+");
      gROOT->ProcessLine("create_MultipleBackground(2000)");
      cout << " created tmva_example_multiple_background.root for tests of the multiclass features"<<endl;
      input = TFile::Open( fname );
   }
   if (!input) {
      std::cout << "ERROR: could not open data file" << std::endl;
      exit(1);
   }

   TTree *signal      = (TTree*)input->Get("TreeS");
   TTree *background0 = (TTree*)input->Get("TreeB0");
   TTree *background1 = (TTree*)input->Get("TreeB1");
   TTree *background2 = (TTree*)input->Get("TreeB2");
   
   gROOT->cd( outfileName+TString(":/") );
   factory->AddTree    (signal,"Signal");
   factory->AddTree    (background0,"bg0");
   factory->AddTree    (background1,"bg1");
   factory->AddTree    (background2,"bg2");
   
   factory->PrepareTrainingAndTestTree( "", "SplitMode=Random:NormMode=NumEvents:!V" );

   if (Use["BDTG"]) // gradient boosted decision trees
      factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.50:nCuts=20:MaxDepth=2");
   if (Use["MLP"]) // neural network
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:NCycles=1000:HiddenLayers=N+5,5:TestRate=5:EstimatorType=MSE");
   if (Use["FDA_GA"]) // functional discriminant with GA minimizer
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );
   if (Use["PDEFoam"]) // PDE-Foam approach
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );
   
  // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------
   
   // Save the output
   outputFile->Close();
   
   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;
   
   delete factory;
   
   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAMultiClassGui( outfileName );
   
   
}
示例#10
0
std::pair<TString,TString> TMVAClassification (
    TString infilename,
    AnalysisType analysisType = AnalysisType::DIRECT,
    TString additionalRootFileName = "")
{
    TMVA::Tools::Instance();

    std::string tmstr (now ());
    TString tmstmp (tmstr.c_str ());
   
  
    std::cout << "==> Start TMVAClassification" << std::endl;
    std::cout << "-------------------- open input file ---------------- " << std::endl;
    TString fname = infilename; //pathToData + infilename + TString (".root");
    if (analysisType != AnalysisType::TRANSFORMED)
        fname = pathToData + infilename + TString (".root");
    std::cout << "open file " << std::endl << fname.Data () << std::endl;


    std::cout << "-------------------- get tree ---------------- " << std::endl;
    TString treeName = "data";
    if (analysisType == AnalysisType::TRANSFORMED)
        treeName = "transformed";

    std::cout << "-------------------- create tchain with treeName ---------------- " << std::endl;
    std::cout << treeName << std::endl;
    TChain* tree = new TChain (treeName);
    std::cout << "add file" << std::endl;
    std::cout << fname << std::endl;
    tree->Add (fname);
    TChain* treeFriend (NULL);
    if (additionalRootFileName.Length () > 0)
    {
        std::cout << "-------------------- add additional input file ---------------- " << std::endl;
        std::cout << additionalRootFileName << std::endl;
        treeFriend = new TChain (treeName);
        treeFriend->Add (additionalRootFileName);
        tree->AddFriend (treeFriend,"p");
    }
//    tree->Draw ("mass:prediction");
//    return std::make_pair(TString("hallo"),TString ("nix"));
    TString outfileName;
    if (analysisType == AnalysisType::BACKGROUND)
    {
        outfileName = TString ("BACK_" + infilename) + tmstmp + TString (".root");
    }
    else
        outfileName += TString ( "TMVA__" ) + tmstmp + TString (".root");

    std::cout << "-------------------- open output file ---------------- " << std::endl;
    TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

    std::cout << "-------------------- prepare factory ---------------- " << std::endl;
    TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
						"AnalysisType=Classification:Transformations=I:!V" );
    std::cout << "-------------------- add variables ---------------- " << std::endl;


    for (auto varname : variableNames)
    {
	factory->AddVariable (varname.c_str (), 'F');
    }

    for (auto varname : spectatorNames)
    {
	factory->AddSpectator (varname.c_str (), 'F');
    }
    
   
    std::cout << "-------------------- add trees ---------------- " << std::endl;
    TCut signalCut ("signal==1");
    TCut backgroundCut ("signal==0");
    if (analysisType == AnalysisType::TRANSFORMED)
    {
        signalCut = "(signal_original==1 && signal_in==0)";
        backgroundCut = "(signal_original==0 && signal_in==0)";
    }
    if (analysisType == AnalysisType::BACKGROUND)
    {
        signalCut     = TString("(signal==0) * (prediction > 0.7)");
        backgroundCut = TString("(signal==0) * (prediction < 0.4)");
    }
    //tree->Draw ("prediction",signalCut);
    //return std::make_pair(TString("hallo"),TString ("nix"));
    factory->AddTree(tree, "Signal", 1.0, baseCut + signalCut, "TrainingTesting");
    factory->AddTree(tree, "Background", 1.0, baseCut + backgroundCut, "TrainingTesting");


    
    TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
    TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";

    /* // Set individual event weights (the variables must exist in the original TTree) */
    if (analysisType == AnalysisType::BACKGROUND)
    {
        factory->SetSignalWeightExpression ("prediction");
        factory->SetBackgroundWeightExpression ("1");
    }

   
    std::cout << "-------------------- prepare ---------------- " << std::endl;
    factory->PrepareTrainingAndTestTree( mycuts, mycutb,
					 "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=0:nTest_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );


    TString methodName ("");
    if (analysisType == AnalysisType::BACKGROUND)
        methodName = TString ("TONBKG_") + tmstmp;

    if (false)
    {
	// gradient boosting training
        methodName += TString("GBDT");
	factory->BookMethod(TMVA::Types::kBDT, methodName,
			    "NTrees=40:BoostType=Grad:Shrinkage=0.01:MaxDepth=7:UseNvars=6:nCuts=20:MinNodeSize=10");
    }
    if (false)
    {
        methodName += TString("Likelihood");
	factory->BookMethod( TMVA::Types::kLikelihood, methodName,
			     "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );
    }
    

    
    if (false)
    {
	TString layoutString ("Layout=TANH|100,LINEAR");

	TString training0 ("LearningRate=1e-1,Momentum=0.0,Repetitions=1,ConvergenceSteps=300,BatchSize=20,TestRepetitions=15,WeightDecay=0.001,Regularization=NONE,DropConfig=0.0+0.5+0.5+0.5,DropRepetitions=1,Multithreading=True");
	TString training1 ("LearningRate=1e-2,Momentum=0.5,Repetitions=1,ConvergenceSteps=300,BatchSize=30,TestRepetitions=7,WeightDecay=0.001,Regularization=L2,Multithreading=True,DropConfig=0.0+0.1+0.1+0.1,DropRepetitions=1");
	TString training2 ("LearningRate=1e-2,Momentum=0.3,Repetitions=1,ConvergenceSteps=300,BatchSize=40,TestRepetitions=7,WeightDecay=0.0001,Regularization=L2,Multithreading=True");
	TString training3 ("LearningRate=1e-3,Momentum=0.1,Repetitions=1,ConvergenceSteps=200,BatchSize=70,TestRepetitions=7,WeightDecay=0.0001,Regularization=NONE,Multithreading=True");

	TString trainingStrategyString ("TrainingStrategy=");
	trainingStrategyString += training0 + "|" + training1 + "|" + training2 + "|" + training3;
      
	TString nnOptions ("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=G:WeightInitialization=XAVIERUNIFORM");
	nnOptions.Append (":"); nnOptions.Append (layoutString);
	nnOptions.Append (":"); nnOptions.Append (trainingStrategyString);

        methodName += TString("NNgauss");
	factory->BookMethod( TMVA::Types::kNN, methodName, nnOptions ); // NN
    }

    if (false)
    {
	TString layoutString ("Layout=TANH|200,TANH|70,LINEAR");

	TString training0 ("LearningRate=1e-2,Momentum=0.0,Repetitions=1,ConvergenceSteps=300,BatchSize=20,TestRepetitions=15,WeightDecay=0.001,Regularization=NONE,DropConfig=0.0+0.5+0.5+0.5,DropRepetitions=1,Multithreading=True");
	TString training1 ("LearningRate=1e-3,Momentum=0.5,Repetitions=1,ConvergenceSteps=300,BatchSize=30,TestRepetitions=7,WeightDecay=0.001,Regularization=L2,Multithreading=True,DropConfig=0.0+0.1+0.1+0.1,DropRepetitions=1");
	TString training2 ("LearningRate=1e-4,Momentum=0.3,Repetitions=1,ConvergenceSteps=300,BatchSize=40,TestRepetitions=7,WeightDecay=0.0001,Regularization=L2,Multithreading=True");
	TString training3 ("LearningRate=1e-5,Momentum=0.1,Repetitions=1,ConvergenceSteps=200,BatchSize=70,TestRepetitions=7,WeightDecay=0.0001,Regularization=NONE,Multithreading=True");

	TString trainingStrategyString ("TrainingStrategy=");
	trainingStrategyString += training0 + "|" + training1 + "|" + training2 + "|" + training3;
//	trainingStrategyString += training0 + "|" + training2 + "|" + training3;
//	trainingStrategyString += training0 + "|" + training2;

      
	//       TString nnOptions ("!H:V:VarTransform=Normalize:ErrorStrategy=CROSSENTROPY");
	TString nnOptions ("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=N:WeightInitialization=XAVIERUNIFORM");
	//       TString nnOptions ("!H:V:VarTransform=Normalize:ErrorStrategy=CHECKGRADIENTS");
	nnOptions.Append (":"); nnOptions.Append (layoutString);
	nnOptions.Append (":"); nnOptions.Append (trainingStrategyString);

        methodName = TString("NNnormalized");
        factory->BookMethod( TMVA::Types::kNN, methodName, nnOptions ); // NN
    }


    if (true)
    {
	TString layoutString ("Layout=TANH|100,TANH|50,LINEAR");

	TString training0 ("LearningRate=1e-2,Momentum=0.0,Repetitions=1,ConvergenceSteps=100,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Regularization=NONE,DropConfig=0.0+0.5+0.5+0.5,DropRepetitions=1,Multithreading=True");
	TString training1 ("LearningRate=1e-3,Momentum=0.0,Repetitions=1,ConvergenceSteps=20,BatchSize=30,TestRepetitions=7,WeightDecay=0.001,Regularization=L2,Multithreading=True,DropConfig=0.0+0.1+0.1+0.1,DropRepetitions=1");
	TString training2 ("LearningRate=1e-4,Momentum=0.0,Repetitions=1,ConvergenceSteps=20,BatchSize=40,TestRepetitions=7,WeightDecay=0.0001,Regularization=L2,Multithreading=True");
	TString training3 ("LearningRate=1e-5,Momentum=0.0,Repetitions=1,ConvergenceSteps=30,BatchSize=70,TestRepetitions=7,WeightDecay=0.0001,Regularization=NONE,Multithreading=True");

	TString trainingStrategyString ("TrainingStrategy=");
	trainingStrategyString += training0 + "|" + training1 + "|" + training2 + "|" + training3;

      
	TString nnOptions ("!H:!V:ErrorStrategy=CROSSENTROPY:VarTransform=P+G:WeightInitialization=XAVIERUNIFORM");
	nnOptions.Append (":"); nnOptions.Append (layoutString);
	nnOptions.Append (":"); nnOptions.Append (trainingStrategyString);

        methodName += TString("NNPG");
	factory->BookMethod( TMVA::Types::kNN, methodName, nnOptions ); // NN
    }
   
   
   
    factory->TrainAllMethods();
//    return std::make_pair(TString("hallo"),TString ("nix"));
    factory->TestAllMethods();
    factory->EvaluateAllMethods();

    //input->Close();
    outputFile->Close();

//    TMVA::TMVAGui (outfileName);
   
    delete factory;
    delete tree;
    switch (analysisType)
    {
    case AnalysisType::BACKGROUND:
        std::cout << "DONE BACKGROUND" << std::endl;
        break;
    case AnalysisType::DIRECT:
        std::cout << "DONE DIRECT" << std::endl;
        break;
    case AnalysisType::TRANSFORMED:
        std::cout << "DONE TRANSFORMED" << std::endl;
        break;
        
    };
    std::cout << "classification, return : " << outfileName << "  ,  " << methodName << std::endl;
    return std::make_pair (outfileName, methodName);
}