示例#1
0
文件: tmva.c 项目: dbarge/tauTo3mu
void TMVAClassification()
{
   TMVA::Tools::Instance();

   std::cout << "==> Start TMVAClassification" << std::endl;
   TString outfileName( "TMVA.root" );

   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "AnalysisType=Classification" );
   factory->AddVariable("LifeTime", 'F');
   factory->AddVariable("FlightDistance", 'F');
   factory->AddVariable("pt", 'F');
   
   TString fname = "../tau_data/training.root";
   TFile *input = TFile::Open( fname );
   
   TTree *tree     = (TTree*)input->Get("data");
   
   factory->AddTree(tree, "Signal", 1., "signal == 1", "Training");
   factory->AddTree(tree, "Signal", 1., "signal == 1", "Test");
   factory->AddTree(tree, "Background", 1., "signal == 0", "Training");
   factory->AddTree(tree, "Background", 1., "signal == 0", "Test");
   
   // gradient boosting training
   factory->BookMethod(TMVA::Types::kBDT, "GBDT",
                       "NTrees=40:BoostType=Grad:Shrinkage=0.01:MaxDepth=7:UseNvars=6:nCuts=20:MinNodeSize=10");
   factory->TrainAllMethods();
   input->Close();
   outputFile->Close();

   delete factory;
}
示例#2
0
文件: regressphi.C 项目: anantoni/CMG
void regressphi() {
   TMVA::Tools::Instance();
   std::cout << "==> Start TMVAClassification" << std::endl;
  
   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
   TMVA::Factory *factory = new TMVA::Factory( "mva", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar" );
   factory->AddVariable( "npv"                , 'F' ); 
   factory->AddVariable( "u"                  , 'F' ); 
   factory->AddVariable( "uphi"               , 'F' );
   factory->AddVariable( "chsumet/sumet"      , 'F' ); 
   factory->AddVariable( "tku"                , 'F' );
   factory->AddVariable( "tkuphi"             , 'F' );
   factory->AddVariable( "nopusumet/sumet"    , 'F' );
   factory->AddVariable( "nopuu"              , 'F' );
   factory->AddVariable( "nopuuphi"           , 'F' );
   factory->AddVariable( "pusumet/sumet"      , 'F' );
   factory->AddVariable( "pumet"              , 'F' );
   factory->AddVariable( "pumetphi"           , 'F' );
   factory->AddVariable( "pucsumet/sumet"     , 'F' );
   factory->AddVariable( "pucu"               , 'F' );
   factory->AddVariable( "pucuphi"            , 'F' );
   factory->AddVariable( "jspt_1"             , 'F' );
   factory->AddVariable( "jseta_1"            , 'F' );
   factory->AddVariable( "jsphi_1"            , 'F' );
   factory->AddVariable( "jspt_2"             , 'F' );
   factory->AddVariable( "jseta_2"            , 'F' );
   factory->AddVariable( "jsphi_2"            , 'F' );
   factory->AddVariable( "nalljet"            , 'I' );
   factory->AddVariable( "njet"               , 'I' );
    
   factory->AddTarget( "rphi_z-uphi+ 2.*TMath::Pi()*(rphi_z-uphi < -TMath::Pi()) - 2.*TMath::Pi()*(rphi_z-uphi > TMath::Pi())  " ); 
   TString  lName       = "../Jets/r11-dimu_nochs_v2.root";  TFile *lInput = TFile::Open(lName);
   TTree   *lRegress    = (TTree*)lInput    ->Get("Flat");

   Double_t lRWeight = 1.0;
   factory->AddRegressionTree( lRegress   , lRWeight );
   TCut lCut = "nbtag == 0"; //Cut to remove real MET
   //(rpt_z < 40 || (rpt_z > 40 && rpt_z+u1 < 40)) && nbtag == 0 "; ==> stronger cut to remove Real MET

   factory->PrepareTrainingAndTestTree( lCut,
					"nTrain_Regression=0:nTest_Regression=0:SplitMode=Block:NormMode=NumEvents:!V" );

   // Boosted Decision Trees
   factory->BookMethod( TMVA::Types::kBDT, "RecoilPhiRegress_data_clean2_njet",
			"!H:!V:VarTransform=None:nEventsMin=200:NTrees=100:BoostType=Grad:Shrinkage=0.1:MaxDepth=100:NNodesMax=100000:UseYesNoLeaf=F:nCuts=2000");//MaxDepth=100

   factory->TrainAllMethods();
   factory->TestAllMethods();
   factory->EvaluateAllMethods();
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;
   delete factory;
   //if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
示例#3
0
void REG::Loop() {

  if (fChain == 0) return;


  Long64_t nentries = fChain->GetEntriesFast();
  
  Long64_t nbytes = 0, nb = 0;
  TMVA::Tools::Instance();
  std::cout << std::endl;
  std::cout << "==> Start TMVARegression" << std::endl;
  TString outfileName( "TMVAReg.root" );
  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

  TMVA::Factory *factory = new TMVA::Factory( "TMVARegression", outputFile, 
					      "!V:!Silent:Color:DrawProgressBar" );

  factory->AddVariable( "l1Pt", "Variable 1", "units", 'F' );
  factory->AddVariable( "l1Eta", "Variable 2", "units", 'F' );
  factory->AddVariable( "l1Phi", "Variable 1", "units", 'F' );
//  factory->AddVariable( "RhoL1", "Variable 2", "units", 'F' );

  
  factory->AddTarget( "ratio" ); 
  factory->AddRegressionTree(fChain, 1.0 );
  TCut mycut = "";
  factory->PrepareTrainingAndTestTree( mycut, 
				       "nTrain_Regression=10000:nTest_Regression=0:SplitMode=Block:NormMode=NumEvents:!V" );

  factory->BookMethod( TMVA::Types::kBDT, "BDT",
		       "!H:!V:NTrees=100:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" );
  factory->TrainAllMethods();
 // ---- Evaluate all MVAs using the set of test events
  //   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
  //  factory->EvaluateAllMethods();    

   // --------------------------------------------------------------
   
   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVARegression is done!" << std::endl;      

   delete factory;
}
void TMVAClassification( TString myMethodList = "" ) 
{

//    TString curDynamicPath( gSystem->GetDynamicPath() );
//    gSystem->SetDynamicPath( "/usr/local/bin/root/bin:" + curDynamicPath );

//    TString curIncludePath(gSystem->GetIncludePath());
//    gSystem->SetIncludePath( " -I /usr/local/bin/root/include " + curIncludePath );

//    // load TMVA shared library created in local release: for MAC OSX
//    if (TString(gSystem->GetBuildArch()).Contains("macosx") ) gSystem->Load( "libTMVA.so" );


   // gSystem->Load( "libTMVA" );
//   TMVA::Tools::Instance();

//    // welcome the user
//    TMVA::gTools().TMVAWelcomeMessage();
   
//    TMVAGlob::SetTMVAStyle();

//    // this loads the library
//    TMVA::Tools::Instance();

   //---------------------------------------------------------------
   // default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   Use["Cuts"]            = 1;
   // Use["Likelihood"]      = 1;
 
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // Create a new root output file.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory will
   // then run the performance analysis for you.
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/ 
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in 
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, 
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" );

   // If you wish to modify default settings 
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   // factory->AddVariable( "myvar1 := var1+var2", 'F' );
   // factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' );
   // factory->AddVariable( "var3",                "Variable 3", "units", 'F' );
   // factory->AddVariable( "var4",                "Variable 4", "units", 'F' );

   factory->AddVariable("deltaEta := deta", 'F');
   factory->AddVariable("deltaPhi := dphi", 'F');
   factory->AddVariable("sigmaIetaIeta := sieie", 'F');
   factory->AddVariable("HoverE := hoe", 'F');
   factory->AddVariable("trackIso := trackiso", 'F');
   factory->AddVariable("ecalIso := ecaliso", 'F');
   factory->AddVariable("hcalIso := hcaliso", 'F');
   //factory->AddVariable("nMissingHits := misshits", 'I');


   // You can add so-called "Spectator variables", which are not used in the MVA training, 
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
   // input variables, the response values of all trained MVAs, and the spectator variables
   factory->AddSpectator( "et",  'F' );
   factory->AddSpectator( "eta",  'F' );
   factory->AddSpectator( "phi",  'F' );


   // read training and test data
    TFile *input = TFile::Open( "SigElectrons.root" );
    TFile *inputB = TFile::Open( "BkgElectrons.root" );

   std::cout << "--- TMVAClassification : Using input file: " << input->GetName() << std::endl;
   
   TTree *signal     = (TTree*)input->Get("ntuple");
   TTree *background = (TTree*)inputB->Get("ntuple");
   
   factory->AddSignalTree    ( signal,     1.0 );
   factory->AddBackgroundTree( background, 1.0 );

   
   // This would set individual event weights (the variables defined in the 
   // expression need to exist in the original TTree)
   //    for signal    : factory->SetSignalWeightExpression("weight1*weight2");
   //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
   //factory->SetBackgroundWeightExpression("weight");

   // Apply additional cuts on the signal and background samples (can be different)

   TCut mycuts = ""; 
   TCut mycutb = ""; 



   // tell the factory to use all remaining events in the trees after training for testing:
   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // If no numbers of events are given, half of the events in the tree are used for training, and 
   // the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );  
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut, 
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );  

   // ---- Book MVA methods
   //
   // please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts", 
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );
   
   // Likelihood
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", 
                           "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); 



   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
    factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
    factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();    

   // --------------------------------------------------------------
   
   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;      

   delete factory;

//   gROOT->ProcessLine(".x /usr/local/bin/root/tmva/test/correlations.C");
   gROOT->ProcessLine(".x /usr/local/bin/root/tmva/test/variables.C");
}
示例#5
0
////////////////////////////////////////////////////////////////////////////////
/// Main                                                                     ///
////////////////////////////////////////////////////////////////////////////////
void TrainRegressionFJ(TString myMethodList="")
{
    gROOT->SetBatch(1);
    gROOT->LoadMacro("HelperFunctions.h" );  // make functions visible to TTreeFormula

    if (!TString(gROOT->GetVersion()).Contains("5.34")) {
        std::cout << "INCORRECT ROOT VERSION! Please use 5.34:" << std::endl;
        std::cout << "source /uscmst1/prod/sw/cms/slc5_amd64_gcc462/lcg/root/5.34.02-cms/bin/thisroot.csh" << std::endl;
        std::cout << "Return without doing anything." << std::endl;
        return;
    }
    
    //TString curDynamicPath( gSystem->GetDynamicPath() );
    //gSystem->SetDynamicPath( "../lib:" + curDynamicPath );

    //TString curIncludePath(gSystem->GetIncludePath());
    //gSystem->SetIncludePath( " -I../include " + curIncludePath );

    // Load the library
    TMVA::Tools::Instance();


    //--------------------------------------------------------------------------
    // Default MVA methods to be trained + tested
    std::map<std::string, int> Use;

    // --- Mutidimensional likelihood and Nearest-Neighbour methods
    Use["PDERS"]           = 0;
    Use["PDEFoam"]         = 1;
    Use["KNN"]             = 1;
    //
    // --- Linear Discriminant Analysis
    Use["LD"]              = 1;
    //
    // --- Function Discriminant analysis
    Use["FDA_GA"]          = 1;
    Use["FDA_MC"]          = 0;
    Use["FDA_MT"]          = 0;
    Use["FDA_GAMT"]        = 0;
    //
    // --- Neural Network
    Use["MLP"]             = 1; 
    //
    // --- Support Vector Machine 
    Use["SVM"]             = 0;
    // 
    // --- Boosted Decision Trees
    Use["BDT"]             = 1;
    Use["BDT1"]            = 0;
    Use["BDTG"]            = 0;
    Use["BDTG1"]           = 0;

    //--------------------------------------------------------------------------
    std::cout << std::endl;
    std::cout << "==> Start TMVARegression" << std::endl;

    // Select methods (don't look at this code - not of interest)
    if (myMethodList != "") {
        for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

        std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
        for (UInt_t i=0; i<mlist.size(); i++) {
            std::string regMethod(mlist[i]);

            if (Use.find(regMethod) == Use.end()) {
                std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
                for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
                std::cout << std::endl;
                return;
            }
            Use[regMethod] = 1;
        }
    }

    //--------------------------------------------------------------------------
    // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
    TString outfileName( "TMVARegFJ.root" );
    TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

    // Create the factory object. Later you can choose the methods
    // whose performance you'd like to investigate. The factory will
    // then run the performance analysis for you.
    //
    // The first argument is the base of the name of all the
    // weightfiles in the directory weights/
    //
    // The second argument is the output file for the training results
    // All TMVA output can be suppressed by removing the "!" (not) in 
    // front of the "Silent" argument in the option string
    TMVA::Factory *factory = new TMVA::Factory( "TMVARegressionFJ", outputFile, 
                                                "!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=Regression" );

    // If you wish to modify default settings
    // (please check "src/Config.h" to see all available global options)
    //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
    //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

    const std::vector<std::string> & inputExpressions      = GetInputExpressionsFJReg();
    const std::vector<std::string> & inputExpressionLabels = GetInputExpressionLabelsFJReg();
    assert(inputExpressions.size() == inputExpressionLabels.size());

    // Define the input variables that shall be used for the MVA training
    // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    //factory->AddVariable( "var1", "Variable 1", "units", 'F' );
    //factory->AddVariable( "var2", "Variable 2", "units", 'F' );
    
    for (UInt_t iexpr=0; iexpr!=inputExpressions.size(); iexpr++){
        Label label = MakeLabel(inputExpressionLabels.at(iexpr));
        TString expr = inputExpressions.at(iexpr);
        factory->AddVariable(expr, label.xlabel, label.unit, label.type);
    }

    // You can add so-called "Spectator variables", which are not used in the MVA training,
    // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
    // input variables, the response values of all trained MVAs, and the spectator variables
    //factory->AddSpectator( "spec1 := var1*2",  "Spectator 1", "units", 'F' );
    //factory->AddSpectator( "spec2 := var1*3",  "Spectator 2", "units", 'F' );

    // Add the variable carrying the regression target
    //factory->AddTarget( "fvalue" );
    factory->AddTarget( "fathFilterJets_genPt" );

    // It is also possible to declare additional targets for multi-dimensional regression, ie:
    // -- factory->AddTarget( "fvalue2" );
    // BUT: this is currently ONLY implemented for MLP

    //--------------------------------------------------------------------------
    // Read training and test data
    TFile *input(0);
    TString dirname = "skim_ZnnH_regression_fj/";
    TString prefix  = "skim_";
    TString suffix  = ".root";
    TTree *regTrainTree(0), *regTestTree(0);
    
    std::vector<std::string> processes;
    processes.push_back("ZnnH110");
    processes.push_back("ZnnH115");
    processes.push_back("ZnnH120");
    processes.push_back("ZnnH125");
    processes.push_back("ZnnH130");
    processes.push_back("ZnnH135");
    processes.push_back("ZnnH140");
    processes.push_back("ZnnH145");
    processes.push_back("ZnnH150");
#ifdef USE_WH
    processes.push_back("WlnH110");
    processes.push_back("WlnH115");
    processes.push_back("WlnH120");
    processes.push_back("WlnH125");
    processes.push_back("WlnH130");
    processes.push_back("WlnH135");
    processes.push_back("WlnH140");
    processes.push_back("WlnH145");
    processes.push_back("WlnH150");
#endif

    std::vector<TFile *> files;
    for (UInt_t i=0; i<processes.size(); i++){
        std::string process = processes.at(i);
        input = (TFile*) TFile::Open(dirname + prefix + process + suffix, "READ");
        if (!input) {
            std::cout << "ERROR: Could not open input file." << std::endl;
            exit(1);
        }
        std::cout << "--- TMVARegression           : Using input file: " << input->GetName() << std::endl;
        files.push_back(input);
        
        // --- Register the regression tree
        regTrainTree = (TTree*) input->Get("tree_train");
        regTestTree  = (TTree*) input->Get("tree_test");

        // Global event weights per tree (see below for setting event-wise weights)
        Double_t regWeight = 1.0;

        // You can add an arbitrary number of regression trees
        factory->AddRegressionTree(regTrainTree, regWeight, TMVA::Types::kTraining);
        factory->AddRegressionTree(regTestTree , regWeight, TMVA::Types::kTesting );
    }

    // Set individual event weights (the variables must exist in the original TTree)
    //factory->SetWeightExpression( "var1", "Regression" );

    // Apply additional cuts on the signal and background samples (can be different)
    TCut mycut = "fathFilterJets_genPt>10 && fathFilterJets_pt>15 && abs(fathFilterJets_eta)<2.5"; // this is to avoid 3rd filter jet without gen match
    //TCut mycut = "hJet_genPt[0] > 0. && hJet_genPt[1] > 0. && hJet_csv[0] > 0. && hJet_csv[1] > 0. && hJet_pt[0] > 20. && hJet_pt[1] > 20. && abs(hJet_eta[0])<2.5 && abs(hJet_eta[1])<2.5";

    // Tell the factory to use all remaining events in the trees after training for testing:
    factory->PrepareTrainingAndTestTree( mycut, "V:nTrain_Regression=0:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents" );

    // If no numbers of events are given, half of the events in the tree are used 
    // for training, and the other half for testing:
    //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=Random:!V" );

    // --- Book MVA methods
    //
    // Please lookup the various method configuration options in the corresponding cxx files, eg:
    // src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    // it is possible to preset ranges in the option string in which the cut optimisation should be done:
    // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    // PDE - RS method
    if (Use["PDERS"])
        factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                             "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" );
    // And the options strings for the MinMax and RMS methods, respectively:
    //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );   
    //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );   

    if (Use["PDEFoam"])
        factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                             "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" );

    // K-Nearest Neighbour classifier (KNN)
    if (Use["KNN"])
        factory->BookMethod( TMVA::Types::kKNN, "KNN",
                             "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

    // Linear discriminant
    if (Use["LD"])
        factory->BookMethod( TMVA::Types::kLD, "LD", 
                             "!H:!V:VarTransform=None" );

    // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if (Use["FDA_MC"])
        factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                             "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" );

    if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. the formula of this example is good for parabolas
        factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                             "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" );

    if (Use["FDA_MT"])
        factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                             "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

    if (Use["FDA_GAMT"])
        factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                             "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

    // Neural network (MLP)
    if (Use["MLP"])
        factory->BookMethod( TMVA::Types::kMLP, "MLP", 
                             "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" );

    // Support Vector Machine
    if (Use["SVM"])
        factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

    // Boosted Decision Trees
    if (Use["BDT"])
        factory->BookMethod( TMVA::Types::kBDT, "BDT",
                             "!H:V:NTrees=100:nEventsMin=30:NodePurityLimit=0.5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" );
//"!H:V:NTrees=60:nEventsMin=20:NodePurityLimit=0.5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30:DoBoostMonitor" );

    if (Use["BDT1"])
        factory->BookMethod( TMVA::Types::kBDT, "BDT1",
                             "!H:V:NTrees=100:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" );

    if (Use["BDTG"])
        factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                             "!H:V:NTrees=2000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.7:nCuts=200:MaxDepth=3:NNodesMax=15" );

    if (Use["BDTG1"])
        factory->BookMethod( TMVA::Types::kBDT, "BDTG1",
                             "!H:V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:MaxDepth=3:NNodesMax=15" );

    //--------------------------------------------------------------------------
    // Train MVAs using the set of training events
    factory->TrainAllMethods();

    // --- Evaluate all MVAs using the set of test events
    factory->TestAllMethods();

    // --- Evaluate and compare performance of all configured MVAs
    factory->EvaluateAllMethods();

    //--------------------------------------------------------------------------
    // Save the output
    outputFile->Close();

    std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
    std::cout << "==> TMVARegression is done!" << std::endl;

    for (UInt_t i=0; i<files.size(); i++)
        files.at(i)->Close();

    delete outputFile;
    delete factory;

    // Launch the GUI for the root macros
    //gROOT->SetMacroPath( "$ROOTSYS/tmva/macros/" );
    //gROOT->Macro( "$ROOTSYS/tmva/macros/TMVAlogon.C" );
    //gROOT->LoadMacro( "$ROOTSYS/tmva/macros/TMVAGui.C" );
    //if (!gROOT->IsBatch()) TMVARegGui( outfileName );
}
void TMVAClassification( TString myMethodList = "" )
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   // if you like to use a method via the plugin mechanism, we recommend using
   //
   // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
   // (an example is given for using the BDT as plugin (see below),
   // but of course the real application is when you write your own
   // method based)

   // this loads the library
   TMVA::Tools::Instance();

   //---------------------------------------------------------------
   // default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   Use["Cuts"]            = 0;
   Use["CutsD"]           = 0;
   Use["CutsPCA"]         = 0;
   Use["CutsGA"]          = 0;
   Use["CutsSA"]          = 0;
   // ---
   Use["Likelihood"]      = 1;
   Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"]   = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"]   = 0;
   Use["LikelihoodMIX"]   = 0;
   // ---
   Use["PDERS"]           = 0;
   Use["PDERSD"]          = 0;
   Use["PDERSPCA"]        = 0;
   Use["PDERSkNN"]        = 0; // depreciated until further notice
   Use["PDEFoam"]         = 0;
   // --
   Use["KNN"]             = 0;
   // ---
   Use["HMatrix"]         = 0;
   Use["Fisher"]          = 0;
   Use["FisherG"]         = 0;
   Use["BoostedFisher"]   = 0;
   Use["LD"]              = 0;
   // ---
   Use["FDA_GA"]          = 0;
   Use["FDA_SA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   Use["FDA_MCMT"]        = 0;
   // ---
   Use["MLP"]             = 0; // this is the recommended ANN
   Use["MLPBFGS"]         = 0; // recommended ANN with optional training method
   Use["MLPBNN"]          = 0;  // recommended ANN with BFGS training method and bayesian regulator
   Use["CFMlpANN"]        = 0; // *** missing
   Use["TMlpANN"]         = 0;
   // ---
   Use["SVM"]             = 0;
   // ---
   Use["BDT"]             = 1;
   Use["BDTD"]            = 0;
   Use["BDTG"]            = 0;
   Use["BDTB"]            = 0;
   // ---
   Use["RuleFit"]         = 0;
   // ---
   Use["Plugin"]          = 0;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 0;
      }
   }

   // Create a new root output file.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory will
   // then run the performance analysis for you.
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
//   factory->AddVariable( "myvar1 := var1+var2", 'F' );
//   factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' );
//   factory->AddVariable( "var3",                "Variable 3", "units", 'F' );
//   factory->AddVariable( "var4",                "Variable 4", "units", 'F' );

   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
//   factory->AddSpectator( "spec1:=var1*2",  "Spectator 1", "units", 'F' );
//   factory->AddSpectator( "spec2:=var1*3",  "Spectator 2", "units", 'F' );

   // read training and test data

factory->AddVariable("CScostheta",'F');

factory->AddVariable("ZRapidity",'F');

factory->AddVariable("REDmet",'F');
   if (ReadDataFromAsciiIFormat) {
      // load the signal and background event samples from ascii files
      // format in file must be:
      // var1/F:var2/F:var3/F:var4/F
      // 0.04551   0.59923   0.32400   -0.19170
      // ...

      TString datFileS = "tmva_example_sig.dat";
      TString datFileB = "tmva_example_bkg.dat";

      factory->SetInputTrees( datFileS, datFileB );
   }
   else {
      // load the signal and background event samples from ROOT trees
      TString fname = "./tmva_class_example.root";

      TString fname_Data7TeV_DoubleElectron2011B_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleElectron2011B_0.root";

      TString fname_Data7TeV_MuEG2011B_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_MuEG2011B_0.root";

      TString fname_ZH125 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH125.root";

      TString fname_SingleT_tW = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleT_tW.root";

      TString fname_SingleT_s = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleT_s.root";

      TString fname_Data7TeV_DoubleMu2011B_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleMu2011B_0.root";

      TString fname_Data7TeV_DoubleElectron2011A_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleElectron2011A_0.root";

      TString fname_ZH135 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH135.root";

      TString fname_DYJetsToLL = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//DYJetsToLL.root";

      TString fname_ZH115 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH115.root";

      TString fname_SingleTbar_t = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleTbar_t.root";

      TString fname_Data7TeV_DoubleElectron2011B_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleElectron2011B_1.root";

      TString fname_Data7TeV_DoubleMu2011A_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleMu2011A_1.root";

      TString fname_Data7TeV_MuEG2011A_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_MuEG2011A_1.root";

      TString fname_TTJets = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//TTJets.root";

      TString fname_SingleTbar_s = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleTbar_s.root";

      TString fname_WJetsToLNu = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//WJetsToLNu.root";

      TString fname_Data7TeV_DoubleElectron2011A_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleElectron2011A_1.root";

      TString fname_ZZ = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZZ.root";

      TString fname_ZH150 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH150.root";

      TString fname_Data7TeV_MuEG2011B_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_MuEG2011B_1.root";

      TString fname_WW = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//WW.root";

      TString fname_ZH105 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH105.root";

      TString fname_Data7TeV_DoubleMu2011A_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleMu2011A_0.root";

      TString fname_SingleTbar_tW = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleTbar_tW.root";

      TString fname_WZ = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//WZ.root";

      TString fname_Data7TeV_MuEG2011A_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_MuEG2011A_0.root";

      TString fname_Data7TeV_DoubleMu2011B_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleMu2011B_1.root";

      TString fname_ZH145 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH145.root";

      TString fname_SingleT_t = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleT_t.root";


      if (gSystem->AccessPathName( fname ))  // file does not exist in local directory
         gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root");

      TFile *input_Data7TeV_DoubleElectron2011B_0 = TFile::Open( fname_Data7TeV_DoubleElectron2011B_0 );

      TFile *input_Data7TeV_MuEG2011B_0 = TFile::Open( fname_Data7TeV_MuEG2011B_0 );

      TFile *input_ZH125 = TFile::Open( fname_ZH125 );

      TFile *input_SingleT_tW = TFile::Open( fname_SingleT_tW );

      TFile *input_SingleT_s = TFile::Open( fname_SingleT_s );

      TFile *input_Data7TeV_DoubleMu2011B_0 = TFile::Open( fname_Data7TeV_DoubleMu2011B_0 );

      TFile *input_Data7TeV_DoubleElectron2011A_0 = TFile::Open( fname_Data7TeV_DoubleElectron2011A_0 );

      TFile *input_ZH135 = TFile::Open( fname_ZH135 );

      TFile *input_DYJetsToLL = TFile::Open( fname_DYJetsToLL );

      TFile *input_ZH115 = TFile::Open( fname_ZH115 );

      TFile *input_SingleTbar_t = TFile::Open( fname_SingleTbar_t );

      TFile *input_Data7TeV_DoubleElectron2011B_1 = TFile::Open( fname_Data7TeV_DoubleElectron2011B_1 );

      TFile *input_Data7TeV_DoubleMu2011A_1 = TFile::Open( fname_Data7TeV_DoubleMu2011A_1 );

      TFile *input_Data7TeV_MuEG2011A_1 = TFile::Open( fname_Data7TeV_MuEG2011A_1 );

      TFile *input_TTJets = TFile::Open( fname_TTJets );

      TFile *input_SingleTbar_s = TFile::Open( fname_SingleTbar_s );

      TFile *input_WJetsToLNu = TFile::Open( fname_WJetsToLNu );

      TFile *input_Data7TeV_DoubleElectron2011A_1 = TFile::Open( fname_Data7TeV_DoubleElectron2011A_1 );

      TFile *input_ZZ = TFile::Open( fname_ZZ );

      TFile *input_ZH150 = TFile::Open( fname_ZH150 );

      TFile *input_Data7TeV_MuEG2011B_1 = TFile::Open( fname_Data7TeV_MuEG2011B_1 );

      TFile *input_WW = TFile::Open( fname_WW );

      TFile *input_ZH105 = TFile::Open( fname_ZH105 );

      TFile *input_Data7TeV_DoubleMu2011A_0 = TFile::Open( fname_Data7TeV_DoubleMu2011A_0 );

      TFile *input_SingleTbar_tW = TFile::Open( fname_SingleTbar_tW );

      TFile *input_WZ = TFile::Open( fname_WZ );

      TFile *input_Data7TeV_MuEG2011A_0 = TFile::Open( fname_Data7TeV_MuEG2011A_0 );

      TFile *input_Data7TeV_DoubleMu2011B_1 = TFile::Open( fname_Data7TeV_DoubleMu2011B_1 );

      TFile *input_ZH145 = TFile::Open( fname_ZH145 );

      TFile *input_SingleT_t = TFile::Open( fname_SingleT_t );


      std::cout << "--- TMVAClassification       : Using input_Data7TeV_DoubleElectron2011B_0 file: " << input_Data7TeV_DoubleElectron2011B_0->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_MuEG2011B_0 file: " << input_Data7TeV_MuEG2011B_0->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_ZH125 file: " << input_ZH125->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_SingleT_tW file: " << input_SingleT_tW->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_SingleT_s file: " << input_SingleT_s->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_DoubleMu2011B_0 file: " << input_Data7TeV_DoubleMu2011B_0->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_DoubleElectron2011A_0 file: " << input_Data7TeV_DoubleElectron2011A_0->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_ZH135 file: " << input_ZH135->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_DYJetsToLL file: " << input_DYJetsToLL->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_ZH115 file: " << input_ZH115->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_SingleTbar_t file: " << input_SingleTbar_t->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_DoubleElectron2011B_1 file: " << input_Data7TeV_DoubleElectron2011B_1->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_DoubleMu2011A_1 file: " << input_Data7TeV_DoubleMu2011A_1->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_MuEG2011A_1 file: " << input_Data7TeV_MuEG2011A_1->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_TTJets file: " << input_TTJets->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_SingleTbar_s file: " << input_SingleTbar_s->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_WJetsToLNu file: " << input_WJetsToLNu->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_DoubleElectron2011A_1 file: " << input_Data7TeV_DoubleElectron2011A_1->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_ZZ file: " << input_ZZ->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_ZH150 file: " << input_ZH150->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_MuEG2011B_1 file: " << input_Data7TeV_MuEG2011B_1->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_WW file: " << input_WW->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_ZH105 file: " << input_ZH105->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_DoubleMu2011A_0 file: " << input_Data7TeV_DoubleMu2011A_0->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_SingleTbar_tW file: " << input_SingleTbar_tW->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_WZ file: " << input_WZ->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_MuEG2011A_0 file: " << input_Data7TeV_MuEG2011A_0->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_Data7TeV_DoubleMu2011B_1 file: " << input_Data7TeV_DoubleMu2011B_1->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_ZH145 file: " << input_ZH145->GetName() << std::endl;

      std::cout << "--- TMVAClassification       : Using input_SingleT_t file: " << input_SingleT_t->GetName() << std::endl;


      TTree *signal_ZH145     = (TTree*)input_ZH145->Get("tmvatree");

      TTree *background_ZZ = (TTree*)input_ZZ->Get("tmvatree");


      // global event weights per tree (see below for setting event-wise weights)
      Double_t signalWeight     = 1.0;
      Double_t backgroundWeight = 1.0;

      // ====== register trees ====================================================
      //
      // the following method is the prefered one:
      // you can add an arbitrary number of signal or background trees
      factory->AddSignalTree    ( signal_ZH145,     1.0     );

      factory->AddBackgroundTree( background_ZZ, 1.0 );


      // To give different trees for training and testing, do as follows:
      //    factory->AddSignalTree( signal_ZH145TrainingTree, signal_ZH145TrainWeight, "Training" );

      //    factory->AddSignalTree( signal_ZH145TestTree,     signal_ZH145TestWeight,  "Test" );


      // Use the following code instead of the above two or four lines to add signal and background
      // training and test events "by hand"
      // NOTE that in this case one should not give expressions (such as "var1+var2") in the input
      //      variable definition, but simply compute the expression before adding the event
      //
      //    // --- begin ----------------------------------------------------------
      //    std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
      //    Float_t  treevars[4];
      //    for (Int_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
      //    for (Int_t i=0; i<signal->GetEntries(); i++) {
      //       signal->GetEntry(i);
      //       for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
      //       // add training and test events; here: first half is training, second is testing
      //       // note that the weight can also be event-wise
      //       if (i < signal->GetEntries()/2) factory->AddSignalTrainingEvent( vars, signalWeight );
      //       else                            factory->AddSignalTestEvent    ( vars, signalWeight );
      //    }
      //
      //    for (Int_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
      //    for (Int_t i=0; i<background->GetEntries(); i++) {
      //       background->GetEntry(i);
      //       for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
      //       // add training and test events; here: first half is training, second is testing
      //       // note that the weight can also be event-wise
      //       if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight );
      //       else                                factory->AddBackgroundTestEvent    ( vars, backgroundWeight );
      //    }
      //    // --- end ------------------------------------------------------------
      //
      // ====== end of register trees ==============================================
   }

   // This would set individual event weights (the variables defined in the
   // expression need to exist in the original TTree)
   //    for signal    : factory->SetSignalWeightExpression("weight1*weight2");
   //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
   factory->SetBackgroundWeightExpression("Eweight*XS*BR*LUM*(1/NGE)*(B2/B3)*CUT");
   factory->SetSignalWeightExpression("Eweight*XS*BR*LUM*(1/NGE)*(B2/B3)*CUT");

   // Apply additional cuts on the signal and background samples (can be different)
TCut mycuts = "(CUT>2)";
TCut mycutb = "(CUT>2)";

   // tell the factory to use all remaining events in the trees after training for testing:

 factory->PrepareTrainingAndTestTree( mycuts, "SplitMode=random:!V" );
//                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // If no numbers of events are given, half of the events in the tree are used for training, and
   // the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut,
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );

   // ---- Book MVA methods
   //
   // please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:Seed=0:EffSel:Steps=50:Cycles=3:PopSize=1000:SC_steps=10:SC_rate=5:SC_factor=0.95" );

   if (Use["CutsSA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                           "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   // Likelihood
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // test the decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 

   // test the new kernel density estimator
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); 

   // test the mixed splines and kernel density estimator (depending on which variable)
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

   // test the multi-dimensional probability density estimator
   // here are the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSkNN"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSkNN",
                           "!H:!V:VolumeRangeMode=kNN:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:CutNmin=T:Nmin=100:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );
   // H-Matrix (chi2-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" );

   // Fisher discriminant
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=60:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2");

   // Linear discriminant (same as Fisher)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators

   // CF(Clermont-Ferrand)ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...  

   // Tmlp(Root)ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDTG"]) // Gradient Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" );

   if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=1000:nEventsMin=400:MaxDepth=6:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                           "!H:!V:NTrees=1000:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=1000:nEventsMin=400:MaxDepth=6:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // For an example of the category classifier, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------

   // As an example how to use the ROOT plugin mechanism, book BDT via
   // plugin mechanism
   if (Use["Plugin"]) {
         //
         // first the plugin has to be defined, which can happen either through the following line in the local or global .rootrc:
         //
         // # plugin handler          plugin name(regexp) class to be instanciated library        constructor format
         // Plugin.TMVA@@MethodBase:  ^BDT                TMVA::MethodBDT          TMVA.1         "MethodBDT(TString,TString,DataSet&,TString)"
         //
         // or by telling the global plugin manager directly
      gPluginMgr->AddHandler("TMVA@@MethodBase", "BDT", "TMVA::MethodBDT", "TMVA.1", "MethodBDT(TString,TString,DataSet&,TString)");
      factory->BookMethod( TMVA::Types::kPlugins, "BDT",
                           "!H:!V:NTrees=1000:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=50" );
   }

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros

gROOT->ProcessLine(".q;");
}
示例#7
0
void Classification()
{
   TMVA::Tools::Instance();
   TMVA::PyMethodBase::PyInitialize();

   TString outfileName("TMVA.root");
   TFile *outputFile = TFile::Open(outfileName, "RECREATE");

   TMVA::Factory *factory = new TMVA::Factory("TMVAClassification", outputFile,
         "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification");


   factory->AddVariable("myvar1 := var1+var2", 'F');
   factory->AddVariable("myvar2 := var1-var2", "Expression 2", "", 'F');
   factory->AddVariable("var3",                "Variable 3", "units", 'F');
   factory->AddVariable("var4",                "Variable 4", "units", 'F');


   factory->AddSpectator("spec1 := var1*2",  "Spectator 1", "units", 'F');
   factory->AddSpectator("spec2 := var1*3",  "Spectator 2", "units", 'F');


   TString fname = "./tmva_class_example.root";

   if (gSystem->AccessPathName(fname))    // file does not exist in local directory
      gSystem->Exec("curl -O http://root.cern.ch/files/tmva_class_example.root");

   TFile *input = TFile::Open(fname);

   std::cout << "--- TMVAClassification       : Using input file: " << input->GetName() << std::endl;

   // --- Register the training and test trees

   TTree *tsignal     = (TTree *)input->Get("TreeS");
   TTree *tbackground = (TTree *)input->Get("TreeB");

   // global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;

   // You can add an arbitrary number of signal or background trees
   factory->AddSignalTree(tsignal,     signalWeight);
   factory->AddBackgroundTree(tbackground, backgroundWeight);


   // Set individual event weights (the variables must exist in the original TTree)
   factory->SetBackgroundWeightExpression("weight");


   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";

   // Tell the factory how to use the training and testing events
   factory->PrepareTrainingAndTestTree(mycuts, mycutb,
                                       "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=0:nTest_Background=0:SplitMode=Random:NormMode=NumEvents:!V");


   ///////////////////
   //Booking         //
   ///////////////////
   // Boosted Decision Trees

   //PyMVA methods
   factory->BookMethod(TMVA::Types::kPyRandomForest, "PyRandomForest",
                       "!V:NEstimators=150:Criterion=gini:MaxFeatures=auto:MaxDepth=3:MinSamplesLeaf=1:MinWeightFractionLeaf=0:Bootstrap=kTRUE");
   factory->BookMethod(TMVA::Types::kPyAdaBoost, "PyAdaBoost",
                       "!V:BaseEstimator=None:NEstimators=100:LearningRate=1:Algorithm=SAMME.R:RandomState=None");
   factory->BookMethod(TMVA::Types::kPyGTB, "PyGTB",
                       "!V:NEstimators=150:Loss=deviance:LearningRate=0.1:Subsample=1:MaxDepth=6:MaxFeatures='auto'");


   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();
   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

}
示例#8
0
void mytmvaClass( TString myMethodList = "" )
{
  // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
  // if you use your private .rootrc, or run from a different directory, please copy the
  // corresponding lines from .rootrc

  // methods to be processed can be given as an argument; use format:
  //
  // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
  //
  // if you like to use a method via the plugin mechanism, we recommend using
  //
  // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)

  //--------------------------------
  TMVA::Tools::Instance();

  // Default MVA methods to be trained + tested
  std::map<std::string,int> Use;

  // --- Cut optimisation
  Use["Cuts"]            = 0;
  Use["CutsD"]           = 0;
  Use["CutsPCA"]         = 0;
  Use["CutsGA"]          = 0;
  Use["CutsSA"]          = 0;
  //
  // --- 1-dimensional likelihood ("naive Bayes estimator")
  Use["Likelihood"]      = 0;
  Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
  Use["LikelihoodPCA"]   = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
  Use["LikelihoodKDE"]   = 0;
  Use["LikelihoodMIX"]   = 0;
  //
  // --- Mutidimensional likelihood and Nearest-Neighbour methods
  Use["PDERS"]           = 0;
  Use["PDERSD"]          = 0;
  Use["PDERSPCA"]        = 0;
  Use["PDEFoam"]         = 0;
  Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
  Use["KNN"]             = 0; // k-nearest neighbour method
  //
  // --- Linear Discriminant Analysis
  Use["LD"]              = 0; // Linear Discriminant identical to Fisher
  Use["Fisher"]          = 0;
  Use["FisherG"]         = 0;
  Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
  Use["HMatrix"]         = 0;
  //
  // --- Function Discriminant analysis
  Use["FDA_GA"]          = 0; // minimisation of user-defined function using Genetics Algorithm
  Use["FDA_SA"]          = 0;
  Use["FDA_MC"]          = 0;
  Use["FDA_MT"]          = 0;
  Use["FDA_GAMT"]        = 0;
  Use["FDA_MCMT"]        = 0;
  //
  // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
  Use["MLP"]             = 0; // Recommended ANN
  Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
  Use["MLPBNN"]          = 0; // Recommended ANN with BFGS training method and bayesian regulator
  Use["CFMlpANN"]        = 0; // Depreciated ANN from ALEPH
  Use["TMlpANN"]         = 0; // ROOT's own ANN
  //
  // --- Support Vector Machine 
  Use["SVM"]             = 0;
  // 
  // --- Boosted Decision Trees
  Use["BDT"]             = 1; // uses Adaptive Boost
  Use["BDTG"]            = 0; // uses Gradient Boost
  Use["BDTB"]            = 0; // uses Bagging
  Use["BDTD"]            = 0; // decorrelation + Adaptive Boost
  // 
  // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
  Use["RuleFit"]         = 0;
  // ---------------------------------------------------------------

  std::cout << std::endl;
  std::cout << "==> Start TMVAClassification" << std::endl;

  // Select methods (Not of much interest)
  if (myMethodList != "") {
    for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

    std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
    for (UInt_t i=0; i<mlist.size(); i++) {
      std::string regMethod(mlist[i]);

      if (Use.find(regMethod) == Use.end()) {
	std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
	for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
	std::cout << std::endl;
	return;
      }
      Use[regMethod] = 1;
    }
  }

  // --------------------------------------------------------------------------------------------------

  // --- Here the preparation phase begins

  // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
  TString outfileName( "TMVA.root" );
  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

  // Create the factory object. Later you can choose the methods
  // whose performance you'd like to investigate. The factory is 
  // the only TMVA object you have to interact with
  //
  // The first argument is the base of the name of all the
  // weightfiles in the directory weight/
  //
  // The second argument is the output file for the training results
  // All TMVA output can be suppressed by removing the "!" (not) in
  // front of the "Silent" argument in the option string
  TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
					      "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

  // If you wish to modify default settings
  // (please check "src/Config.h" to see all available global options)
  //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
  //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

  // Define the input variables that shall be used for the MVA training
  // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"


  //Syntax --> "factory->AddVariable( "VarName", "UserDefinedLabel", "units", 'VarType' );" //Where VarType refers to float, int etc.

  Float_t nhiggsJetSubjets;
  Float_t njets;
  Float_t deltaPhiWH;
  Float_t jetPt[11];
  Float_t jetEta[11];
  Float_t jetPhi[11];
  Float_t jetE[11];
  Float_t higgsJetSubjetEta[3];
  Float_t higgsJetSubjetPhi[3];
  Float_t higgsJetSubjetPt[3];
  Float_t higgsJetSubjetE[3];
  Float_t Wphi;
  Float_t fatjetPt[15];
  Float_t fatjetEta[15];
  Float_t fatjetPhi[15];
  Float_t fatjetE[15];
  Float_t DeltaPhiWH;
  


  TLorentzVector fatjet(0,0,0,0);
  TLorentzVector subjet1(0,0,0,0);
  TLorentzVector subjet2(0,0,0,0);
  TLorentzVector subjet3(0,0,0,0);
  TLorentzVector akt[15];
  fatjet.SetPtEtaPhiE(fatjetPt[0],fatjetEta[0],fatjetPhi[0], fatjetE[0]);
  //   deltaPhiWH = std::fabs(Wphi-fatjetPhi[0]);
//   if(deltaPhiWH>TMath::Pi()){ std::fabs(deltaPhiWH = 2*TMath::Pi()-deltaPhiWH);} 

  
  
  factory->AddVariable( "deltaPhiWH", "deltaPhiWH", "", 'F' );
  factory->AddVariable( "jetPt.array[0]", "jetPt.array[0]", "", 'F' );
  factory->AddVariable( "fatjetPt[0].array[0]", "fatjetPt.array[0]", "", 'F' );
  //factory->AddVariable( "DeltaR[3][13]", "DeltaR[3][13]", "", 'F' );
  // factory->AddSpectator( "nhiggsJetSubjets", "nhiggsJetSubjets", "", 'F');
//   factory->AddSpectator( "njets", "", "njets", 'I');
//   factory->AddSpectator( "jetPt[0]", "jetPt[0]", "", 'F');
//   factory->AddSpectator( "jetPt[1]", "jetPt[1]", "", 'F');
//   factory->AddSpectator( "jetPt[2]", "jetPt[2]", "", 'F');
//   factory->AddSpectator( "jetPt[3]", "jetPt[3]", "", 'F');
//   factory->AddSpectator( "jetPt[4]", "jetPt[4]", "", 'F');
//   factory->AddSpectator( "jetPt[5]", "jetPt[5]", "", 'F');
//   factory->AddSpectator( "jetPt[6]", "jetPt[6]", "", 'F');
//   factory->AddSpectator( "jetPt[7]", "jetPt[7]", "", 'F');
//   factory->AddSpectator( "jetPt[8]", "jetPt[8]", "", 'F');
//   factory->AddSpectator( "jetPt[9]", "jetPt[9]", "", 'F');
//   factory->AddSpectator( "jetPt[10]", "jetPt[10]", "", 'F');
//   factory->AddSpectator( "jetEta[0]", "jetEta[0]", "", 'F');
//   factory->AddSpectator( "jetEta[1]", "jetEta[1]", "", 'F');
//   factory->AddSpectator( "jetEta[2]", "jetEta[2]", "", 'F');
//   factory->AddSpectator( "jetEta[3]", "jetEta[3]", "", 'F');
//   factory->AddSpectator( "jetEta[4]", "jetEta[4]", "", 'F');
//   factory->AddSpectator( "jetEta[5]", "jetEta[5]", "", 'F');
//   factory->AddSpectator( "jetEta[6]", "jetEta[6]", "", 'F');
//   factory->AddSpectator( "jetEta[7]", "jetEta[7]", "", 'F');
//   factory->AddSpectator( "jetEta[8]", "jetEta[8]", "", 'F');
//   factory->AddSpectator( "jetEta[9]", "jetEta[9]", "", 'F');
//   factory->AddSpectator( "jetEta[10]", "jetEta[10]", "", 'F');
//   factory->AddSpectator( "jetPhi[0]", "jetPhi[0]", "", 'F');
//   factory->AddSpectator( "jetPhi[1]", "jetPhi[1]", "", 'F');
//   factory->AddSpectator( "jetPhi[2]", "jetPhi[2]", "", 'F');
//   factory->AddSpectator( "jetPhi[3]", "jetPhi[3]", "", 'F');
//   factory->AddSpectator( "jetPhi[4]", "jetPhi[4]", "", 'F');
//   factory->AddSpectator( "jetPhi[5]", "jetPhi[5]", "", 'F');
//   factory->AddSpectator( "jetPhi[6]", "jetPhi[6]", "", 'F');
//   factory->AddSpectator( "jetPhi[7]", "jetPhi[7]", "", 'F');
//   factory->AddSpectator( "jetPhi[8]", "jetPhi[8]", "", 'F');
//   factory->AddSpectator( "jetPhi[9]", "jetPhi[9]", "", 'F');
//   factory->AddSpectator( "jetPhi[10]", "jetPhi[10]", "", 'F');
//   factory->AddSpectator("jetE[0]", "jetE[0]", "", 'F');
//   factory->AddSpectator("jetE[1]", "jetE[1]", "", 'F');
//   factory->AddSpectator("jetE[2]", "jetE[2]", "", 'F');
//   factory->AddSpectator("higgsJetSubjetEta[0]","higgsJetSubjetEta[0]", "", 'F');
//   factory->AddSpectator("higgsJetSubjetEta[1]","higgsJetSubjetEta[1]", "", 'F');
//   factory->AddSpectator("higgsJetSubjetEta[2]","higgsJetSubjetEta[2]", "", 'F');
//   factory->AddSpectator("higgsJetSubjetPhi[0]","higgsJetSubjetPhi[0]", "", 'F');
//   factory->AddSpectator("higgsJetSubjetPhi[1]","higgsJetSubjetPhi[1]", "", 'F');
//   factory->AddSpectator("higgsJetSubjetPhi[2]","higgsJetSubjetPhi[2]", "", 'F');
//   factory->AddSpectator("higgsJetSubjetPt[0]","higgsJetSubjetPt[0]", "", 'F');
//   factory->AddSpectator("higgsJetSubjetPt[1]","higgsJetSubjetPt[1]", "", 'F');
//   factory->AddSpectator("higgsJetSubjetPt[2]","higgsJetSubjetPt[2]", "", 'F');
//   factory->AddSpectator("higgsJetSubjetE[0]","higgsJetSubjetE[0]", "", 'F');
//   factory->AddSpectator("higgsJetSubjetE[1]","higgsJetSubjetE[1]", "", 'F');
//   factory->AddSpectator("higgsJetSubjetE[2]","higgsJetSubjetE[2]", "", 'F');
//   factory->AddSpectator("Wphi", "Wphi", "" , 'F');
//   factory->AddVariable("fatjetPt[0]", "fatjetPt[0]", "", 'F');
//   factory->AddSpectator("fatjetPhi[0]", "fatjetPhi[0]", "", 'F');
//   factory->AddSpectator("fatjetEta[0]", "fatjetEta[0]", "", 'F');
//   factory->AddSpectator("fatjetE[0]", "fatjetE[0]", "", 'F');
//   //if(std::fabs(Wphi-fatjetPhi[0])<TMath::Pi()){
//     factory->AddVariable("abs(Wphi-fatjetPhi[0])", "DeltaPhiWH", "", 'F'); 
//     //}
//   if ((abs(Wphi-fatjetPhi[0])>TMath::Pi())){
//     factory->AddVariable("(abs(DeltaPhiWH = 2*(TMath::Pi())-DeltaPhiWH))", "DeltaPhiWH", "", 'F');
//   }
  


  // Here, we add the lepton flag as a spectator so we can cut (further down) on lep == 0 for muons only or lep == 1 for electrons only.
  //factory->AddSpectator( "lepType[0]", "lepType[0]", "units", 'I');


 

  // You can add so-called "Spectator variables", which are not used in the MVA training,
  // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
  // input variables, the response values of all trained MVAs, and the spectator variables
  //			factory->AddSpectator( "spec1 := var1*2",  "Spectator 1", "units", 'F' );
  //			factory->AddSpectator( "spec2 := var1*3",  "Spectator 2", "units", 'F' );

  // --- Read training and test data
  // (it is also possible to use ASCII format as input -> see TMVA Users Guide)

  TString fsig_name_WH = "/Disk/speyside7/Grid/grid-files/rsmith/SummerProject/OverlapperInputFiles/PreCutsApplied/mc11_7TeV.128034.HerwigppPowHeg_Boosted_WH125lnubb.merge.NTUP_HSG5WH.Output_PreCuts-Original.root";
  TString fback_name_TT = "/Disk/speyside7/Grid/grid-files/rsmith/SummerProject/OverlapperInputFiles/PreCutsApplied/TT_NewBranch2.output_MERGE_PreCuts-Original.root";
  TString fback_name_Wbb = "/Disk/speyside7/Grid/grid-files/rsmith/SummerProject/OverlapperInputFiles/PreCutsApplied/mc11_7TeV.128861.SherpaWbb2J_lnu_Wpt100GeV_PreCuts-Original.root";


  TFile *sig_input_WH = TFile::Open( fsig_name_WH );
  TFile *back_input_TT = TFile::Open( fback_name_TT );
  TFile *back_input_Wbb = TFile::Open( fback_name_Wbb);
   
    
   // --- Register the training and test trees
  TTree *signal_WH     = (TTree*)sig_input_WH->Get("OutputTree");
  TTree *background_TT = (TTree*)back_input_TT->Get("OutputTree"); 
  TTree * background_Wbb = (TTree*)back_input_Wbb->Get("OutputTree");
  
  
 
  // --- global event weights per tree (see below for setting event-wise weights)
  Double_t signalWeight_WH     = 1;
  Double_t backgroundWeight_TT = 1;
  Double_t backgroundWeight_Wbb= 1;
  
  
  
  // --- Here can add an arbitrary number of signal or background trees
  factory->AddSignalTree    ( signal_WH,     signalWeight_WH     );
  factory->AddBackgroundTree( background_TT, backgroundWeight_TT );
  factory->AddBackgroundTree( background_Wbb, backgroundWeight_Wbb);


 Float_t nhiggsJetSubjets;
  Float_t njets;
  Float_t deltaPhiWH;
  Float_t jetPt[11];
  Float_t jetEta[11];
  Float_t jetPhi[11];
  Float_t jetE[11];
  Float_t higgsJetSubjetEta[3];
  Float_t higgsJetSubjetPhi[3];
  Float_t higgsJetSubjetPt[3];
  Float_t higgsJetSubjetE[3];
  Float_t Wphi;
  Float_t fatjetPt[15];
  Float_t fatjetEta[15];
  Float_t fatjetPhi[15];
  Float_t fatjetE[15];
  


  TLorentzVector fatjet(0,0,0,0);
  TLorentzVector subjet1(0,0,0,0);
  TLorentzVector subjet2(0,0,0,0);
  TLorentzVector subjet3(0,0,0,0);
  TLorentzVector akt[15];
  fatjet.SetPtEtaPhiE(fatjetPt[0],fatjetEta[0],fatjetPhi[0], fatjetE[0]);
  deltaPhiWH = std::fabs(Wphi-fatjetPhi[0]);
  if(deltaPhiWH>TMath::Pi()){ std::fabs(deltaPhiWH = 2*TMath::Pi()-deltaPhiWH);} 


  float deltaEta[3][11];
  float deltaPhi[3][11];
  float DeltaR[3][13];
  
  for(int j = 0; j<nhiggsJetSubjets; j++){
    for(int i = 0; i<njets; i++){
      deltaEta[j][i] = std::fabs(higgsJetSubjetEta[j]-jetEta[i]);
      deltaPhi[j][i] = std::fabs(higgsJetSubjetPhi[j]-jetPhi[i]);
      
      DeltaR[j][i] = std::sqrt(deltaEta[j][i]*deltaEta[j][i]+deltaPhi[j][i]*deltaPhi[j][i]);
    }
  }

   for (int i=0; i < njets; i++) {
	   akt[i].SetPtEtaPhiE(jetPt[i],jetEta[i],jetPhi[i], jetE[i]);
	 }

	 subjet1.SetPtEtaPhiE(higgsJetSubjetPt[0],higgsJetSubjetEta[0],higgsJetSubjetPhi[0], higgsJetSubjetE[0]);
	 subjet2.SetPtEtaPhiE(higgsJetSubjetPt[1],higgsJetSubjetEta[1],higgsJetSubjetPhi[1], higgsJetSubjetE[1]);
	 FatHiggs = subjet1 + subjet2;


	 if (nhiggsJetSubjets>2){
	   subjet3.SetPtEtaPhiE(higgsJetSubjetPt[2],higgsJetSubjetEta[2],higgsJetSubjetPhi[2], higgsJetSubjetE[2]);
	 }



  
   
  // To give different trees for training and testing, do:
  //    factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
  //    factory->AddSignalTree( signalTestTree,     signalTestWeight,  "Test" );
   

  // Set individual event weights (the variables must exist in the original TTree)
  //    for signal    : factory->SetSignalWeightExpression    ("weight1*weight2");
  //    for background: factory->SetBackgroundWeightExpression("wei


  // --- Apply additional cuts on the signal and background samples (can be different)
  // PLEASE NOTE: If no cuts are required, do NOT leave any spaces between quotation marks, otherwise an error will occur.
  TCut mycuts = "deltaPhiWH>-800"; //Signal Tree Cuts
  TCut mycutb = "deltaPhiWH>-800"; //Background Tree Cuts

  //cout << "DeltaPhiWH: " << deltaPhiWH << endl;
  

  // Tell the factory how to use the training and testing events
  //
  // If no numbers of events are given, half of the events in the tree are used 
  // for training, and the other half for testing:
  //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
  // To also specify the number of testing events, use:
  //    factory->PrepareTrainingAndTestTree( mycut,
  //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
  factory->PrepareTrainingAndTestTree( mycuts, mycutb,
				       "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:!V:NormMode=NumEvents");

  // ---- Book MVA methods
  //
  // Please lookup the various method configuration options in the corresponding cxx files, eg:
  // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
  // it is possible to preset ranges in the option string in which the cut optimisation should be done:
  // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

  // --- Cut optimisation
  if (Use["Cuts"])
    factory->BookMethod( TMVA::Types::kCuts, "Cuts",
			 "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

  if (Use["CutsD"])
    factory->BookMethod( TMVA::Types::kCuts, "CutsD",
			 "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

  if (Use["CutsPCA"])
    factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
			 "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

  if (Use["CutsGA"])
    factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
			 "H:!V:FitMethod=GA:CutRangeMin[1]=0:CutRangeMax[1]=450:VarProp[1]=FMax:EffSel:Steps=60:Cycles=3:PopSize=2000:SC_steps=10:SC_rate=5:SC_factor=0.95" );

  if (Use["CutsSA"])
    factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
			 "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

  // Likelihood ("naive Bayes estimator")
  if (Use["Likelihood"])
    factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
			 "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

  // Decorrelated likelihood
  if (Use["LikelihoodD"])
    factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
			 "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

  // PCA-transformed likelihood
  if (Use["LikelihoodPCA"])
    factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
			 "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 

  // Use a kernel density estimator to approximate the PDFs
  if (Use["LikelihoodKDE"])
    factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
			 "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); 

  // Use a variable-dependent mix of splines and kernel density estimator
  if (Use["LikelihoodMIX"])
    factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
			 "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

  // Test the multi-dimensional probability density estimator
  // here are the options strings for the MinMax and RMS methods, respectively:
  //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
  //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
  if (Use["PDERS"])
    factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
			 "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

  if (Use["PDERSD"])
    factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
			 "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

  if (Use["PDERSPCA"])
    factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
			 "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

  // Multi-dimensional likelihood estimator using self-adapting phase-space binning
  if (Use["PDEFoam"])
    factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
			 "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

  if (Use["PDEFoamBoost"])
    factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
			 "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

  // K-Nearest Neighbour classifier (KNN)
  if (Use["KNN"])
    factory->BookMethod( TMVA::Types::kKNN, "KNN",
			 "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

  // H-Matrix (chi2-squared) method
  if (Use["HMatrix"])
    factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" );

  // Linear discriminant (same as Fisher discriminant)
  if (Use["LD"])
    factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

  // Fisher discriminant (same as LD)
  if (Use["Fisher"])
    factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

  // Fisher with Gauss-transformed input variables
  if (Use["FisherG"])
    factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

  // Composite classifier: ensemble (tree) of boosted Fisher classifiers
  if (Use["BoostedFisher"])
    factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", 
			 "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" );

  // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
  if (Use["FDA_MC"])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
			 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

  if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
			 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

  if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
			 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

  if (Use["FDA_MT"])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
			 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

  if (Use["FDA_GAMT"])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
			 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

  if (Use["FDA_MCMT"])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
			 "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

  // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
  if (Use["MLP"])
    factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );

  if (Use["MLPBFGS"])
    factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

  if (Use["MLPBNN"])
    factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators

  // CF(Clermont-Ferrand)ANN
  if (Use["CFMlpANN"])
    factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...  

  // Tmlp(Root)ANN
  if (Use["TMlpANN"])
    factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...

  // Support Vector Machine
  if (Use["SVM"])
    factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

  // Boosted Decision Trees
  if (Use["BDTG"]) // Gradient Boost
    factory->BookMethod( TMVA::Types::kBDT, "BDTG",
		"!H:!V:NTrees=350:nEventsMin=100:MaxDepth=3:BoostType=Grad:Shrinkage=0.05:UseBaggedGrad:GradBaggingFraction=0.51:nCuts=65:PruneStrength=2:PruneMethod=ExpectedError:NNodesMax=7" ); 
  //"!H:!V:NTrees=350:nEventsMin=100:MaxDepth=3:BoostType=Grad:Shrinkage=0.05:UseBaggedGrad:GradBaggingFraction=0.55:nCuts=50:PruneStrength=2:PruneMethod=ExpectedError:NNodesMax=10" ); //nCuts=20 && no pruning originally

  if (Use["BDT"])  // Adaptive Boost
    factory->BookMethod( TMVA::Types::kBDT, "BDT",
			 "NTrees=550:nEventsMin=110:MaxDepth=5.5:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=10:PruneStrength=6:PruneMethod=ExpectedError");

  if (Use["BDTB"]) // Bagging
    factory->BookMethod( TMVA::Types::kBDT, "BDTB",
			 "!H:!V:NTrees=950:nEventsMin=20:MaxDepth=10:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

  if (Use["BDTD"]) // Decorrelation + Adaptive Boost
    factory->BookMethod( TMVA::Types::kBDT, "BDTD",
			 "!H:!V:NTrees=300:nEventsMin=20:MaxDepth=10:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=10:PruneStrength=6:PruneMethod=ExpectedError:VarTransform=Decorrelate" );

  // RuleFit -- TMVA implementation of Friedman's method
  if (Use["RuleFit"])
    factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
			 "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );


  // --------------------------------------------------------------------------------------------------

  // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

  // factory->OptimizeAllMethods("SigEffAt001","Scan");
  // factory->OptimizeAllMethods("SigEffAt001","FitGA");

  // --------------------------------------------------------------------------------------------------

  // ---- Now you can tell the factory to train, test, and evaluate the MVAs

  // Train MVAs using the set of training events
  factory->TrainAllMethods();

  // ---- Evaluate all MVAs using the set of test events
  factory->TestAllMethods();

  // ----- Evaluate and compare performance of all configured MVAs
  factory->EvaluateAllMethods();

  // --------------------------------------------------------------

  // Save the output
  outputFile->Close();

  std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
  std::cout << "==> TMVAClassification is done!" << std::endl;

  delete factory;

  // Launch the GUI for the root macros
  if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
示例#9
0
void ZTMVAClassification( TString myMethodList = "" )
{
   

   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Cut optimisation
   Use["Cuts"]            = 0;
   Use["CutsD"]           = 0;
   Use["CutsPCA"]         = 0;
   Use["CutsGA"]          = 0;
   Use["CutsSA"]          = 0;
   // 
   // --- 1-dimensional likelihood ("naive Bayes estimator")
   Use["Likelihood"]      = 0;
   Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"]   = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"]   = 0;
   Use["LikelihoodMIX"]   = 0;
   //
   // --- Mutidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]           = 0;
   Use["PDERSD"]          = 0;
   Use["PDERSPCA"]        = 0;
   Use["PDEFoam"]         = 0;
   Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
   Use["KNN"]             = 0; // k-nearest neighbour method
   //
   // --- Linear Discriminant Analysis
   Use["LD"]              = 0; // Linear Discriminant identical to Fisher
   Use["Fisher"]          = 0;
   Use["FisherG"]         = 0;
   Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
   Use["HMatrix"]         = 0;
   //
   // --- Function Discriminant analysis
   Use["FDA_GA"]          = 0; // minimisation of user-defined function using Genetics Algorithm
   Use["FDA_SA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   Use["FDA_MCMT"]        = 0;
   //
   // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
   Use["MLP"]             = 1; // Recommended ANN
   Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
   Use["MLPBNN"]          = 0; // Recommended ANN with BFGS training method and bayesian regulator
   Use["CFMlpANN"]        = 0; // Depreciated ANN from ALEPH
   Use["TMlpANN"]         = 0; // ROOT's own ANN
   //
   // --- Support Vector Machine 
   Use["SVM"]             = 0;
   // 
   // --- Boosted Decision Trees
   Use["BDT"]             = 1; // uses Adaptive Boost
   Use["BDTG"]            = 1; // uses Gradient Boost
   Use["BDTB"]            = 0; // uses Bagging
   Use["BDTD"]            = 1; // decorrelation + Adaptive Boost
   Use["BDTF"]            = 0; // allow usage of fisher discriminant for node splitting 
   // 
   // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
   Use["RuleFit"]         = 0;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;



   // --------------------------------------------------------------------------------------------------

   // --- Here the preparation phase begins

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );


   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

   //factory->AddVariable( "maxpioneta", "maxpioneta", "", 'F' );
   //factory->AddVariable( "minpioneta", "minpioneta", "", 'F' );
   factory->AddVariable( "nTT", "nTT", "", 'F' );
   // factory->AddVariable( "pidpimin", "pidpimin", "", 'F' );
   // factory->AddVariable( "pidpimax", "pidpimax", "", 'F' );
   factory->AddVariable( "normxpt", "normxpt", "", 'F' );
   factory->AddVariable( "eta", "eta", "", 'F' );
   //factory->AddVariable( "phi", "phi", "", 'F' );
   //  factory->AddVariable( "normptsum", "normptsum", "", 'F' );
    //factory->AddVariable( "ptAsym", "ptAsym", "", 'F' );
   //factory->AddVariable( "dphimax", "dphimax", "", 'F' );
 //factory->AddVariable( "dphimin", "dphimin", "", 'F' );
   //factory->AddVariable( "drmax", "drmax", "", 'F' );
   // factory->AddVariable( "drmin", "drmin", "", 'F' );
    //    factory->AddVariable( "normpionp", "normpionp", "", 'F' );
    factory->AddVariable( "normminpionpt", "normminpionpt", "", 'F' );
    //factory->AddVariable( "normminpionp", "normminpionp", "", 'F' );
    factory->AddVariable( "normmaxpionpt", "normmaxpionpt", "", 'F' );
    //  factory->AddVariable( "normptj", "normptj", "", 'F' );
    //factory->AddVariable( "jmasspull", "jmasspull", "", 'F' );
    //factory->AddVariable( "vchi2dof", "vchi2dof", "", 'F' );
    //    factory->AddVariable("maxchi2","maxchi2","", 'F');  
    // factory->AddVariable("normr","normr","", 'F');    
    //    factory->AddVariable("normq","normq","", 'F'); 
    //factory->AddVariable("normminm","normminm","", 'F'); 
    factory->AddVariable("logipmax","logipmax","", 'F'); 
    factory->AddVariable("logipmin","logipmin","", 'F'); 
    factory->AddVariable("logfd","logfd",'F');
    factory->AddVariable("logvd","logvd",'F');
    //factory->AddVariable("pointAngle","pointingAngle",'F');
    factory->AddVariable("logvpi","",'F');
    //factory->AddVariable("logmaxprob","",'F');
    //factory->AddVariable("logminprob","",'F');
 
    factory->AddSpectator( "mReFit", "mReFit", "", 'D' );
    //    factory->AddSpectator( "Qdecay", "Qdecay", "",'F' );
    //  factory->AddSpectator( "m23", "m23", "",'F' );
    
    //   TFile * input_Background = new TFile("../back.root");
   TFile * input_Signal = new TFile("../cmx12.root");
   TFile * input_Background = new TFile("../background12.root");
   std::cout << "--- TMVAClassification       : Using input file for signal    : " << input_Signal->GetName() << std::endl;
   std::cout << "--- TMVAClassification       : Using input file for backgound : " << input_Background->GetName() << std::endl;
   
   // --- Register the training and test trees

   TTree *signal     = (TTree*)input_Signal->Get("psiCand");
   TTree *background = (TTree*)input_Background->Get("psiCand");
   
   // global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;
   
   // You can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( signal,     signalWeight     );
   factory->AddBackgroundTree( background, backgroundWeight );
   

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = "QDecay < 300&&fdchi2 > 300"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = "QDecay < 300&&fdchi2> 300"; // for example: TCut mycutb = "abs(var1)<0.5";

   
   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

   if (Use["CutsSA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                           "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   // Likelihood ("naive Bayes estimator")
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // Decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   // PCA-transformed likelihood
   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 

   // Use a kernel density estimator to approximate the PDFs
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); 

   // Use a variable-dependent mix of splines and kernel density estimator
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

   // Test the multi-dimensional probability density estimator
   // here are the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["PDEFoamBoost"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
                           "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // H-Matrix (chi2-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" );

   // Linear discriminant (same as Fisher discriminant)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher discriminant (same as LD)
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", 
                           "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );


   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"]){
     // factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );
     //factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=Norm:NCycles=600:HiddenLayers=N+5:TestRate=5" );
     // factory->BookMethod( TMVA::Types::kMLP, "MLPCE", "H:!V:NeuronType=sigmoid:VarTransform=Norm:NCycles=600:HiddenLayers=N+5:TestRate=5:EstimatorType=CE" );
     factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=sigmoid:VarTransform=Norm:NCycles=600:HiddenLayers=9:TestRate=5:EstimatorType=CE" );
     //   factory->BookMethod( TMVA::Types::kMLP, "MLPCE83", "H:!V:NeuronType=tanh:VarTransform=Norm:NCycles=600:HiddenLayers=8,3:TestRate=5:EstimatorType=CE" );
   }

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators

   // CF(Clermont-Ferrand)ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...  

   // Tmlp(Root)ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDTG"]) { // Gradient Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5" );
      //  factory->BookMethod( TMVA::Types::kBDT, "BDTGI",
      //                     "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5:SeparationType=GiniIndexWithLaplace" );

      //     factory->BookMethod( TMVA::Types::kBDT, "BDTG6",
      //                      "!H:!V:NTrees=600:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.4:nCuts=20:NNodesMax=6" );
      //factory->BookMethod( TMVA::Types::kBDT, "BDTG2",
      //                    "!H:!V:NTrees=800:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.4:nCuts=20:NNodesMax=6" );
      factory->BookMethod( TMVA::Types::kBDT, "BDTG3",
                           "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.4:nCuts=20:NNodesMax=6" );
      // factory->BookMethod( TMVA::Types::kBDT, "BDTG4",
      //                     "!H:!V:NTrees=1200:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.4:nCuts=20:NNodesMax=6" );
      // factory->BookMethod( TMVA::Types::kBDT, "BDTG5",
      //                     "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.4:nCuts=20:NNodesMax=5" );

  }
   if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );


   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );

   if (Use["BDTF"])  // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables
      factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher",
                           "!H:!V:NTrees=50:nEventsMin=150:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","GA");

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   //  if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
//require mumucl>0.6
//opening angle >10
//coplanarity >90
//pang<90
void TMVAClassification_cc1pcoh_bdt_ver6noveract( TString myMethodList = "" )
{
    //---------------------------------------------------------------
    // This loads the library
    TMVA::Tools::Instance();

    // to get access to the GUI and all tmva macros
    TString thisdir = gSystem->DirName(gInterpreter->GetCurrentMacroName());
    gROOT->SetMacroPath(thisdir + ":" + gROOT->GetMacroPath());
    gROOT->ProcessLine(".L TMVAGui.C");

    // Default MVA methods to be trained + tested
    std::map<std::string,int> Use;

    // --- Cut optimisation
    Use["Cuts"]            = 1;
    Use["CutsD"]           = 1;
    Use["CutsPCA"]         = 0;
    Use["CutsGA"]          = 0;
    Use["CutsSA"]          = 0;
    //
    // --- 1-dimensional likelihood ("naive Bayes estimator")
    Use["Likelihood"]      = 1;
    Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
    Use["LikelihoodPCA"]   = 1; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
    Use["LikelihoodKDE"]   = 0;
    Use["LikelihoodMIX"]   = 0;
    //
    // --- Mutidimensional likelihood and Nearest-Neighbour methods
    Use["PDERS"]           = 1;
    Use["PDERSD"]          = 0;
    Use["PDERSPCA"]        = 0;
    Use["PDEFoam"]         = 1;
    Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
    Use["KNN"]             = 1; // k-nearest neighbour method
    //
    // --- Linear Discriminant Analysis
    Use["LD"]              = 1; // Linear Discriminant identical to Fisher
    Use["Fisher"]          = 0;
    Use["FisherG"]         = 0;
    Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
    Use["HMatrix"]         = 0;
    //
    // --- Function Discriminant analysis
    Use["FDA_GA"]          = 1; // minimisation of user-defined function using Genetics Algorithm
    Use["FDA_SA"]          = 0;
    Use["FDA_MC"]          = 0;
    Use["FDA_MT"]          = 0;
    Use["FDA_GAMT"]        = 0;
    Use["FDA_MCMT"]        = 0;
    //
    // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
    Use["MLP"]             = 0; // Recommended ANN
    Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
    Use["MLPBNN"]          = 1; // Recommended ANN with BFGS training method and bayesian regulator
    Use["CFMlpANN"]        = 0; // Depreciated ANN from ALEPH
    Use["TMlpANN"]         = 0; // ROOT's own ANN
    //
    // --- Support Vector Machine
    Use["SVM"]             = 1;
    //
    // --- Boosted Decision Trees
    Use["BDT"]             = 1; // uses Adaptive Boost
    Use["BDTG"]            = 0; // uses Gradient Boost
    Use["BDTB"]            = 0; // uses Bagging
    Use["BDTD"]            = 0; // decorrelation + Adaptive Boost
    Use["BDTF"]            = 0; // allow usage of fisher discriminant for node splitting
    //
    // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
    Use["RuleFit"]         = 1;
    
    // ---------------------------------------------------------------
    // Choose method
    std::cout << std::endl;
    std::cout << "==> Start TMVAClassification" << std::endl;

    // Select methods (don't look at this code - not of interest)
    if (myMethodList != "") {
        for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

        std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
        for (UInt_t i=0; i<mlist.size(); i++) {
            std::string regMethod(mlist[i]);

            if (Use.find(regMethod) == Use.end()) {
                std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
                for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
                std::cout << std::endl;
                return;
            }
            Use[regMethod] = 1;
        }
    }

    // ---------------------------------------------------------------
    // --- Here the preparation phase begins

    // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
    TString outfileName( "TMVA_cc1pcoh_bdt_ver6noveract.root" );//newchange
    TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

    // Create the factory object.
    TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification_ver6noveract", outputFile,//newchange
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

    
    // Add variable
    //sprintf(select,  "Ntrack==2&&mumucl>0.6&&pmucl>0.25&&pang<90&&muang_t<15 && veract*7.66339869e-2<34");
    //factory->AddVariable( "Ntrack", 'F' );
    factory->AddVariable( "mumucl", 'F' );
    factory->AddVariable( "pmucl", 'F' );
    factory->AddVariable( "pang_t", 'F' );//use pang instead of pang_t
    factory->AddVariable( "muang_t", 'F' );
    //factory->AddVariable( "veract", 'F' );
    factory->AddVariable( "ppe", 'F');
    factory->AddVariable( "mupe", 'F');
    factory->AddVariable( "range", 'F');
    factory->AddVariable( "coplanarity", 'F');
    factory->AddVariable( "opening", 'F');//newadd

    // Add spectator
    factory->AddSpectator( "fileIndex", 'I' );
    factory->AddSpectator( "nuE", 'F' );
    factory->AddSpectator( "inttype", 'I' );
    factory->AddSpectator( "norm", 'F' );
    factory->AddSpectator( "totcrsne", 'F' );
    factory->AddSpectator( "veract", 'F' );
    factory->AddSpectator( "pang", 'F' );
    factory->AddSpectator( "mupdg", 'I' );
    factory->AddSpectator( "ppdg", 'I' );

    // ---------------------------------------------------------------
    // --- Get weight
    TString fratioStr="/home/kikawa/macros/nd34_tuned_11bv3.1_250ka.root";
    
    
    
    // ---------------------------------------------------------------
    // --- Add sample
    TString fsignalStr="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmva/pm_merged_ccqe_tot.root";
    TString fbarStr="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmva/pmbar_merged_ccqe.root";
    TString fbkgStr="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmva/wall_merged_ccqe_tot.root";
    TString fbkg2Str="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmva/ingrid_merged_nd3_ccqe_tot.root";
    /*TString fsignalStr="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmvafix/pm_merged_ccqe_tot.root";
    TString fbarStr="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmvafix/pmbar_merged_ccqe.root";
    TString fbkgStr="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmvafix/wall_merged_ccqe_tot.root";
    TString fbkg2Str="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmvafix/ingrid_merged_nd3_ccqe_tot.root";*/

    
    TFile *pfileSignal = new TFile(fsignalStr);
    TFile *pfileBar = new TFile(fbarStr);
    TFile *pfileBkg = new TFile(fbkgStr);
    TFile *pfileBkg2 = new TFile(fbkg2Str);
    TFile *pfileRatio = new TFile(fratioStr);
    
    TTree *ptree_sig  = (TTree*)pfileSignal->Get("tree");
    TTree *ptree_bar  = (TTree*)pfileBar->Get("tree");
    TTree *ptree_bkg   = (TTree*)pfileBkg->Get("tree");
    TTree *ptree_bkg2  = (TTree*)pfileBkg2->Get("tree");
    
    // POT normalization
    const int   nmcFile  = 3950;
    const int   nbarFile  = 986;
    const int   nbkgFile  = 55546;//(31085+24461);
    const int   nbkg2File  = 7882;//(3941+3941);
    

   
    // global event weights per tree (see below for setting event-wise weights)
    // adding for signal sample
    // using this as standard and add other later
    Double_t signalWeight_sig     = 1.0;
    Double_t backgroundWeight_sig = 1.0;
   
    factory->AddSignalTree    ( ptree_sig,     signalWeight_sig );
    factory->AddBackgroundTree( ptree_sig, backgroundWeight_sig );
    
    // Add Numubar sample
    //Double_t signalWeight_bar     = nmcFile/float(nbarFile);
    Double_t backgroundWeight_bar = nmcFile/float(nbarFile);
    
    //factory->AddSignalTree    ( ptree_bar,     signalWeight_bar );
    factory->AddBackgroundTree( ptree_bar, backgroundWeight_bar );
    
    // Add wall background
    //Double_t signalWeight_bkg     = nmcFile/float(nbkgFile);
    Double_t backgroundWeight_bkg = nmcFile/float(nbkgFile);
    
    //factory->AddSignalTree    ( ptree_bkg,     signalWeight_bkg );
    factory->AddBackgroundTree( ptree_bkg, backgroundWeight_bkg );
    
    // Add INGRID background
    //Double_t signalWeight_bkg2     = nmcFile/float(nbkg2File);
    Double_t backgroundWeight_bkg2 = nmcFile/float(nbkg2File);
    
    //factory->AddSignalTree    ( ptree_bkg2,     signalWeight_bkg2 );
    factory->AddBackgroundTree( ptree_bkg2, backgroundWeight_bkg2 );
    
   
   
    //factory->SetSignalWeightExpression    ("norm*totcrsne*2.8647e-13");
    //factory->SetBackgroundWeightExpression( "norm*totcrsne*2.8647e-13" );

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = "Ntrack==2 && abs(inttype)==16 && fileIndex==1 && pang<90 && mumucl>0.6 && opening>10 && coplanarity>90 && pmucl>0.2"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = "Ntrack==2 && (abs(inttype)!=16 || fileIndex>1) && pang<90 && mumucl>0.6 && opening>10 && coplanarity>90 && pmucl>0.2"; // for example: TCut mycutb = "abs(var1)<0.5";

   // Tell the factory how to use the training and testing events
   //
   // If no numbers of events are given, half of the events in the tree are used 
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut,
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

   if (Use["CutsSA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                           "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   // Likelihood ("naive Bayes estimator")
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // Decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   // PCA-transformed likelihood
   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 

   // Use a kernel density estimator to approximate the PDFs
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); 

   // Use a variable-dependent mix of splines and kernel density estimator
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

   // Test the multi-dimensional probability density estimator
   // here are the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["PDEFoamBoost"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
                           "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // H-Matrix (chi2-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" );

   // Linear discriminant (same as Fisher discriminant)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher discriminant (same as LD)
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", 
                           "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators

   // CF(Clermont-Ferrand)ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...  

   // Tmlp(Root)ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDTG"]) // Gradient Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" );

   /*if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );*/
    if (Use["BDT"])  // Adaptive Boost
        factory->BookMethod( TMVA::Types::kBDT, "BDT",
                            "!H:!V:NTrees=850:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20" );

   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );

   if (Use["BDTF"])  // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables
      factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher",
                           "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

   // ---- STILL EXPERIMENTAL and only implemented for BDT's ! 
   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","FitGA");

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

    
   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   //if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
void TMVAClassificationCategory()
{
    //---------------------------------------------------------------
    // Example for usage of different event categories with classifiers

    std::cout << std::endl << "==> Start TMVAClassificationCategory" << std::endl;

    bool batchMode = false;

    // Create a new root output file.
    TString outfileName( "TMVA.root" );
    TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

    // Create the factory object (see TMVAClassification.C for more information)

    std::string factoryOptions( "!V:!Silent:Transformations=I;D;P;G,D" );
    if (batchMode) factoryOptions += ":!Color:!DrawProgressBar";

    TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationCategory", outputFile, factoryOptions );

    // Define the input variables used for the MVA training
    factory->AddVariable( "var1", 'F' );
    factory->AddVariable( "var2", 'F' );
    factory->AddVariable( "var3", 'F' );
    factory->AddVariable( "var4", 'F' );

    // You can add so-called "Spectator variables", which are not used in the MVA training,
    // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
    // input variables, the response values of all trained MVAs, and the spectator variables
    factory->AddSpectator( "eta" );

    // Load the signal and background event samples from ROOT trees
    TFile *input(0);
    TString fname( "" );
    if (UseOffsetMethod) fname = "data/toy_sigbkg_categ_offset.root";
    else                 fname = "data/toy_sigbkg_categ_varoff.root";
    if (!gSystem->AccessPathName( fname )) {
        // first we try to find tmva_example.root in the local directory
        std::cout << "--- TMVAClassificationCategory: Accessing " << fname << std::endl;
        input = TFile::Open( fname );
    }

    if (!input) {
        std::cout << "ERROR: could not open data file: " << fname << std::endl;
        exit(1);
    }

    TTree *signal     = (TTree*)input->Get("TreeS");
    TTree *background = (TTree*)input->Get("TreeB");

    /// Global event weights per tree (see below for setting event-wise weights)
    Double_t signalWeight     = 1.0;
    Double_t backgroundWeight = 1.0;

    /// You can add an arbitrary number of signal or background trees
    factory->AddSignalTree    ( signal,     signalWeight     );
    factory->AddBackgroundTree( background, backgroundWeight );

    // Apply additional cuts on the signal and background samples (can be different)
    TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
    TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";

    // Tell the factory how to use the training and testing events
    factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                         "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

    // ---- Book MVA methods

    // Fisher discriminant
    factory->BookMethod( TMVA::Types::kFisher, "Fisher", "!H:!V:Fisher" );

    // Likelihood
    factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                         "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

    // --- Categorised classifier
    TMVA::MethodCategory* mcat = 0;

    // The variable sets
    TString theCat1Vars = "var1:var2:var3:var4";
    TString theCat2Vars = (UseOffsetMethod ? "var1:var2:var3:var4" : "var1:var2:var3");

    // Fisher with categories
    TMVA::MethodBase* fiCat = factory->BookMethod( TMVA::Types::kCategory, "FisherCat","" );
    mcat = dynamic_cast<TMVA::MethodCategory*>(fiCat);
    mcat->AddMethod( "abs(eta)<=1.3", theCat1Vars, TMVA::Types::kFisher, "Category_Fisher_1","!H:!V:Fisher" );
    mcat->AddMethod( "abs(eta)>1.3",  theCat2Vars, TMVA::Types::kFisher, "Category_Fisher_2","!H:!V:Fisher" );

    // Likelihood with categories
    TMVA::MethodBase* liCat = factory->BookMethod( TMVA::Types::kCategory, "LikelihoodCat","" );
    mcat = dynamic_cast<TMVA::MethodCategory*>(liCat);
    mcat->AddMethod( "abs(eta)<=1.3",theCat1Vars, TMVA::Types::kLikelihood,
                     "Category_Likelihood_1","!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );
    mcat->AddMethod( "abs(eta)>1.3", theCat2Vars, TMVA::Types::kLikelihood,
                     "Category_Likelihood_2","!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

    // ---- Now you can tell the factory to train, test, and evaluate the MVAs

    // Train MVAs using the set of training events
    factory->TrainAllMethods();

    // ---- Evaluate all MVAs using the set of test events
    factory->TestAllMethods();

    // ----- Evaluate and compare performance of all configured MVAs
    factory->EvaluateAllMethods();

    // --------------------------------------------------------------

    // Save the output
    outputFile->Close();

    std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
    std::cout << "==> TMVAClassificationCategory is done!" << std::endl;

    // Clean up
    delete factory;

    // Launch the GUI for the root macros
    if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
示例#12
0
void classifyBDT(TString inputVariables = "trainingVars.txt",
                 TString signalName = "/mnt/hscratch/dabercro/skims2/BDT_Signal.root",
                 TString backName = "/mnt/hscratch/dabercro/skims2/BDT_Background.root") {
  TMVA::Tools::Instance();
  std::cout << "==> Start TMVAClassification" << std::endl;

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName( "TMVA/TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationCategory", outputFile,
					       "!V:!Silent:Color:DrawProgressBar:Transformations=I;N" );

   // A very simple MVA (feel free to uncomment and comment what you like) => as a rule of thumb 10-20 variables is where people start to get worried about total number

   ifstream configFile;
   configFile.open(inputVariables.Data());
   TString tempFormula;

   configFile >> tempFormula;  // Is the name of the BDT
   while(!configFile.eof()){
     configFile >> tempFormula;
     if(tempFormula != ""){
       factory->AddVariable(tempFormula,'F');
     }
   }

   TString lVars;

   // TCut lCut   = "jet1qg2<2.&&jet1pt>250.&&jet1pullAngle>-5.";// < 10 && jet1mass_m2 > 60 && jet1mass_m2 < 120";
   // TCut lCut = "passZ > 3  && fjet1pt > 250 && fjet1MassPruned < 120 && fatjetid < 2";
   TCut lCut   = "abs(fjet1PartonId)!=24&&abs(fjet1PartonId)!=23";
   // std::string lEventCut = "event % 2 == 1";
   // lCut += lEventCut.c_str();

   // TCut lSCut = "passT > 0   && fjet1pt > 250 && fjet1MassPruned < 120 && abs(fjet1PartonId) == 24&& fatjetid < 2";
   TCut lSCut   = "abs(fjet1PartonId)==24||abs(fjet1PartonId)==23";
   // lSCut += lEventCut.c_str();

   TCut cleanCut = "fjet1QGtagSub2 > -10 && fjet1PullAngle > -4 && abs(fjet1pt/fjet1MassTrimmed)<200 && abs(fjet1pt/fjet1MassPruned)<200";

   TFile *lSAInput = TFile::Open(signalName);
   TTree   *lSASignal    = (TTree*)lSAInput    ->Get("DMSTree"); 
   TFile *lSBInput = TFile::Open(backName);
   TTree   *lSBSignal    = (TTree*)lSBInput    ->Get("DMSTree"); 
   
   Double_t lSWeight = 1.0;
   Double_t lBWeight = 1.0;
   gROOT->cd( outfileName+TString(":/") );   
   factory->AddSignalTree    ( lSASignal, lSWeight );
   
   gROOT->cd( outfileName+TString(":/") );   
   factory->AddBackgroundTree( lSBSignal, lBWeight );
   
   factory->SetWeightExpression("weight");
   std::stringstream pSignal,pBackground;
   pSignal << "nTrain_Signal="<< lSASignal->GetEntries() << ":nTrain_Background=" << lSBSignal->GetEntries();
   // factory->PrepareTrainingAndTestTree( lSCut, lCut,(pSignal.str()+":SplitMode=Block:NormMode=NumEvents:!V").c_str() );
   factory->PrepareTrainingAndTestTree(lSCut&&cleanCut,lCut&&cleanCut,"nTrain_Signal=0:nTrain_Background=0:SplitMode=Alternate:NormMode=NumEvents:!V");
   std::string lName = "alpha_VBF";
   TString lBDTDef   = "!H:!V:NTrees=400:BoostType=Grad:Shrinkage=0.1:UseBaggedGrad=F:nCuts=2000:NNodesMax=10000:MaxDepth=5:UseYesNoLeaf=F:nEventsMin=200";
//    TString lBDTDef   = "!H:!V:NTrees=400:BoostType=Grad:Shrinkage=0.1:UseBaggedGrad=F:nCuts=2000:MaxDepth=5:UseYesNoLeaf=F:MinNodeSize=0.086:NegWeightTreatment=IgnoreNegWeightsInTraining";
   factory->BookMethod(TMVA::Types::kBDT,"BDT_simple_alpha",lBDTDef);   
   factory->TrainAllMethods();
   factory->TestAllMethods();
   factory->EvaluateAllMethods();
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;
   delete factory;
   //if (!gROOT->IsBatch()) TMVAGui( outfileName );
   //TString lBDTDef   = "!H:!V:NTrees=100:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad=F:nCuts=2000:NNodesMax=10000:MaxDepth=3:SeparationType=GiniIndex";
}
示例#13
0
/**********************************************************************************
 * Project   : TMVA - a ROOT-integrated toolkit for multivariate data analysis    *
 * Package   : TMVA                                                               *
 * Root Macro: TMVAClassification                                                 *
 *                                                                                *
 * This macro provides examples for the training and testing of the               *
 * TMVA classifiers.                                                              *
 *                                                                                *
 * As input data is used a toy-MC sample consisting of four Gaussian-distributed  *
 * and linearly correlated input variables.                                       *
 *                                                                                *
 * The methods to be used can be switched on and off by means of booleans, or     *
 * via the prompt command, for example:                                           *
 *                                                                                *
 *    root -l ./TMVAClassification.C\(\"Fisher,Likelihood\"\)                     *
 *                                                                                *
 * (note that the backslashes are mandatory)                                      *
 * If no method given, a default set of classifiers is used.                      *
 *                                                                                *
 * The output file "TMVA.root" can be analysed with the use of dedicated          *
 * macros (simply say: root -l <macro.C>), which can be conveniently              *
 * invoked through a GUI that will appear at the end of the run of this macro.    *
 * Launch the GUI via the command:                                                *
 *                                                                                *
 *    root -l ./TMVAGui.C                                                         *
 *                                                                                *
 **********************************************************************************/
void TMVAClassification( TString myMethodList = "")
{
   TTree *signal     = (TTree *)gDirectory->Get("VertexG");

   if (! signal) { std::cout << "No signal TTree" << std::endl; return;}

   TTree *background = (TTree *)gDirectory->Get("VertexB");

   if (! background) { std::cout << "No background TTree" << std::endl; return;}

   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string, int> Use;

   // --- Cut optimisation
   Use["Cuts"]            = 1;
   Use["CutsD"]           = 1;
   Use["CutsPCA"]         = 0;
   Use["CutsGA"]          = 0;
   Use["CutsSA"]          = 0;
   //
   // --- 1-dimensional likelihood ("naive Bayes estimator")
   Use["Likelihood"]      = 1;
   Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"]   = 1; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"]   = 0;
   Use["LikelihoodMIX"]   = 0;
   //
   // --- Mutidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]           = 1;
   Use["PDERSD"]          = 0;
   Use["PDERSPCA"]        = 0;
   Use["PDEFoam"]         = 1;
   Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
   Use["KNN"]             = 1; // k-nearest neighbour method
   //
   // --- Linear Discriminant Analysis
   Use["LD"]              = 1; // Linear Discriminant identical to Fisher
   Use["Fisher"]          = 0;
   Use["FisherG"]         = 0;
   Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
   Use["HMatrix"]         = 0;
   //
   // --- Function Discriminant analysis
   Use["FDA_GA"]          = 1; // minimisation of user-defined function using Genetics Algorithm
   Use["FDA_SA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   Use["FDA_MCMT"]        = 0;
   //
   // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
   Use["MLP"]             = 0; // Recommended ANN
   Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
   Use["MLPBNN"]          = 1; // Recommended ANN with BFGS training method and bayesian regulator
   Use["CFMlpANN"]        = 0; // Depreciated ANN from ALEPH
   Use["TMlpANN"]         = 0; // ROOT's own ANN
   //
   // --- Support Vector Machine
   Use["SVM"]             = 1;
   //
   // --- Boosted Decision Trees
   Use["BDT"]             = 1; // uses Adaptive Boost
   Use["BDTG"]            = 0; // uses Gradient Boost
   Use["BDTB"]            = 0; // uses Bagging
   Use["BDTD"]            = 0; // decorrelation + Adaptive Boost
   Use["BDTF"]            = 0; // allow usage of fisher discriminant for node splitting
   Use["myBDTD"]          = 1; // mine
   //
   // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
   Use["RuleFit"]         = 1;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string, int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );

      for (size_t i = 0; i < mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;

            for (std::map<std::string, int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";

            std::cout << std::endl;
            return;
         }

         Use[regMethod] = 1;
      }
   }

   // --------------------------------------------------------------------------------------------------

   // --- Here the preparation phase begins

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName( "TMVA.root" );
   TFile *outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory is
   // the only TMVA object you have to interact with
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
         "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // load the signal and background event samples from ROOT trees

   std::cout << " starts ... " << std::endl;
   // global event weights per tree (see below for setting event-wise weights)
   //   Float_t w;
   double signalWeight     = 1.0;
   double backgroundWeight = 1.0;

   std::cout << " signalWeight = " << signalWeight << " backWeight = " << backgroundWeight << std::endl;
   factory->AddSignalTree( signal,    signalWeight     );
   factory->AddBackgroundTree( background, backgroundWeight );

   TString separator(":");
   TString Vnames(vnames);
   TObjArray *array = Vnames.Tokenize(separator);

   std::vector<std::string> inputVars;
   TIter next(array);
   TObjString *objs;

   while ((objs = (TObjString *) next())) {
      //    std::cout << objs->GetString() << std::endl;
      TString name(objs->GetString());

      if (name == "BEMC") continue;

      if (name == "noBEMC") continue;

      factory->AddVariable(name, 'F');
   }

   // This would set individual event weights (the variables defined in the
   // expression need to exist in the original TTree)
   //    for signal    : factory->SetSignalWeightExpression("weight1*weight2");
   //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
   // commented JB : 04/26 ??
   //factory->dSetBackgroundWeightExpression("weight");

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = "";
   TCut mycutb = "";

   // Tell the factory how to use the training and testing events
   //
   // If no numbers of events are given, half of the events in the tree are used
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //factory->PrepareTrainingAndTestTree( mycuts,mycutb,"NSigTrain=9000:NBkgTrain=50000:NSigTest=9000:NBkgTest=50000:SplitMode=Random:!V" );
   factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=4900:nTrain_Background=49000:nTest_Signal=4900:nTest_Background=49000:SplitMode=Random:!V"); // for KFVertex
   //   factory->PrepareTrainingAndTestTree( mycuts, mycutb,"nTrain_Signal=20000:nTrain_Background=40000:nTest_Signal=20000:nTest_Background=40000:SplitMode=Random:!V"); // for PPV

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

   if (Use["CutsSA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                           "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   // Likelihood ("naive Bayes estimator")
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // Decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   // PCA-transformed likelihood
   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" );

   // Use a kernel density estimator to approximate the PDFs
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" );

   // Use a variable-dependent mix of splines and kernel density estimator
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" );

   // Test the multi-dimensional probability density estimator
   // here are the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["PDEFoamBoost"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
                           "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // H-Matrix (chi2-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" );

   // Linear discriminant (same as Fisher discriminant)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher discriminant (same as LD)
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher",
                           "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators

   // CF(Clermont-Ferrand)ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...

   // Tmlp(Root)ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDTG"]) // Gradient Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5" );

   if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );


   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );

   if (Use["myBDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTDTEST",
                           "!H:!V:NTrees=1000:nEventsMin=400:MaxDepth=6:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );

   if (Use["BDTF"])  // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables
      factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher",
                           "!H:!V:NTrees=50:nEventsMin=150:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // For an example of the category classifier usage, see: TMVAClassificationCategory

   //   TMVA::IMethod* category         = factory->BookMethod( TMVA::Types::kCategory,"Category","" );

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events
#if 0
   factory->OptimizeAllMethods("SigEffAt001", "Scan");
   factory->OptimizeAllMethods("ROCIntegral", "GA");
#endif
   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;
}
示例#14
0
void test2(){
  //---------------------------------------------------------------
  // This loads the library
  TMVA::Tools::Instance();
  TString outfileName( "trainingBDT_tZq.root" );
  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
  TMVA::Factory *factory = new TMVA::Factory( "BDT_trainning_tzq", outputFile,"!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );
  
  
  
  TFile *input_sig      = TFile::Open( "../TreeReader/outputroot/histofile_tZq.root" );
  TFile *input_wz       = TFile::Open( "../TreeReader/outputroot/histofile_WZ.root" );
  
  
  TTree *signal            = (TTree*)input_sig->Get("Ttree_tZq");
  TTree *background     = (TTree*)input_wz->Get("Ttree_WZ");
  
  factory->AddSignalTree    ( signal,	   1.);
  factory->AddBackgroundTree( background,  1.);
  
  
  std::vector<TString > varList;
  varList.push_back("tree_cosThetaStar");;
  varList.push_back("tree_topMass");     
  varList.push_back("tree_totMass");     
  varList.push_back("tree_deltaPhilb");  
  varList.push_back("tree_deltaRlb");    
  varList.push_back("tree_deltaRTopZ");  
  varList.push_back("tree_asym");        
  varList.push_back("tree_Zpt");         
  varList.push_back("tree_ZEta");        
  varList.push_back("tree_topPt");       
  varList.push_back("tree_topEta");      
  varList.push_back("tree_NJets");       
  varList.push_back("tree_NBJets");	 
  varList.push_back("tree_deltaRZl");	 
  varList.push_back("tree_deltaPhiZmet");
  varList.push_back("tree_btagDiscri");  
  
  varList.push_back("tree_totPt");	
  varList.push_back("tree_totEta");	
  
  
  varList.push_back("tree_leptWPt");	 
  varList.push_back("tree_leptWEta");	 
  varList.push_back("tree_leadJetPt");   
  varList.push_back("tree_leadJetEta");  
  varList.push_back("tree_deltaRZleptW");
  varList.push_back("tree_deltaPhiZleptW");
  
  
  varList.push_back("tree_met" );
  varList.push_back("tree_mTW" );
  
  
  for(unsigned int i=0; i< varList.size() ; i++) factory->AddVariable( varList[i].Data(),    'F');
  
  factory->SetSignalWeightExpression    ("tree_EvtWeight");
  factory->SetBackgroundWeightExpression("tree_EvtWeight");
   
  
  // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";

   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
   
   
   
   //factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );
//   factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=100:nEventsMin=100:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );
   factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=100:nEventsMin=100:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );

 


   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );

  
  
  
}
示例#15
0
void TMVAClassification( std::string selectionName, std::string charge, TString myMethodList = "" ) 
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the 
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   // if you like to use a method via the plugin mechanism, we recommend using
   // 
   // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
   // (an example is given for using the BDT as plugin (see below),
   // but of course the real application is when you write your own
   // method based)

   // this loads the library
   TMVA::Tools::Instance();

   //---------------------------------------------------------------
   // default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   Use["Cuts"]            = 1;
   Use["CutsD"]           = 0;
   Use["CutsPCA"]         = 0;
   Use["CutsGA"]          = 0;
   Use["CutsSA"]          = 0;
   // ---
   Use["Likelihood"]      = 0;
   Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"]   = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"]   = 0;
   Use["LikelihoodMIX"]   = 0;
   // ---
   Use["PDERS"]           = 0;
   Use["PDERSD"]          = 0;
   Use["PDERSPCA"]        = 0;
   Use["PDERSkNN"]        = 0; // depreciated until further notice
   Use["PDEFoam"]         = 0;
   // --
   Use["KNN"]             = 0;
   // ---
   Use["HMatrix"]         = 0;
   Use["Fisher"]          = 0;
   Use["FisherG"]         = 0;
   Use["BoostedFisher"]   = 0;
   Use["LD"]              = 0;
   // ---
   Use["FDA_GA"]          = 0;
   Use["FDA_SA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   Use["FDA_MCMT"]        = 0;
   // ---
   Use["MLP"]             = 0; // this is the recommended ANN
   Use["MLPBFGS"]         = 0; // recommended ANN with optional training method
   Use["CFMlpANN"]        = 0; // *** missing
   Use["TMlpANN"]         = 0; 
   // ---
   Use["SVM"]             = 0;
   // ---
   Use["BDT"]             = 0;
   Use["BDTD"]            = 0;
   Use["BDTG"]            = 0;
   Use["BDTB"]            = 0;
   // ---
   Use["RuleFit"]         = 0;
   // ---
   Use["Plugin"]          = 0;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // Create a new root output file.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory will
   // then run the performance analysis for you.
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/ 
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in 
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, 
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" );

   // If you wish to modify default settings 
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   //factory->AddVariable( "MET"      , "ME_{T}", "GeV", 'F');
   //factory->AddVariable( "TMath::Max(pT1,pT2)"      , "Lead Lepton p_{T}", "GeV", 'F');
   factory->AddVariable( "HT"       , "H_{T}", "GeV", 'F');
   //factory->AddVariable( "M3"       , "M_{3}", "GeV", 'F');
   factory->AddVariable( "TMath::Min(pT1,pT2)"      , "Sublead Lepton p_{T}", "GeV", 'F');
   //factory->AddVariable( "NbJ"      , "N B Jets", "", 'I');
   //factory->AddVariable( "NbJmed"   , "N B Jets (medium)", "", 'I');
   //factory->AddVariable( "NJ"       , "N Jets", "", 'I');

   // You can add so-called "Spectator variables", which are not used in the MVA training, 
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
   // input variables, the response values of all trained MVAs, and the spectator variables
   //factory->AddSpectator( "spec1:=var1*2",  "Spectator 1", "units", 'F' );
   //factory->AddSpectator( "spec2:=var1*3",  "Spectator 2", "units", 'F' );

   // read training and test data
   if (ReadDataFromAsciiIFormat) {
      // load the signal and background event samples from ascii files
      // format in file must be:
      // var1/F:var2/F:var3/F:var4/F
      // 0.04551   0.59923   0.32400   -0.19170
      // ...

      TString datFileS = "tmva_example_sig.dat";
      TString datFileB = "tmva_example_bkg.dat";

      factory->SetInputTrees( datFileS, datFileB );
   }
   else {
      // load the signal and background event samples from ROOT trees
      TFile *input(0);
      //TString fname = "../macros/tmva_example.root";
      //TString fname = "opt_ttW_Apr10_Iso005_NoZVeto_jet20.root";
      TString fname = "opt_ttW_Nov20_muDetIso0p05_elDetIso0p05_jet20_withZveto_optimization.root";
      //TString fname = "opt_ttW_" + selectionName + ".root";
      if (!gSystem->AccessPathName( fname )) {
         input = TFile::Open( fname ); // check if file in local directory exists
      } 
      else { 
         input = TFile::Open( "http://root.cern.ch/files/tmva_class_example.root" ); // if not: download from ROOT server
      }

      if (!input) {
         std::cout << "ERROR: could not open data file" << std::endl;
         exit(1);
      }
      std::cout << "--- TMVAClassification       : Using input file: " << input->GetName() << std::endl;

      
      TTree* opt_tree = (TTree*)input->Get("tree_opt");

      TFile* signalFile = TFile::Open("/shome/mdunser/workspace/CMSSW_5_2_5/src/DiLeptonAnalysis/NTupleProducer/macros/plots/Nov26_muPFIso0p05_elPFIso0p05_jet20_withZveto/TTbarW_Yields.root");      
      TTree *signal     = (TTree*)signalFile->Get("SigEvents");

      TChain* background = new TChain("SigEvents");
      background->Add("/shome/mdunser/workspace/CMSSW_5_2_5/src/DiLeptonAnalysis/NTupleProducer/macros/plots/Nov26_muPFIso0p05_elPFIso0p05_jet20_withZveto/TTJets_Yields.root");
      background->Add("/shome/mdunser/workspace/CMSSW_5_2_5/src/DiLeptonAnalysis/NTupleProducer/macros/plots/Nov26_muPFIso0p05_elPFIso0p05_jet20_withZveto/WZTo3LNu_Yields.root");

      //TTree *background = (TTree*)opt_tree->CopyTree("SName==\"TTJets\" || SName==\"DYJets\" || SName==\"WZTo3LNu\"");
      //TTree *background = (TTree*)opt_tree->CopyTree("SName==\"DYJets\"");


      // global event weights per tree (see below for setting event-wise weights)
      Double_t signalWeight     = 1.0;
      Double_t backgroundWeight = 1.0;

      // ====== register trees ====================================================
      //
      // the following method is the prefered one:
      // you can add an arbitrary number of signal or background trees
      factory->AddSignalTree    ( signal,     signalWeight     );
      factory->AddBackgroundTree( background, backgroundWeight );

      // To give different trees for training and testing, do as follows:
      //    factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
      //    factory->AddSignalTree( signalTestTree,     signalTestWeight,  "Test" );

      // Use the following code instead of the above two or four lines to add signal and background 
      // training and test events "by hand"
      // NOTE that in this case one should not give expressions (such as "var1+var2") in the input 
      //      variable definition, but simply compute the expression before adding the event
      // 
      //    // --- begin ----------------------------------------------------------
      //    std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
      //    Float_t  treevars[4];
      //    for (Int_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
      //    for (Int_t i=0; i<signal->GetEntries(); i++) {
      //       signal->GetEntry(i);
      //       for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
      //       // add training and test events; here: first half is training, second is testing
      //       // note that the weight can also be event-wise	
      //       if (i < signal->GetEntries()/2) factory->AddSignalTrainingEvent( vars, signalWeight ); 
      //       else                            factory->AddSignalTestEvent    ( vars, signalWeight ); 
      //    }
      //
      //    for (Int_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
      //    for (Int_t i=0; i<background->GetEntries(); i++) {
      //       background->GetEntry(i); 
      //       for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
      //       // add training and test events; here: first half is training, second is testing
      //       // note that the weight can also be event-wise	
      //       if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight ); 
      //       else                                factory->AddBackgroundTestEvent    ( vars, backgroundWeight ); 
      //    }
      //    // --- end ------------------------------------------------------------
      //
      // ====== end of register trees ==============================================
   }
   
   // This would set individual event weights (the variables defined in the 
   // expression need to exist in the original TTree)
   //    for signal    : factory->SetSignalWeightExpression("weight1*weight2");
   //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
   //factory->SetSignalWeightExpression("eventWeight");
   factory->SetBackgroundWeightExpression("1./SLumi");

   // Apply additional cuts on the signal and background samples (can be different)

   TCut mycuts;
   TCut mycutb;

   if( charge == "plus" ) {
     mycuts = "Charge==1 && SystFlag==0 && NJ>=3 && pT1>20. && pT2>20. && NbJmed>0 && TLCat==0 && Flavor<3 && PassZVeto==1"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
     mycutb = "Charge==1 && SystFlag==0 && NJ>=3 && pT1>20. && pT2>20. && NbJmed>0 && TLCat==0 && Flavor<3 && PassZVeto==1"; // for example: TCut mycutb = "abs(var1)<0.5";
   } else if( charge == "minus" ) {
     mycuts = "Charge==-1 && SystFlag==0 && NJ>=3 && pT1>20. && pT2>20. && NbJmed>0 && TLCat==0 && Flavor<3 && PassZVeto==1"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
     mycutb = "Charge==-1 && SystFlag==0 && NJ>=3 && pT1>20. && pT2>20. && NbJmed>0 && TLCat==0 && Flavor<3 && PassZVeto==1"; // for example: TCut mycutb = "abs(var1)<0.5";
   } else if( charge == "all" ) {
     mycuts = "              SystFlag==0 && NJ>=3 && pT1>20. && pT2>20. && NbJmed>0 && TLCat==0 && Flavor<3 && PassZVeto==1"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
     mycutb = "              SystFlag==0 && NJ>=3 && pT1>20. && pT2>20. && NbJmed>0 && TLCat==0 && Flavor<3 && PassZVeto==1"; // for example: TCut mycutb = "abs(var1)<0.5";
   } else {
     std::cout << "only 'plus' and 'minus' and 'all' are allowed for charge." <<std::endl;
     return;
   }

   //if( btagMed_presel_ ) {
   //  mycuts += "NbJmed>0"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   //  mycutb += "NbJmed>0"; // for example: TCut mycutb = "abs(var1)<0.5";
   //}


   // tell the factory to use all remaining events in the trees after training for testing:
   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // If no numbers of events are given, half of the events in the tree are used for training, and 
   // the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );  
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut, 
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );  

   // ---- Book MVA methods
   //
   // please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"]) {

      std::string bookConditions;
      bookConditions = "H:!V:FitMethod=MC";
      bookConditions += ":VarProp[0]=FMax"; //HT
      bookConditions += ":VarProp[1]=FMax"; //pt2

      bookConditions += ":EffSel:SampleSize=500000000";
      //bookConditions += ":EffSel:SampleSize=50000";

      factory->BookMethod( TMVA::Types::kCuts, "Cuts", bookConditions.c_str() );
      //factory->BookMethod( TMVA::Types::kCuts, "Cuts", 
      //                     "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   }

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD", 
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", 
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );
   
   if (Use["CutsSA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                           "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );
   
   // Likelihood
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", 
                           "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); 

   // test the decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", 
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); 

   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", 
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 
 
   // test the new kernel density estimator
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", 
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); 

   // test the mixed splines and kernel density estimator (depending on which variable)
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", 
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

   // test the multi-dimensional probability density estimator
   // here are the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );   
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );   
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS", 
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSkNN"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSkNN", 
                           "!H:!V:VolumeRangeMode=kNN:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", 
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", 
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", 
                           "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:CutNmin=T:Nmin=100:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN", 
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );
   // H-Matrix (chi2-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" ); 

   // Fisher discriminant   
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=60:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2");

   // Linear discriminant (same as Fisher)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None" );

	// Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );
   
   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=N+5:TestRate=10:EpochMonitoring" );

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=N+5:TestRate=10:TrainingMethod=BFGS:!EpochMonitoring" );


   // CF(Clermont-Ferrand)ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...  
  
   // Tmlp(Root)ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...
  
   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );
   
   // Boosted Decision Trees
   if (Use["BDTG"]) // Gradient Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTG", 
                           "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" );

   if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT", 
                           "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );
   
   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB", 
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD", 
                           "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );
   
   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );
   
   // For an example of the category classifier, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------

   // As an example how to use the ROOT plugin mechanism, book BDT via
   // plugin mechanism
   if (Use["Plugin"]) {
         //
         // first the plugin has to be defined, which can happen either through the following line in the local or global .rootrc:
         //
         // # plugin handler          plugin name(regexp) class to be instanciated library        constructor format
         // Plugin.TMVA@@MethodBase:  ^BDT                TMVA::MethodBDT          TMVA.1         "MethodBDT(TString,TString,DataSet&,TString)"
         // 
         // or by telling the global plugin manager directly
      gPluginMgr->AddHandler("TMVA@@MethodBase", "BDT", "TMVA::MethodBDT", "TMVA.1", "MethodBDT(TString,TString,DataSet&,TString)");
      factory->BookMethod( TMVA::Types::kPlugins, "BDT",
                           "!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=50" );
   }

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();    


   if (Use["Cuts"]) {

    for( unsigned iEff=1; iEff<11; ++iEff ) {

       TMVA::IMethod* method = (TMVA::IMethod*)factory->GetMethod("Cuts");
       TMVA::MethodCuts* cuts = dynamic_cast<TMVA::MethodCuts*>(method);

       std::string optcutsdir = "optcuts_" + selectionName + "_" + charge;
       std::string mkdir_command = "mkdir -p " + optcutsdir;
       system(mkdir_command.c_str());
       char cutsFileName[500];
       sprintf( cutsFileName, "%s/cuts_Seff%d.txt", optcutsdir.c_str(), 10*iEff );

       ofstream ofs(cutsFileName);

       std::vector<Double_t> cutsMin, cutsMax;
       cuts->GetCuts((float)iEff*0.10, cutsMin, cutsMax);

       bool found_pT1 = false;
       bool found_pT2 = false;
       bool found_NJ = false;
       bool found_NbJ = false;
       bool found_NbJmed = false;

       for( unsigned iCut=0; iCut<cutsMin.size(); ++iCut) {
         TString varName = factory->DefaultDataSetInfo().GetVariableInfo(iCut).GetInternalName();
         if( varName=="TMath_Min_pT1,pT2_") {
           ofs << "pT1 " << cutsMin[iCut] << " " << cutsMax[iCut] << std::endl;
           ofs << "pT2 " << cutsMin[iCut] << " " << cutsMax[iCut] << std::endl;
           found_pT1 = true;
           found_pT2 = true;
         } else {
           ofs << varName << " "  << cutsMin[iCut] << " " << cutsMax[iCut] << std::endl;
         }
         if( varName=="pT1" ) found_pT1 = true;
         if( varName=="pT2" ) found_pT2 = true;
         if( varName=="NJ" ) found_NJ = true;
         if( varName=="NbJ" ) found_NbJ = true;
         if( varName=="NbJmed" ) found_NbJmed = true;
       }

       // preselection cuts (if not optimized):
       if( !found_pT1 ) ofs << "pT1 20. 100000." << std::endl;
       if( !found_pT2 ) ofs << "pT2 20. 100000." << std::endl;
       if( !found_NJ )  ofs << "NJ 3 100000." << std::endl;
       if( !found_NbJ ) ofs << "NbJ 1 100000." << std::endl;
       if( !found_NbJmed && btagMed_presel_ ) ofs << "NbJmed 1 100000." << std::endl;

       if( charge=="plus" )
         ofs << "Charge 1 10" << std::endl;
       else if( charge=="minus" )
         ofs << "Charge -10 0" << std::endl;

       ofs.close();

     }  // for eff

   } // if cuts 

   // --------------------------------------------------------------
   
   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;      

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
void TMVAClassification( ) 
{
   // this loads the library
   TMVA::Tools::Instance();
   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   // Create a new root output file.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object.
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, 
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" );

   // ---------- input variables 
   factory->AddVariable("iso12x12", 'F');
   factory->AddVariable("iso4x4", 'F');
   factory->AddVariable("isoLshaped", 'F');
   factory->AddVariable("PUM0", 'F');

   // ---------- spectators ----------------
   factory->AddSpectator("pt", "F");
   factory->AddSpectator("eta", "F");
   factory->AddSpectator("phi", "F");
   factory->AddSpectator("e", "F");

   // read training and test data
   TString fSig = "DrellYan_Zee.root";
   TString fBkg = "MinBias.root";

   TFile *fileSig = TFile::Open( fSig );
   TFile *fileBkg = TFile::Open( fBkg );
   TTree *signal     = (TTree*)fileSig->Get("tree");
   TTree *background = (TTree*)fileBkg->Get("tree");
   

   // global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;

   // ====== register trees ====================================================
   // you can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( signal,     signalWeight     );
   factory->AddBackgroundTree( background, backgroundWeight );
   TCut mycuts = "pt > 10"; 
   TCut mycutb = "pt > 10"; 


   // tell the factory to use all remaining events in the trees after training for testing:
   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // If no numbers of events are given, half of the events in the tree are used for training, and 
   // the other half for testing:

   // ---- Book MVA methods
   // Boosted Decision Trees
   factory->BookMethod( TMVA::Types::kBDT, "BDT", 
                        "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );
   
   // ---- Train MVAs using the set of training events
   factory->TrainAllMethods();
   
   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();    

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;      

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
示例#17
0
void TMVAtest1()
{

  // ---------------------------------------------------------------
  std::cout << std::endl;
  std::cout << "==> Start TMVAClassification" << std::endl;
  // --------------------------------------------------------------------------------------------------
  
  // --- Here the preparation phase begins
  
  // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
  TString TrainName = "TMVA";
  
  TString outfileName( TrainName+".root" );
  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
  
  // Create the factory object. Later you can choose the methods
  // whose performance you'd like to investigate. The factory is 
  // the only TMVA object you have to interact with
  //
  // The first argument is the base of the name of all the
  // weightfiles in the directory weight/
  //
  // The second argument is the output file for the training results
  // All TMVA output can be suppressed by removing the "!" (not) in
  // front of the "Silent" argument in the option string
  TMVA::Factory *factory = new TMVA::Factory( TrainName, outputFile,
					      "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );
  
  // If you wish to modify default settings
  // (please check "src/Config.h" to see all available global options)
  //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
  //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";
  
  // Define the input variables that shall be used for the MVA training
  // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
  // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
  
  factory->AddVariable ("ptphoton", 'F');
//  factory->AddVariable ("etaphoton", 'F');
  factory->AddVariable ("ptmuon", 'F');
// factory->AddVariable ("etamuon", 'F');
  factory->AddVariable ("ptjet", 'F');
//  factory->AddVariable ("etajet", 'F');
//  factory->AddVariable ("masstop", 'F');
//  factory->AddVariable ("mtw", 'F');
  factory->AddVariable ("deltaRphotonjet", 'F');
  factory->AddVariable ("deltaRphotonmuon", 'F');
//  factory->AddVariable ("ht", 'F');
//  factory->AddVariable ("photonmuonmass", 'F');
  factory->AddVariable ("costopphoton", 'F');
//  factory->AddVariable ("topphotonmass", 'F');
//factory->AddVariable ("pttop", 'F');
//factory->AddVariable ("etatop", 'F');
  factory->AddVariable ("jetmultiplicity", 'F');
//  factory->AddVariable ("bjetmultiplicity", 'F');
  factory->AddVariable ("deltaphiphotonmet", 'F');
  factory->AddVariable ("cvsdiscriminant", 'F');
//  factory->AddVariable ("leptoncharge", 'F');


  //Load the signal and background event samples from ROOT trees
  
//  TFile *inputSTrain(0);
//  TFile *inputBTrain(0);
  TString sigFileTrain  = "SIGNALtGu.root";
  TString bkgFileTrain = "WPHJET.root";
  TString ttbarFileTrain1 ="TTBAR1.root";
  TString ttbarFileTrain2 ="TTBAR2.root";
  TString ttbarFileTrain3 ="TTBAR3.root";
  TString TTGFileTrain ="TTG.root";
  TString WWFileTrain ="WW.root";
  TString WZFileTrain ="WZ.root";
  TString ZZFileTrain ="ZZ.root";
  TString ZGAMMAFileTrain ="ZGAMMA.root";


//  TString bkgFileTrain = "TTG.root";
  
  TFile *inputSTrain  = TFile::Open( sigFileTrain );
 if (!inputSTrain) {
      std::cout << "ERROR: could not open data file" << std::endl;
      exit(1);
   } 
  TFile *inputBTrain = TFile::Open( bkgFileTrain );
  TFile *inputBttbar1 = TFile::Open( ttbarFileTrain1);
  TFile *inputBttbar2 = TFile::Open( ttbarFileTrain2);
  TFile *inputBttbar3 = TFile::Open( ttbarFileTrain3);
  TFile *inputTTG= TFile::Open(TTGFileTrain );
  TFile *inputWW= TFile::Open(WWFileTrain );
  TFile *inputWZ= TFile::Open(WZFileTrain );
  TFile *inputZZ= TFile::Open(ZZFileTrain );
  TFile *inputZGAMMA= TFile::Open( ZGAMMAFileTrain);

if (!inputBTrain) {
      std::cout << "ERROR: could not open data file" << std::endl;
      exit(1);
   }

 
  std::cout << "--- TMVAnalysis    : Accessing Signal Train: " << sigFileTrain << std::endl;
  std::cout << "--- TMVAnalysis    : Accessing Background Train: " << bkgFileTrain << std::endl;
  
  TTree *signalTrain      = (TTree*)inputSTrain->FindObjectAny("atq");
  TTree *backgroundTrain = (TTree*)inputBTrain->FindObjectAny("atq");
  TTree *ttbarbackground1 = (TTree*)inputBttbar1->FindObjectAny("atq");
  TTree *ttbarbackground2 = (TTree*)inputBttbar2->FindObjectAny("atq");
  TTree *ttbarbackground3 = (TTree*)inputBttbar3->FindObjectAny("atq");
  TTree *TTGbackground = (TTree*)inputTTG->FindObjectAny("atq");
  TTree *WWbackground = (TTree*)inputWW->FindObjectAny("atq");
  TTree *WZbackground = (TTree*)inputWZ->FindObjectAny("atq");
  TTree *ZZbackground = (TTree*)inputZZ->FindObjectAny("atq");
  TTree *ZGAMMAbackground = (TTree*)inputZGAMMA->FindObjectAny("atq");

//  TTree *signalTrain      = (TTree*)inputSTrain->FindObjectAny("insidetopmass");
//  TTree *backgroundTrain = (TTree*)inputBTrain->FindObjectAny("insidetopmass");
  
 // factory->AddSignalTree( signalTrain, 1);
//cout<<"reza1";
  
 // factory->AddBackgroundTree( backgroundTrain, 1, );
//cout<<"reza2";

 factory->SetInputTrees(signalTrain,backgroundTrain,1,1);
 factory->AddBackgroundTree( ttbarbackground1, 1.1);
 factory->AddBackgroundTree( ttbarbackground2, 1.1);
// factory->AddBackgroundTree( ttbarbackground3, 0.3);
 factory->AddBackgroundTree( TTGbackground, 1);
 factory->AddBackgroundTree( WWbackground, 1);
 factory->AddBackgroundTree(WZbackground , 1);
 factory->AddBackgroundTree(ZZbackground , 1);
 factory->AddBackgroundTree(ZGAMMAbackground , 1);

  // Set xs-weight
//  factory->SetSignalWeightExpression    ("weight");
  //factory->SetBackgroundWeightExpression("weight");
  
  // Apply additional cuts on the signal and background samples (can be different)
  TCut mycut = "";
 // TCut mycutb = ""; 
  
  Int_t nSignalTrain = signalTrain->GetEntries();
  
  Int_t nBackTrain  = backgroundTrain->GetEntries();
  factory->PrepareTrainingAndTestTree( "","", "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=0:nTest_Background=0:SplitMode=Random:SplitSeed=88!V" );

  
//factory->PrepareTrainingAndTestTree( "", "",
//				       "nTrain_Signal=100:nTrain_Background=100:nTest_Signal=100:nTest_Background=100:!V" );

//  factory->PrepareTrainingAndTestTree( mycuts, mycutb,
				       //":NSigTrain=:NBkgTrain=:NSigTest=:NBkgTest=:SplitMode=Alternate:!V" );  
				       //":nTrain_Signal=10000:nTest_Signal=2260:nTrain_Background=100000:nTest_Background=100000:SplitMode=Alternate:!V" );  
//				       ":nTrain_Signal=nSignalTrain:nTrain_Background=nBackTrain:!V");
  //":nTrain_Signal=nSignalTrain:nTest_Signal=nSignalTest:nTrain_Background=nBackTrain:nTest_Background=nBackTest:SplitMode=Block:!V");
  //":SplitMode=Alternate:!V");
  //":nTrain_Signal=:nTest_Signal=:nTrain_Background=:nTest_Background=:SplitMode=Block:!V");
  //":SplitMode=Alternate:!V" );  
  
  // ---- Book MVA methods
//  factory->BookMethod( TMVA::Types::kBDT, "BDT",
//		       "!H:!V:NTrees=300:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=-1:NNodesMax=5:UseNvars=4:PruneStrength=5:PruneMethod=CostComplexity:MaxDepth=4" );
//factory->BookMethod( TMVA::Types::kBDT, "BDT",
 //                      "!H:!V:NTrees=400:BoostType=AdaBoost:AdaBoostBeta=0.5:MaxDepth=3:SeparationType=GiniIndex:PruneMethod=NoPruning" );
factory->BookMethod( TMVA::Types::kBDT, "BDT",
                       "!H:!V:NTrees=400:BoostType=AdaBoost:AdaBoostBeta=0.8:MaxDepth=3:SeparationType=GiniIndex:PruneMethod=NoPruning" );


//  factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5");
 // factory->BookMethod(TMVA::Types::kFisher, "Fisher", "H:!V:Fisher");   
  // ---- Now you can tell the factory to train, test, and evaluate the MVAs
// factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); 
// factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001" );
// factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit","H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); 

  
// Train MVAs using the set of training events
  factory->TrainAllMethods();
  
   // ---- Evaluate all MVAs using the set of test events
  factory->TestAllMethods();
  
  // ----- Evaluate and compare performance of all configured MVAs
  factory->EvaluateAllMethods();
  
  // --------------------------------------------------------------
  
  // Save the output
  outputFile->Close();
  
  std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
  std::cout << "==> TMVAClassification is done!" << std::endl;
  
  delete factory;
  
  // Launch the GUI for the root macros
//  if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
示例#18
0
void TMVAClassification( TString eventsToTrain = "0", const TString & region = "barrel", const TString index = "", TString myMethodList = "BDT")
{

  std::cout << "running classification for " << region << " for " << myMethodList << std::endl;

  if( region != "barrel" && region != "endcaps" ) {
    std::cout << "Error, region can only be barrel or endcaps. Selected region was: " << region << std::endl;
    exit(1);
  }
  if( index != "" && index != "0" && index != "1" && index != "2" ) {
    std::cout << "Error, index can only be \"\", \"0\", \"1\" or \"2\". Selected index was: " << index << std::endl;
    exit(1);
  }


   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   // if you like to use a method via the plugin mechanism, we recommend using
   //
   // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
   // (an example is given for using the BDT as plugin (see below),
   // but of course the real application is when you write your own
   // method based)

   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Cut optimisation
   Use["Cuts"]            = 0;
   Use["CutsD"]           = 0;
   Use["CutsPCA"]         = 0;
   Use["CutsGA"]          = 0;
   Use["CutsSA"]          = 0;
   // 
   // --- 1-dimensional likelihood ("naive Bayes estimator")
   Use["Likelihood"]      = 0;
   Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"]   = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"]   = 0;
   Use["LikelihoodMIX"]   = 0;
   //
   // --- Mutidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]           = 0;
   Use["PDERSD"]          = 0;
   Use["PDERSPCA"]        = 0;
   Use["PDEFoam"]         = 0;
   Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
   Use["KNN"]             = 0; // k-nearest neighbour method
   //
   // --- Linear Discriminant Analysis
   Use["LD"]              = 0; // Linear Discriminant identical to Fisher
   Use["Fisher"]          = 0;
   Use["FisherG"]         = 0;
   Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
   Use["HMatrix"]         = 0;
   //
   // --- Function Discriminant analysis
   Use["FDA_GA"]          = 0; // minimisation of user-defined function using Genetics Algorithm
   Use["FDA_SA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   Use["FDA_MCMT"]        = 0;
   //
   // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
   Use["MLP"]             = 0; // Recommended ANN
   Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
   Use["MLPBNN"]          = 0; // Recommended ANN with BFGS training method and bayesian regulator
   Use["CFMlpANN"]        = 0; // Depreciated ANN from ALEPH
   Use["TMlpANN"]         = 0; // ROOT's own ANN
   //
   // --- Support Vector Machine 
   Use["SVM"]             = 0;
   // 
   // --- Boosted Decision Trees
   Use["BDT"]             = 1; // uses Adaptive Boost
   Use["BDTG"]            = 0; // uses Gradient Boost
   Use["BDTB"]            = 0; // uses Bagging
   Use["BDTD"]            = 0; // decorrelation + Adaptive Boost
   Use["BDTF"]            = 0; // allow usage of fisher discriminant for node splitting 
   // 
   // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
   Use["RuleFit"]         = 0;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
	    std::exit(2);
         }
         Use[regMethod] = 1;
      }
   }


   // Input and output file names
   TString fnameTrainS = "BsMC12_barrel_preselection";
   TString fnameTrainB = "Barrel_preselection";
   TString fnameTestS = "BsMC12_barrel_preselection";
   TString fnameTestB = "Barrel_preselection";
   TString outputFileName = "TMVA_barrel";
   TString weightDirName = "barrel";
   if( region == "endcaps" ) {
     fnameTrainS = "BsMC12_endcaps_preselection";
     fnameTrainB = "Endcaps_preselection";
     fnameTestS = "BsMC12_endcaps_preselection";
     fnameTestB = "Endcaps_preselection";
     outputFileName = "TMVA_endcaps";
     weightDirName = "endcaps";
   }
   if( index != "" ) {
     fnameTrainS += "_"+index;
     fnameTrainB += "_"+index;
     TString indexTest = "";
     // The test index is the train index +1 (2+1 -> 0)
     if( index == "0" ) indexTest = "1";
     else if( index == "1" ) indexTest = "2";
     else if( index == "2" ) indexTest = "0";
     fnameTestS += "_"+indexTest;
     fnameTestB += "_"+indexTest;
     outputFileName += "_"+index;
     weightDirName += index;
   }

   fnameTrainS     = rootDir + fnameTrainS    + ".root";
   fnameTrainB     = rootDir + fnameTrainB    + ".root";
   fnameTestS      = rootDir + fnameTestS     + ".root";
   fnameTestB      = rootDir + fnameTestB     + ".root";
   outputFileName  = rootDir + outputFileName + ".root";
   weightDirName   = weightsDir + weightDirName + "Weights";


   // --------------------------------------------------------------------------------------------------

   // --- Here the preparation phase begins

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName(outputFileName);
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory is 
   // the only TMVA object you have to interact with
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";
   // (TMVA::gConfig().GetIONames()).fWeightFileDir = outputFileName;
   (TMVA::gConfig().GetIONames()).fWeightFileDir = weightDirName;

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]

   bool useNewMuonID = false;

   factory->AddVariable( "fls3d",      "fls3d", "", 'F' );
   factory->AddVariable( "alpha",      "alpha", "", 'F' );
   factory->AddVariable( "pvips",      "pvips", "", 'F' );
   factory->AddVariable( "iso",        "iso", "", 'F' );
   factory->AddVariable( "m1iso",      "m1iso", "", 'F' );
   factory->AddVariable( "m2iso",      "m2iso", "", 'F' );
   factory->AddVariable( "chi2dof",    "chi2/dof", "", 'F' );
   if( region == "barrel" ) {
     factory->AddVariable( "eta",      "eta", "", 'F' );
     factory->AddVariable( "maxdoca",  "maxdoca", "cm", 'F' );
   }
   else {
     factory->AddVariable( "pt",       "pt", "GeV/c", 'F' );
     factory->AddVariable( "pvip",     "pvip", "cm", 'F' );
   }
   factory->AddVariable( "docatrk",    "docatrk", "cm", 'F' );
   // factory->AddVariable( "pt",         "pt", "GeV/c", 'F' );
   // factory->AddVariable( "closetrk",   "closetrk", "", 'I' );
   // factory->AddVariable( "y",                              "y", "", 'F' );
   // factory->AddVariable( "l3d",                            "l3d", "cm", 'F' );
   // factory->AddVariable( "cosAlphaXY",                     "cosAlphaXY", "", 'F' );
   // factory->AddVariable( "mu1_dxy",                        "mu1_dxy", "cm", 'F' );
   // factory->AddVariable( "mu2_dxy",                        "mu2_dxy", "cm", 'F' );

   if( useNewMuonID ) {
     // New Muon-id
     factory->AddVariable( "mu1_MVAMuonID",                  "mu1_MVAMuonID", "", 'F');
     factory->AddVariable( "mu2_MVAMuonID",                  "mu2_MVAMuonID", "", 'F');
   }

   // Extra variables
   // factory->AddVariable( "mu1_pt",                           "mu1_pt", "GeV/c", 'F' );
   // factory->AddVariable( "mu2_pt",                           "mu2_pt", "GeV/c", 'F' );
   // factory->AddVariable( "pvw8",                             "pvw8", "", 'F' );
   // factory->AddVariable( "cosAlpha3D",                       "cosAlpha3D", "", 'F' );
   // factory->AddVariable( "countTksOfPV",                     "countTksOfPV", "", 'I' );
   // factory->AddVariable( "ctauErrPV",                        "ctauErrPV", "", 'F' );
   // factory->AddVariable( "ctauPV",                           "ctauPV", "", 'F' );
   // factory->AddVariable( "dcaxy",                            "dcaxy", "", 'F' );

   // factory->AddVariable( "mu1_glbTrackProb",                 "mu1_glbTrackProb", "", 'F' );
   // factory->AddVariable( "mu1_nChi2",                        "mu1_nChi2", "", 'F' );
   // factory->AddVariable( "mu1_nMuSegs",                      "mu1_nMuSegs", "", 'F' );
   // factory->AddVariable( "mu1_nMuSegsCln",                   "mu1_nMuSegsCln", "", 'F' );
   // factory->AddVariable( "mu1_nPixHits",                     "mu1_nPixHits", "", 'F' );
   // factory->AddVariable( "mu1_nTrHits",                      "mu1_nTrHits", "", 'F' );
   // factory->AddVariable( "mu1_segComp",                      "mu1_segComp", "", 'F' );
   // factory->AddVariable( "mu1_trkEHitsOut",                  "mu1_trkEHitsOut", "", 'F' );
   // factory->AddVariable( "mu1_trkVHits",                     "mu1_trkVHits", "", 'F' );
   // factory->AddVariable( "mu1_validFrac",                    "mu1_validFrac", "", 'F' );
   // factory->AddVariable( "mu1_chi2LocMom",                   "mu1_chi2LocMom", "", 'F' );
   // factory->AddVariable( "mu1_chi2LocPos",                   "mu1_chi2LocPos", "", 'F' );

   // factory->AddVariable( "mu2_glbTrackProb",                 "mu2_glbTrackProb", "", 'F' );
   // factory->AddVariable( "mu2_nChi2",                        "mu2_nChi2", "", 'F' );
   // factory->AddVariable( "mu2_nMuSegs",                      "mu2_nMuSegs", "", 'F' );
   // factory->AddVariable( "mu2_nMuSegsCln",                   "mu2_nMuSegsCln", "", 'F' );
   // factory->AddVariable( "mu2_nPixHits",                     "mu2_nPixHits", "", 'F' );
   // factory->AddVariable( "mu2_nTrHits",                      "mu2_nTrHits", "", 'F' );
   // factory->AddVariable( "mu2_segComp",                      "mu2_segComp", "", 'F' );
   // factory->AddVariable( "mu2_trkEHitsOut",                  "mu2_trkEHitsOut", "", 'F' );
   // factory->AddVariable( "mu2_trkVHits",                     "mu2_trkVHits", "", 'F' );
   // factory->AddVariable( "mu2_validFrac",                    "mu2_validFrac", "", 'F' );
   // factory->AddVariable( "mu2_chi2LocMom",                   "mu2_chi2LocMom", "", 'F' );
   // factory->AddVariable( "mu2_chi2LocPos",                   "mu2_chi2LocPos", "", 'F' );



   // factory->AddVariable( "l3d := ctauPV*pt/mass",            "l3d", "cm", 'F' );
   // factory->AddVariable( "l3dSig := ctauPV/ctauErrPV",       "l3dSig", "", 'F' );

   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
   // factory->AddSpectator( "spec1 := mass*2",  "Spectator 1", "units", 'F' );
   // factory->AddSpectator( "spec2 := mass*3",  "Spectator 2", "units", 'F' );
   factory->AddSpectator( "mass",                            "mass", "GeV/c^{2}", 'F' );


   // Read training and test data
   // (it is also possible to use ASCII format as input -> see TMVA Users Guide)

   if (gSystem->AccessPathName( fnameTrainS )) {  // file does not exist in local directory
     std::cout << "Did not access " << fnameTrainS << " exiting." << std::endl;
     std::exit(4);
   }

     //gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root");
   
   TFile *inputTrainS = TFile::Open( fnameTrainS );
   TFile *inputTrainB = TFile::Open( fnameTrainB );
   TFile *inputTestS  = TFile::Open( fnameTestS  );
   TFile *inputTestB  = TFile::Open( fnameTestB  );
   // --- Register the training and test trees
   TTree *signalTrainTree     = (TTree*)inputTrainS->Get("probe_tree");
   TTree *backgroundTrainTree = (TTree*)inputTrainB->Get("probe_tree");
   TTree *signalTestTree     = (TTree*)inputTestS->Get("probe_tree");
   TTree *backgroundTestTree = (TTree*)inputTestB->Get("probe_tree");
   // global event weights per tree (see below for setting event-wise weights)
   Double_t signalTrainWeight     = 1.0;
   Double_t backgroundTrainWeight = 1.0;
   Double_t signalTestWeight     = 1.0;
   Double_t backgroundTestWeight = 1.0;
   // Decide if using the split and mixing or the full trees
   if( fnameTrainS == fnameTestS ) {
     if( fnameTrainB != fnameTestB ) {
       std::cout << "This macro cannot handle cases where the same signal sample is used for training and testing, but different background samples are used.";
       exit(1);
     }
     std::cout << "--- TMVAClassification       : Using input file: " << inputTrainS->GetName() << std::endl;
     std::cout << "--- and file: " << inputTrainB->GetName() << std::endl;
     // You can add an arbitrary number of signal or background trees
     factory->AddSignalTree    ( signalTrainTree,     signalTrainWeight     );
     factory->AddBackgroundTree( backgroundTrainTree, backgroundTrainWeight );
   }
   else {
     if( fnameTrainB == fnameTestB ) {
       std::cout << "This macro cannot handle cases where the same background sample is used for training and testing, but different signal samples are used.";
       exit(1);
     }
     std::cout << "--- TMVAClassification       : Using input file: " << inputTrainS->GetName() << std::endl;
     std::cout << "--- and file: " << inputTrainB->GetName() << " for training and" << std::endl;
     std::cout << "--- input file: " << inputTestS->GetName() << std::endl;
     std::cout << "--- and file: " << inputTestB->GetName() << " for testing." << std::endl;
     // To give different trees for training and testing, do as follows:
     factory->AddSignalTree( signalTrainTree,     signalTrainWeight, "Training" );
     factory->AddSignalTree( signalTestTree,      signalTestWeight,  "Test" );
     factory->AddBackgroundTree( backgroundTrainTree, backgroundTrainWeight, "Training" );
     factory->AddBackgroundTree( backgroundTestTree,  backgroundTestWeight,  "Test" );
   }

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = "";
   TCut mycutb = "";

   // Tell the factory how to use the training and testing events
   //
   // If no numbers of events are given, half of the events in the tree are used 
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut,
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );




   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal="+eventsToTrain+":nTrain_Background="+eventsToTrain+":SplitMode=Random:NormMode=NumEvents:!V" );
   // factory->PrepareTrainingAndTestTree( mycuts, mycutb,
   //                                      "nTrain_Signal=3000:nTrain_Background=3000:nTest_Signal=3000:nTest_Background=3000:SplitMode=Random:NormMode=NumEvents:!V" );
   // factory->PrepareTrainingAndTestTree( mycuts, mycutb,
   //                                     "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

   if (Use["CutsSA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                           "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   // Likelihood ("naive Bayes estimator")
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // Decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   // PCA-transformed likelihood
   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 

   // Use a kernel density estimator to approximate the PDFs
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); 

   // Use a variable-dependent mix of splines and kernel density estimator
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

   // Test the multi-dimensional probability density estimator
   // here are the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["PDEFoamBoost"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
                           "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // H-Matrix (chi2-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" );

   // Linear discriminant (same as Fisher discriminant)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher discriminant (same as LD)
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", 
                           "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"])
     // factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );
     factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+8:TestRate=5:!UseRegulator" );

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators

   // CF(Clermont-Ferrand)ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...  

   // Tmlp(Root)ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDTG"]) // Gradient Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5" );

   if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=800:nEventsMin=50:MaxDepth=2:BoostType=AdaBoost:AdaBoostBeta=1:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:NNodesMax=5" );


   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );

   if (Use["BDTF"])  // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables
      factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher",
                           "!H:!V:NTrees=50:nEventsMin=150:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","GA");

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   std::cout << "Training all methods" << std::endl;
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   std::cout << "Testing all methods" << std::endl;
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   std::cout << "Evaluating all methods" << std::endl;
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
示例#19
0
int main( int argc, char* argv[] ) {


    std::cout << std::endl << std::endl;
    std::cout << "------------------------------------------------------" << std::endl;
    std::cout << "|                                                    |" << std::endl;
    std::cout << "|                                                    |" << std::endl;
    std::cout << "|               Running runZGAnalysis                |" << std::endl;
    std::cout << "|                                                    |" << std::endl;
    std::cout << "|                                                    |" << std::endl;
    std::cout << "------------------------------------------------------" << std::endl;
    std::cout << std::endl << std::endl;




    if( argc<2 ) {
        std::cout << "USAGE: ./runZGAnalysis [configFileName] [data/MC]" << std::endl;
        std::cout << "Exiting." << std::endl;
        exit(11);
    }


    std::string configFileName(argv[1]);
    ZGConfig cfg(configFileName);



    bool onlyData = false;
    bool onlyMC   = false;
    bool onlySignal = false;
    bool noSignals = false;
    if( argc > 2 ) {

        std::string dataMC(argv[2]);
        if( dataMC=="data" ) onlyData = true;
        else if( dataMC=="MC" || dataMC=="mc" ) onlyMC = true;
        else if( dataMC=="mcbg" || dataMC=="mcBG" || dataMC=="MCBG" || dataMC=="mc_bg" || dataMC=="MC_BG" ) {
            onlyMC = true;
            noSignals = true;
        } else if( dataMC=="signal" ) onlySignal = true;
        else {
            std::cout << "-> You passed a second argument that isn't 'data', nor 'MC', nor 'signal', so I don't know what to do about it." << std::endl;
        }

    } else {

        std::cout << "-> Will run on both data and MC." << std::endl;

    }


    if( onlyMC ) {
        std::cout << "-> Will run only on MC." << std::endl;
        if( noSignals ) {
            std::cout << "-> Will skip signal." << std::endl;
        }
    }

    if( onlyData ) {
        std::cout << "-> Will run only on data." << std::endl;
    }


    std::string outputdir = cfg.getEventYieldDir();
    system(Form("mkdir -p %s", outputdir.c_str()));


    std::string outfileName(Form("%s/trees_tmp.root", outputdir.c_str()));

    TFile* outfile = TFile::Open(outfileName.c_str(), "update");
    outfile->cd();





    if( !onlyData && !onlySignal ) { // run on MC


        std::string samplesFileName = "../samples/samples_" + cfg.mcSamples() + ".dat";
        std::cout << std::endl << std::endl;
        std::cout << "-> Loading samples from file: " << samplesFileName << std::endl;


        std::vector<ZGSample> fSamples = ZGSample::loadSamples(samplesFileName);
        //std::vector<ZGSample> fSamples = ZGSample::loadSamples(samplesFileName, 100, 999);
        if( fSamples.size()==0 ) {
            std::cout << "There must be an error: samples is empty!" << std::endl;
            exit(120);
        }


        //addTreeToFile( outfile, "zg", fSamples, cfg);
        addTreeToFile( outfile, "zg", fSamples, cfg, 851, 852 );
        addTreeToFile( outfile, "dy", fSamples, cfg, 700, 710 );
        //addTreeToFile( outfile, "top", fSamples, cfg, 300, 499 ); // irrelevant


        std::cout << "-> Done looping on MC samples." << std::endl;


    } // if MC samples



    // load signal samples, if any
    if( cfg.mcSamples()!="" && cfg.additionalStuff()!="noSignals" && !noSignals && !onlyData ) {

        std::string samplesFileName = "../samples/samples_" + cfg.mcSamples() + ".dat";
        std::cout << std::endl << std::endl;
        std::cout << "-> Loading signal samples from file: " << samplesFileName << std::endl;

        std::vector<ZGSample> fSamples = ZGSample::loadSamples(samplesFileName, 1000); // only signal (id>=1000)


        if( fSamples.size()==0 ) {

            std::cout << "No signal samples found, skipping." << std::endl;

        } else {

            for( unsigned i=0; i<fSamples.size(); ++i )
                addTreeToFile( outfile, fSamples[i], cfg );

        } // if samples != 0

    } // if mc samples



    if( !(cfg.dummyAnalysis()) && cfg.dataSamples()!="" && !onlyMC  && !onlySignal ) {


        std::string samplesFile_data = "../samples/samples_" + cfg.dataSamples() + ".dat";

        std::cout << std::endl << std::endl;
        std::cout << "-> Loading data from file: " << samplesFile_data << std::endl;

        std::vector<ZGSample> samples_data = ZGSample::loadSamples(samplesFile_data);
        if( samples_data.size()==0 ) {
            std::cout << "There must be an error: samples_data is empty!" << std::endl;
            exit(1209);
        }


        addTreeToFile( outfile, "data" , samples_data, cfg );

        std::cout << "-> Done looping on data." << std::endl;

    }


    outfile->Close();

    std::string finalFileName = outputdir + "/trees.root";
    system( Form("cp %s %s", outfileName.c_str(), finalFileName.c_str()) );

    std::cout << "-> Wrote trees to file: " << finalFileName << std::endl;



    return 0;

}
示例#20
0
void TMVAClassificationHwwNtuple( TString myMethodList = "" )
{
   // This loads the library
   TMVA::Tools::Instance();

    gROOT->ProcessLine(".L TMVAGui.C");


   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Cut optimisation
   Use["Cuts"]            = 1;
   Use["CutsD"]           = 0;
   Use["CutsPCA"]         = 0;
   Use["CutsGA"]          = 0;
   Use["CutsSA"]          = 0;
   // 
   Use["BDT"]             = 1; // uses Adaptive Boost
   Use["BDTG"]            = 0; // uses Gradient Boost
   Use["BDTB"]            = 0; // uses Bagging
   Use["BDTD"]            = 0; // decorrelation + Adaptive Boost
   Use["BDTF"]            = 0; // allow usage of fisher discriminant for node splitting 
   // 
   // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
   Use["RuleFit"]         = 0;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;
   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
	 cout<<regMethod<<" is on"<<endl;
      }
   }
   // -------------------------------------------------------------------------

   // --- Here the preparation phase begins

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // For one variable
   //TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
   //                                            "!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification" );
   // For Multiple Variables
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );
   //factory->AddVariable( "pt1",                "LeadLepton pt", "", 'F' );
   //factory->AddVariable( "pt2",                "TailLepton pt", "", 'F' );
   factory->AddVariable( "pfmet",                "MissingEt", "", 'F' );
   factory->AddVariable( "mpmet",              "Minimum Proj. Met", "", 'F' );
   factory->AddVariable( "dphill",             "DeltPhiOfLepLep", "", 'F' );
   //factory->AddVariable( "mll",                "DiLepton Mass", "", 'F' );
   factory->AddVariable( "ptll",               "DiLepton pt", "", 'F' );
   //
   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
   //factory->AddSpectator( "spec1 := var1*2",  "Spectator 1", "units", 'F' );
   //factory->AddSpectator( "spec2 := var1*3",  "Spectator 2", "units", 'F' );
   //
   //factory->AddSpectator( "mWW",                "Higgs Mass", "", 'F' );
   factory->AddSpectator( "pt1",                "LeadLepton pt", "", 'F' );
   factory->AddSpectator( "pt2",                "TailLepton pt", "", 'F' );
   factory->AddSpectator( "pfmet",                "MissingEt", "", 'F' );
   factory->AddSpectator( "mpmet",              "Minimum Proj. Met", "", 'F' );
   factory->AddSpectator( "dphill",             "DeltPhiOfLepLep", "", 'F' );
   factory->AddSpectator( "mll",                "DiLepton Mass", "", 'F' );
   factory->AddSpectator( "ptll",               "DiLepton pt", "", 'F' );
   // Read training and test data
   // (it is also possible to use ASCII format as input -> see TMVA Users Guide)
   //TString fname = "./tmva_class_example.root";
   //TString fname = "/afs/cern.ch/work/s/salee/private/HWWwidth/HWW/GGVvAnalyzer/MkNtuple/Hw1Int8TeV/MkNtuple.root";
   //TString fname = "/terranova_0/HWWwidth/HWW/GGVvAnalyzer/MkNtuple/Hw1Int8TeV/MkNtuple.root";
   
   //if (gSystem->AccessPathName( fname ))  // file does not exist in local directory
    // exit(-1);
      //gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root");
   
   //TFile *input = TFile::Open( fname );
   //TFile *SB_OnPeak = TFile::Open("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_IntOnPeak_8TeV.root");
   //TTree *SB_OnPeak_Tree = (TTree*)SB_OnPeak->Get("latino");
   
   TChain *S_Chain = new TChain("latino");
   TChain *C_Chain = new TChain("latino");
   TChain *SCI_Chain = new TChain("latino");
   TChain *qqWW_Chain = new TChain("latino");

   S_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_SigOnPeak_8TeV.root");
   S_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_SigShoulder_8TeV.root");
   S_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_SigTail_8TeV.root");
   SCI_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_IntOnPeak_8TeV.root");
   SCI_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_IntShoulder_8TeV.root");
   SCI_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_IntTail_8TeV.root");
   C_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw25_CotHead_8TeV.root");
   C_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw25_CotTail_8TeV.root");

   qqWW_Chain->Add("/afs/cern.ch/user/m/maiko/work/public/Tree/tree_skim_wwmin/nominals/latino_000_WWJets2LMad.root");
   
   // --- Register the training and test trees

   // You can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( S_Chain  );
   factory->AddBackgroundTree( qqWW_Chain );
   factory->AddBackgroundTree( C_Chain );
   // Classification training and test data in ROOT tree format with signal and background events being located in the same tree
   //factory->SetInputTrees(SCI_Chain, GenOffCut, GenOnCut);
   
   // To give different trees for training and testing, do as follows:
   //    factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
   //    factory->AddSignalTree( signalTestTree,     signalTestWeight,  "Test" );
   
   factory->SetWeightExpression          ("2.1*puW*baseW*effW*triggW*19.468");
   //factory->SetSignalWeightExpression    ("2.1*puW*baseW*effW*triggW*19.468");
   //factory->SetBackgroundWeightExpression("puW*baseW*effW*triggW*19.468");

   //factory->PrepareTrainingAndTestTree( ChanCommOff,
   //                                     "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=None:!V" );
                                        //"nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V";
   factory->PrepareTrainingAndTestTree( ChanCommOff0J,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=None:!V" );
   // ---- Book MVA methods
   //
   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:V:NTrees=850:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );
                           //"!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );

   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // -----------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

   // ---- STILL EXPERIMENTAL and only implemented for BDT's ! 
   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","FitGA");

   // -----------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   //if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
示例#21
0
void TMVAClassificationElecTau(std::string ordering_ = "Pt", std::string bkg_ = "qqH115vsWZttQCD") {

    TMVA::Tools::Instance();

    TString outfileName( "TMVAElecTau"+ordering_+"Ord_"+bkg_+".root" );
    TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

    TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationElecTau"+ordering_+"Ord_"+bkg_, outputFile,
            "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" );
    factory->AddVariable( "pt1", "pT-tag1", "GeV/c"         , 'F'  );
    factory->AddVariable( "pt2", "pT-tag2", "GeV/c"         , 'F'  );
    factory->AddVariable( "Deta","|y-tag1 - y-tag2|",""     , 'F'  );
    //factory->AddVariable( "opposite:=abs(eta1*eta2)/eta1/eta2","sign1*sign2",""             , 'F'  );
    //factory->AddVariable( "Dphi", "#Delta#phi" ,""             , 'F'  );
    factory->AddVariable( "Mjj", "M(tag1,tag2)", "GeV/c^{2}"  , 'F'  );

    factory->AddSpectator( "eta1",  "#eta_{tag1}" , 'F' );
    factory->AddSpectator( "eta2",  "#eta_{tag2}" , 'F' );

    factory->SetWeightExpression( "sampleWeight" );

    TString fSignalName              = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleVBFH115-powheg-PUS1_Open_ElecTauStream.root";
    TString fBackgroundNameDYJets    = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleZjets-alpgen-PUS1_Open_ElecTauStream.root";
    TString fBackgroundNameWJets     = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleWJets-madgraph-PUS1_Open_ElecTauStream.root";
    TString fBackgroundNameQCD       = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleQCD_Open_ElecTauStream.root";
    TString fBackgroundNameTTbar     = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleTTJets-madgraph-PUS1_Open_ElecTauStream.root";


    TFile *fSignal(0);
    TFile *fBackgroundDYJets(0);
    TFile *fBackgroundWJets(0);
    TFile *fBackgroundQCD(0);
    TFile *fBackgroundTTbar(0);

    fSignal           = TFile::Open( fSignalName );
    fBackgroundDYJets = TFile::Open( fBackgroundNameDYJets );
    fBackgroundWJets  = TFile::Open( fBackgroundNameWJets );
    fBackgroundQCD    = TFile::Open( fBackgroundNameQCD );
    fBackgroundTTbar  = TFile::Open( fBackgroundNameTTbar );

    if(!fSignal || !fBackgroundDYJets || !fBackgroundWJets || !fBackgroundQCD || !fBackgroundTTbar) {
        std::cout << "ERROR: could not open files" << std::endl;
        exit(1);
    }

    TString tree = "outTree"+ordering_+"Ord";

    TCut mycuts = "";
    TCut mycutb = "";

    TCut cutA  = "pt1>0 && tightestHPSWP>0";
    TCut cutB  = "pt1>0 && combRelIsoLeg1<0.1";
    TCut cutBl = "pt1>0 && combRelIsoLeg1<0.3";
    TCut cutC  = "pt1>0 && diTauCharge==0";
    TCut cutD  = "pt1>0 && MtLeg1<40";

    // select events for training
    TFile* dummy = new TFile("dummy.root","RECREATE");
    TH1F* allEvents = new TH1F("allEvents","",1,-10,10);
    float totalEvents, cutEvents;

    // signal: all
    TTree *signal           = ((TTree*)(fSignal->Get(tree)))->CopyTree(cutA&&cutB&&cutC&&cutD);
    cout << "Copied signal tree with full selection: " << ((TTree*)(fSignal->Get(tree)))->GetEntries() << " --> "  << signal->GetEntries()  << endl;
    allEvents->Reset();
    signal->Draw("eta1>>allEvents","sampleWeight");
    cutEvents  = allEvents->Integral();
    Double_t signalWeight =   1.0;
    cout << "Signal: expected yield " << cutEvents << " -- weight " << signalWeight << endl;

    // Z+jets: all
    TTree *backgroundDYJets = ((TTree*)(fBackgroundDYJets->Get(tree)))->CopyTree(cutA&&cutB&&cutC&&cutD);
    cout << "Copied DYJets tree with full selection: " << ((TTree*)(fBackgroundDYJets->Get(tree)))->GetEntries() << " --> "  << backgroundDYJets->GetEntries()  << endl;
    allEvents->Reset();
    backgroundDYJets->Draw("eta1>>allEvents","sampleWeight");
    cutEvents  = allEvents->Integral();
    Double_t backgroundDYJetsWeight = 1.0;
    cout << "ZJets: expected yield " << cutEvents << " -- weight " << backgroundDYJetsWeight << endl;

    // W+jets: iso+Mt
    TTree *backgroundWJets  = ((TTree*)(fBackgroundWJets->Get(tree)))->CopyTree(cutB&&cutD);
    cout << "Copied WJets tree with iso+Mt selection: " << ((TTree*)(fBackgroundWJets->Get(tree)))->GetEntries() << " --> "  << backgroundWJets->GetEntries()  << endl;
    allEvents->Reset();
    backgroundWJets->Draw("eta1>>allEvents","sampleWeight");
    totalEvents  = allEvents->Integral();
    allEvents->Reset();
    backgroundWJets->Draw("eta1>>allEvents","sampleWeight*(tightestHPSWP>0 && diTauCharge==0)");
    cutEvents  = allEvents->Integral();
    Double_t backgroundWJetsWeight  =  cutEvents / totalEvents;
    cout << "WJets: expected yield " << cutEvents  << " -- weight " << backgroundWJetsWeight << endl;

    // QCD: Mt+loose iso
    TTree *backgroundQCD    = ((TTree*)(fBackgroundQCD->Get(tree)))->CopyTree(cutD&&cutBl);
    cout << "Copied QCD tree with Mt selection: " << ((TTree*)(fBackgroundQCD->Get(tree)))->GetEntries() << " --> "  << backgroundQCD->GetEntries()  << endl;
    allEvents->Reset();
    backgroundQCD->Draw("eta1>>allEvents","sampleWeight");
    totalEvents  = allEvents->Integral();
    allEvents->Reset();
    backgroundQCD->Draw("eta1>>allEvents","sampleWeight*(tightestHPSWP>0 && diTauCharge==0 && combRelIsoLeg1<0.1)");
    cutEvents  = allEvents->Integral();
    Double_t backgroundQCDWeight  =  cutEvents / totalEvents;
    cout << "QCD: expected yield " << cutEvents  << " -- weight "  << backgroundQCDWeight << endl;


    // TTbar: iso+Mt
    TTree *backgroundTTbar  = ((TTree*)(fBackgroundTTbar->Get(tree)))->CopyTree(cutB&&cutD);
    cout << "Copied TTbar tree with iso+Mt selection: " << ((TTree*)(fBackgroundTTbar->Get(tree)))->GetEntries() << " --> "  << backgroundTTbar->GetEntries()  << endl;
    allEvents->Reset();
    backgroundTTbar->Draw("eta1>>allEvents","sampleWeight");
    totalEvents  = allEvents->Integral();
    allEvents->Reset();
    backgroundTTbar->Draw("eta1>>allEvents","sampleWeight*(tightestHPSWP>0 && diTauCharge==0)");
    cutEvents  = allEvents->Integral();
    Double_t backgroundTTbarWeight  =  cutEvents / totalEvents;
    cout << "TTbar: expected yield "  << cutEvents  << " -- weight " << backgroundTTbarWeight << endl;


    delete allEvents;


    factory->AddSignalTree    ( signal,           signalWeight           );
    //factory->AddBackgroundTree( backgroundDYJets, backgroundDYJetsWeight );
    //factory->AddBackgroundTree( backgroundWJets,  backgroundWJetsWeight  );
    factory->AddBackgroundTree( backgroundQCD,    backgroundQCDWeight    );
    //factory->AddBackgroundTree( backgroundTTbar,  backgroundTTbarWeight  );


    factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                         "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=1:nTest_Background=1:SplitMode=Random:NormMode=NumEvents:!V" );

    factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                         "!H:!V:FitMethod=GA:EffSel:CutRangeMin[0]=25.:CutRangeMax[0]=999:CutRangeMin[1]=25.:CutRangeMax[1]=999.:CutRangeMin[2]=1.0:CutRangeMax[2]=9.:CutRangeMin[3]=100:CutRangeMax[3]=7000:VarProp=FSmart" );

    /*
    factory->BookMethod( TMVA::Types::kBDT, "BDT",
    	       "!H:!V:NTrees=200:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );
    */

    factory->TrainAllMethods();

    factory->TestAllMethods();

    factory->EvaluateAllMethods();

    outputFile->Close();

    std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
    std::cout << "==> TMVAClassification is done!" << std::endl;

    delete factory;

    //if (!gROOT->IsBatch()) TMVAGui( outfileName );

}
void TMVAClassificationCategory() 
{
   //---------------------------------------------------------------

   std::cout << std::endl << "==> Start TMVAClassificationCategory" << std::endl;

   bool batchMode(false);

   // Create a new root output file.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory will
   // then run the performance analysis for you.
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/ 
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in 
   // front of the "Silent" argument in the option string
   std::string factoryOptions( "!V:!Silent:Transformations=I;D;P;G,D" );
   if (batchMode) factoryOptions += ":!Color:!DrawProgressBar";

   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationCategory", outputFile, factoryOptions );

   // If you wish to modify default settings 
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   factory->AddVariable( "var1", 'F' );
   factory->AddVariable( "var2", 'F' );
   factory->AddVariable( "var3", 'F' );
   factory->AddVariable( "var4", 'F' );

   // You can add so-called "Spectator variables", which are not used in the MVA training, 
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
   // input variables, the response values of all trained MVAs, and the spectator variables
   factory->AddSpectator( "eta" );

   // load the signal and background event samples from ROOT trees
   TFile *input(0);
   TString fname( "" );
   if (UseOffsetMethod) fname = "../execs/data/toy_sigbkg_categ_offset.root";
   else                 fname = "../execs/data/toy_sigbkg_categ_varoff.root";
   if (!gSystem->AccessPathName( fname )) {
      // first we try to find tmva_example.root in the local directory
      std::cout << "--- TMVAClassificationCategory: Accessing " << fname << std::endl;
      input = TFile::Open( fname );
   } 

   if (!input) {
      std::cout << "ERROR: could not open data file: " << fname << std::endl;
      exit(1);
   }

   TTree *signal     = (TTree*)input->Get("TreeS");
   TTree *background = (TTree*)input->Get("TreeB");

   /// global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;
   
   /// you can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( signal,     signalWeight     );
   factory->AddBackgroundTree( background, backgroundWeight );
   
   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";

   // tell the factory to use all remaining events in the trees after training for testing:
   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // Fisher discriminant   
   factory->BookMethod( TMVA::Types::kFisher, "Fisher", "!H:!V:Fisher" );

   // Likelihood
   factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", 
                        "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); 

   // Categorised classifier
   TMVA::MethodCategory* mcat = 0;
   
   // the variable sets
   TString theCat1Vars = "var1:var2:var3:var4";
   TString theCat2Vars = (UseOffsetMethod ? "var1:var2:var3:var4" : "var1:var2:var3");

   // the Fisher 
   TMVA::MethodBase* fiCat = factory->BookMethod( TMVA::Types::kCategory, "FisherCat","" );
   mcat = dynamic_cast<TMVA::MethodCategory*>(fiCat);
   mcat->AddMethod("abs(eta)<=1.3",theCat1Vars, TMVA::Types::kFisher,"Category_Fisher_1","!H:!V:Fisher");
   mcat->AddMethod("abs(eta)>1.3", theCat2Vars, TMVA::Types::kFisher,"Category_Fisher_2","!H:!V:Fisher");

   // the Likelihood
   TMVA::MethodBase* liCat = factory->BookMethod( TMVA::Types::kCategory, "LikelihoodCat","" );
   mcat = dynamic_cast<TMVA::MethodCategory*>(liCat);
   mcat->AddMethod("abs(eta)<=1.3",theCat1Vars, TMVA::Types::kLikelihood,"Category_Likelihood_1","!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50");
   mcat->AddMethod("abs(eta)>1.3", theCat2Vars, TMVA::Types::kLikelihood,"Category_Likelihood_2","!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50");

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();    

   // --------------------------------------------------------------
   
   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassificationCategory is done!" << std::endl;      

   // Clean up
   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
// Analysis_Lifetime::Setup()
Analysis::RetType Analysis_Lifetime::Setup(ArgList& analyzeArgs, DataSetList* datasetlist,
                            TopologyList* PFLin, DataFileList* DFLin, int debugIn)
{
  // Get Keywords
  FileName outfileName( analyzeArgs.GetStringKey("out") );
  std::string setname = analyzeArgs.GetStringKey("name");
  bool sortSets = (!analyzeArgs.hasKey("nosort"));
  windowSize_ = analyzeArgs.getKeyInt("window", -1);
  averageonly_ = analyzeArgs.hasKey("averageonly");
  cumulative_ = analyzeArgs.hasKey("cumulative");
  deltaAvg_ = analyzeArgs.hasKey("delta");
  cut_ = analyzeArgs.getKeyDouble("cut", 0.5);
  fuzzCut_ = analyzeArgs.getKeyInt("fuzz", -1);
  if (fuzzCut_ < 1) fuzzCut_ = -1;
  normalizeCurves_ = !analyzeArgs.hasKey("rawcurve");
  if (analyzeArgs.hasKey("greater"))
    Compare_ = Compare_GreaterThan;
  else if (analyzeArgs.hasKey("less"))
    Compare_ = Compare_LessThan;
  else
    Compare_ = Compare_GreaterThan;
  // Select datasets from remaining args
  if (inputDsets_.AddSetsFromArgs( analyzeArgs.RemainingArgs(), *datasetlist )) {
    mprinterr("Error: lifetime: Could not add data sets.\n");
    return Analysis::ERR;
  }
  // Sort data sets
  if (sortSets) inputDsets_.SortArray1D(); 

  // Create output datasets
  DataFile* outfile = 0;
  DataFile* maxfile = 0;
  DataFile* avgfile = 0;
  if (setname.empty())
    setname = datasetlist->GenerateDefaultName( "lifetime" );
  if ( windowSize_ != -1) {
    outfile = DFLin->AddDataFile(outfileName, analyzeArgs);
    if (!averageonly_ && outfile != 0) {
      maxfile = DFLin->AddDataFile(outfileName.DirPrefix() + "max." + 
                                   outfileName.Base(), analyzeArgs);
      avgfile = DFLin->AddDataFile(outfileName.DirPrefix() + "avg." + 
                                   outfileName.Base(), analyzeArgs);
    }
    int didx = 0;
    for (Array1D::const_iterator set = inputDsets_.begin(); set != inputDsets_.end(); ++set)
    {
      MetaData md(setname, didx);
      md.SetLegend( (*set)->Meta().Legend() );
      DataSet_1D* outSet = (DataSet_1D*)datasetlist->AddSet( DataSet::FLOAT, md );
      if (CheckDsetError(outSet, "output", (*set)->legend())) 
        return Analysis::ERR;
      outputDsets_.push_back( outSet );
      if (outfile != 0) outfile->AddDataSet( outSet );
      if (!averageonly_) {
        // MAX
        md.SetAspect("max");
        outSet = (DataSet_1D*)datasetlist->AddSet(DataSet::INTEGER, md);
        if (CheckDsetError(outSet, "lifetime max", (*set)->legend()))
          return Analysis::ERR;
        maxDsets_.push_back( outSet );
        if (maxfile != 0) maxfile->AddDataSet( outSet );
        // AVG
        md.SetAspect("avg");
        outSet = (DataSet_1D*)datasetlist->AddSet(DataSet::FLOAT, md);
        if (CheckDsetError(outSet, "lifetime avg", (*set)->legend()))
          return Analysis::ERR;
        avgDsets_.push_back( outSet );
        if (avgfile != 0) avgfile->AddDataSet( outSet );
      }
      ++didx;
    }
    // Set step to window size.
    std::string fileArgs = "xstep " + integerToString( windowSize_ ); 
    if (outfile != 0) outfile->ProcessArgs( fileArgs );
    if (maxfile != 0) maxfile->ProcessArgs( fileArgs );
    if (avgfile != 0) avgfile->ProcessArgs( fileArgs );
  }
  // Lifetime curves
  DataFile* crvfile = 0;
  if (!averageonly_) {
    if (!outfileName.empty()) {
      crvfile = DFLin->AddDataFile(outfileName.DirPrefix() + "crv." + 
                                   outfileName.Base(), analyzeArgs);
    }
    MetaData md(setname, "curve");
    for (int didx = 0; didx != (int)inputDsets_.size(); didx++)
    {
      md.SetIdx(didx);
      DataSet_1D* outSet = (DataSet_1D*)datasetlist->AddSet(DataSet::DOUBLE, md);
      if (CheckDsetError(outSet, "lifetime curve", inputDsets_[didx]->legend()))
        return Analysis::ERR;
      curveSets_.push_back( outSet );
      if (crvfile != 0) crvfile->AddDataSet( outSet );
    }
  }
  // Non-window output file
  if (!averageonly_ && windowSize_ == -1) {
    standalone_ = DFLin->AddCpptrajFile( outfileName, "Lifetimes", DataFileList::TEXT, true );
    if (standalone_ == 0) return Analysis::ERR;
  } else
    standalone_ = 0; 

  if (!averageonly_)
    mprintf("    LIFETIME: Calculating average lifetime using a cutoff of %f", cut_);
  else
    mprintf("    LIFETIME: Calculating only averages");
  mprintf(" of data in %i sets\n", inputDsets_.size());
  if (!sortSets) mprintf("\tInput data sets will not be sorted.\n");
  if (debugIn > 0)
    for (Array1D::const_iterator set = inputDsets_.begin(); set != inputDsets_.end(); ++set)
      mprintf("\t%s\n", (*set)->legend());
  if (Compare_ == Compare_GreaterThan) 
    mprintf("\tValues greater than %f are considered present.\n", cut_);
  else
    mprintf("\tValues less than %f are considered present.\n", cut_);
  if (windowSize_ != -1) {
    mprintf("\tAverage of data over windows will be saved to sets named %s\n",
            setname.c_str());
    mprintf("\tWindow size for averaging: %i\n", windowSize_);
    if (cumulative_)
      mprintf("\tCumulative averages will be saved.\n");
    if (deltaAvg_)
      mprintf("\tChange of average from previous average will be saved.\n");
  }
  if (outfile != 0) {
    mprintf("\tOutfile: %s", outfile->DataFilename().full());
    if (!averageonly_ && outfile != 0)
      mprintf(", %s, %s", maxfile->DataFilename().base(), avgfile->DataFilename().base());
    mprintf("\n");
  }
  if (!averageonly_) {
    if (crvfile != 0)
      mprintf("\tLifetime curves output: %s\n", crvfile->DataFilename().base());
    if (normalizeCurves_)
      mprintf("\tLifetime curves will be normalized.\n");
    else
      mprintf("\tLifetime curves will not be normalized.\n");
  }
  if (fuzzCut_ != -1)
    mprintf("\tFuzz value of %i frames will be used.\n", fuzzCut_);
  return Analysis::OK;
}
void TMVAClassification( TString myMethodList = "" )
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   // if you like to use a method via the plugin mechanism, we recommend using
   //
   // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
   // (an example is given for using the BDT as plugin (see below),
   // but of course the real application is when you write your own
   // method based)

   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // to get access to the GUI and all tmva macros
   //TString thisdir = gSystem->DirName(gInterpreter->GetCurrentMacroName());
   //gROOT->SetMacroPath(thisdir + ":" + gROOT->GetMacroPath());
   //gROOT->ProcessLine(".L TMVAGui.C");


   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Cut optimisation
   Use["Cuts"]            = 0;
   Use["CutsD"]           = 0;
   Use["CutsPCA"]         = 0;
   Use["CutsGA"]          = 0;
   Use["CutsSA"]          = 0;
   //
   // --- 1-dimensional likelihood ("naive Bayes estimator")
   Use["Likelihood"]      = 0;
   Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"]   = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"]   = 0;
   Use["LikelihoodMIX"]   = 0;
   //
   // --- Mutidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]           = 0;
   Use["PDERSD"]          = 0;
   Use["PDERSPCA"]        = 0;
   Use["PDEFoam"]         = 0;
   Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
   Use["KNN"]             = 0; // k-nearest neighbour method
   //
   // --- Linear Discriminant Analysis
   Use["LD"]              = 1; // Linear Discriminant identical to Fisher
   Use["Fisher"]          = 0;
   Use["FisherG"]         = 0;
   Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
   Use["HMatrix"]         = 0;
   //
   // --- Function Discriminant analysis
   Use["FDA_GA"]          = 0; // minimisation of user-defined function using Genetics Algorithm
   Use["FDA_SA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   Use["FDA_MCMT"]        = 0;
   //
   // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
   Use["MLP"]             = 0; // Recommended ANN
   Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
   Use["MLPBNN"]          = 0; // Recommended ANN with BFGS training method and bayesian regulator
   Use["CFMlpANN"]        = 0; // Depreciated ANN from ALEPH
   Use["TMlpANN"]         = 0; // ROOT's own ANN
   //
   // --- Support Vector Machine
   Use["SVM"]             = 0;
   //
   // --- Boosted Decision Trees
   Use["BDT"]             = 0; // uses Adaptive Boost
   Use["BDTG"]            = 0; // uses Gradient Boost
   Use["BDTB"]            = 0; // uses Bagging
   Use["BDTD"]            = 0; // decorrelation + Adaptive Boost
   Use["BDTF"]            = 0; // allow usage of fisher discriminant for node splitting
   //
   // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
   Use["RuleFit"]         = 0;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // --------------------------------------------------------------------------------------------------

   // --- Here the preparation phase begins

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory is
   // the only TMVA object you have to interact with
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   // factory->AddVariable( "myvar1 := var1+var2", 'F' );
   // factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' );
   // factory->AddVariable( "var3",                "Variable 3", "units", 'F' );
   // factory->AddVariable( "var4",                "Variable 4", "units", 'F' );

   factory->AddVariable( "pho_ecalClusterIsoR4", "pho_ecalClusterIsoR4", "units", 'F' );
   factory->AddVariable( "pho_hcalRechitIsoR4", "pho_hcalRechitIsoR4", "units", 'F' );
   factory->AddVariable( "pho_trackIsoR4PtCut20", "pho_trackIsoR4PtCut20", "units", 'F' );
   factory->AddVariable( "phoHoverE", "phoHoverE", "units", 'F' );
   factory->AddVariable( "phoSigmaIEtaIEta_2012", "phoSigmaIEtaIEta_2012", "units", 'F' );

   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
   // factory->AddSpectator( "spec1 := var1*2",  "Spectator 1", "units", 'F' );
   // factory->AddSpectator( "spec2 := var1*3",  "Spectator 2", "units", 'F' );

   // Read training and test data
   // (it is also possible to use ASCII format as input -> see TMVA Users Guide)
   // TString fname = "./tmva_class_example.root";
   TString fname = "/net/hisrv0001/home/juliusbl/alex/cut/cutTree.root";

   if (gSystem->AccessPathName( fname ))  // file does not exist in local directory
      gSystem->Exec("curl -O http://root.cern.ch/files/tmva_class_example.root");

   TFile *input = TFile::Open( fname );

   std::cout << "--- TMVAClassification       : Using input file: " << input->GetName() << std::endl;

   // --- Register the training and test trees

   TTree *signal     = (TTree*)input->Get("cutT");
   TTree *background = (TTree*)input->Get("cutT");

   // global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;

   // You can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( signal,     signalWeight     );
   factory->AddBackgroundTree( background, backgroundWeight );

   // To give different trees for training and testing, do as follows:
   //    factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
   //    factory->AddSignalTree( signalTestTree,     signalTestWeight,  "Test" );

   // Use the following code instead of the above two or four lines to add signal and background
   // training and test events "by hand"
   // NOTE that in this case one should not give expressions (such as "var1+var2") in the input
   //      variable definition, but simply compute the expression before adding the event
   //
   //     // --- begin ----------------------------------------------------------
   //     std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
   //     Float_t  treevars[4], weight;
   //
   //     // Signal
   //     for (UInt_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   //     for (UInt_t i=0; i<signal->GetEntries(); i++) {
   //        signal->GetEntry(i);
   //        for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //        // add training and test events; here: first half is training, second is testing
   //        // note that the weight can also be event-wise
   //        if (i < signal->GetEntries()/2.0) factory->AddSignalTrainingEvent( vars, signalWeight );
   //        else                              factory->AddSignalTestEvent    ( vars, signalWeight );
   //     }
   //
   //     // Background (has event weights)
   //     background->SetBranchAddress( "weight", &weight );
   //     for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   //     for (UInt_t i=0; i<background->GetEntries(); i++) {
   //        background->GetEntry(i);
   //        for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //        // add training and test events; here: first half is training, second is testing
   //        // note that the weight can also be event-wise
   //        if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight*weight );
   //        else                                factory->AddBackgroundTestEvent    ( vars, backgroundWeight*weight );
   //     }
         // --- end ------------------------------------------------------------
   //
   // --- end of tree registration

   // Set individual event weights (the variables must exist in the original TTree)
   //    for signal    : factory->SetSignalWeightExpression    ("weight1*weight2");
   //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
   // factory->SetBackgroundWeightExpression( "weight" );

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = "subid==0"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = "subid==1"; // for example: TCut mycutb = "abs(var1)<0.5";

   // Tell the factory how to use the training and testing events
   //
   // If no numbers of events are given, half of the events in the tree are used
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut,
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

   if (Use["CutsSA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                           "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   // Likelihood ("naive Bayes estimator")
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // Decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   // PCA-transformed likelihood
   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" );

   // Use a kernel density estimator to approximate the PDFs
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" );

   // Use a variable-dependent mix of splines and kernel density estimator
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" );

   // Test the multi-dimensional probability density estimator
   // here are the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["PDEFoamBoost"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
                           "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // H-Matrix (chi2-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" );

   // Linear discriminant (same as Fisher discriminant)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher discriminant (same as LD)
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher",
                           "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators

   // CF(Clermont-Ferrand)ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...

   // Tmlp(Root)ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDTG"]) // Gradient Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" );

   if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );

   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );

   if (Use["BDTF"])  // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables
      factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher",
                           "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

   // ---- STILL EXPERIMENTAL and only implemented for BDT's !
   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","FitGA");

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   //if (!gROOT->IsBatch())
     // gROOT->ProcessLine(TString::Format("TMVAGui(\"%s\")", outfileName.Data()));

   // efficiencies( TString fin = "TMVA.root", Int_t type = 2, Bool_t useTMVAStyle = kTRUE );
}
示例#25
0
void testBDT(){


   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();
/*
  TString outfileName( "TMVA.root" );
  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

  TMVA::Factory *factory = new TMVA::Factory( "testBDT", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );


 
   // global event weights per tree (see below for setting event-wise weights)
   //Double_t signalWeight     = 0.003582;
   //Double_t backgroundWeight = 0.0269;
   
   Double_t signalWeight     = 1;
   Double_t backgroundWeight = 1;
   
   TFile *input_sig = TFile::Open( "signal_exclusif.root" );
   TFile *input_wz = TFile::Open( "bruit_w_z.root" );
   
   TTree *signal     = (TTree*)input_sig->Get("tree");
   TTree *background = (TTree*)input_wz->Get("tree");

   // You can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( signal,     signalWeight     );
   factory->AddBackgroundTree( background, backgroundWeight );
   
   
   factory->AddVariable("PT_z"   , 'F');
   factory->AddVariable("ASYM"    , 'F');
   factory->AddVariable("PHI_lw_b", 'F');
   factory->AddVariable("M_top", 'F');
   */
   
   
   
   TString outfileName( "bdtTMVA_FCNC_tZ.root" );
  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

  TMVA::Factory *factory = new TMVA::Factory( "doBDT_FCNC_tZ", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );


 
   // global event weights per tree (see below for setting event-wise weights)
   //Double_t signalWeight     = 0.003582;
   //Double_t backgroundWeight = 0.0269;
   
   Double_t signalWeight     = 1;
   Double_t backgroundWeight = 1;
   
   TFile *input_sig = TFile::Open( "proof.root" );
   TFile *input_wz = TFile::Open( "proof.root" );
   
   TTree *signal     = (TTree*)input_sig->Get("Ttree_FCNCkut");
   
   
   TTree *background_WZ = (TTree*)input_wz->Get("Ttree_WZ");
   /*TTree *background_ZZ = (TTree*)input_wz->Get("Ttree_ZZ");
   TTree *background_WW = (TTree*)input_wz->Get("Ttree_WW");
   
   TTree *background_TTbar  = (TTree*)input_wz->Get("Ttree_TTbar");
   TTree *background_Zjets  = (TTree*)input_wz->Get("Ttree_Zjets");
   TTree *background_Wjets  = (TTree*)input_wz->Get("Ttree_Wjets");
   TTree *background_TtW    = (TTree*)input_wz->Get("Ttree_TtW");
   TTree *background_TbartW = (TTree*)input_wz->Get("Ttree_TbartW");*/

   // You can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( signal,            signalWeight     );
   factory->AddBackgroundTree( background_WZ,     backgroundWeight );
   /*factory->AddBackgroundTree( background_ZZ,     backgroundWeight );
   factory->AddBackgroundTree( background_WW,     backgroundWeight );
   factory->AddBackgroundTree( background_TTbar,  backgroundWeight );
   factory->AddBackgroundTree( background_Zjets,  backgroundWeight );
   factory->AddBackgroundTree( background_Wjets,  backgroundWeight );
   factory->AddBackgroundTree( background_TtW,    backgroundWeight );
   factory->AddBackgroundTree( background_TbartW, backgroundWeight );*/
   
   
   factory->AddVariable("tree_topMass",    'F');
   factory->AddVariable("tree_deltaPhilb", 'F');
   factory->AddVariable("tree_asym",       'F');
   factory->AddVariable("tree_Zpt",        'F');
   
   
   
   
   
   
   
   
   
   // to set weights. The variable must exist in the tree
   //    for signal    : factory->SetSignalWeightExpression    ("weight1*weight2");
   //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
   
   
   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";

   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
   
   
   
   factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=100:nEventsMin=100:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );




   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );


}
示例#26
0
void TMVAClassification( TString fname = "./tmva_class_example.root")
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   // if you like to use a method via the plugin mechanism, we recommend using
   //
   // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
   // (an example is given for using the BDT as plugin (see below),
   // but of course the real application is when you write your own
   // method based)

   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // to get access to the GUI and all tmva macros
   TString tmva_dir(TString(gRootDir) + "/tmva");
   if(gSystem->Getenv("TMVASYS"))
      tmva_dir = TString(gSystem->Getenv("TMVASYS"));
   gROOT->SetMacroPath(tmva_dir + "/test/:" + gROOT->GetMacroPath() );
   gROOT->ProcessLine(".L TMVAGui.C");

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   
   Use["KNN"]             = 1; // k-nearest neighbour method
   
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

  

   // --------------------------------------------------------------------------------------------------

   // --- Here the preparation phase begins

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory is 
   // the only TMVA object you have to interact with
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification" );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   factory->AddVariable( "pt_eH", 'D' );
   factory->AddVariable( "max(pt_jet_eH,pt_eH)", 'D' );
   factory->AddVariable( "njets", 'I' );
   
   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
  /// factory->AddSpectator( "spec1 := var1*2",  "Spectator 1", "units", 'F' );
  /// factory->AddSpectator( "spec2 := var1*3",  "Spectator 2", "units", 'F' );

   // Read training and test data
   // (it is also possible to use ASCII format as input -> see TMVA Users Guide)
  
   TFile *input(0);
   if (gSystem->AccessPathName( fname )){  // file does not exist in local directory
      gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root");
	  fname = "./tmva_class_example.root";
  }else{
	input= TFile::Open( fname );
  }
  if (!input) {
     std::cout << "ERROR: could not open data file " << fname << std::endl;
     exit(1);
  }
   
   std::cout << "--- TMVAClassification       : Using input file: " << input->GetName() << std::endl;
   
   // --- Register the training and test trees

   TTree *inputTree     = (TTree*)input->Get("FakeTreeSig");
   TTree *background = (TTree*)input->Get("FakeTreeBG");
   
   // global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;
   
   // cuts for signal and background
   
   //~ TCut signalCut = "selected==1 && id_iso_eleH==1";
   //~ TCut backgroundCut = "selected==1 && id_iso_eleH==0";
   //~ 
   //~ std::cout << " THe signal cut is " << signalCut.GetTitle() << " bg cut is " << backgroundCut.GetTitle() << std::endl;
   
   Int_t num_pass    = inputTree->GetEntries();
   Int_t num_fail    = background->GetEntries();
   
   std::cout << num_pass << " " << num_fail << std::endl;
   
   // You can add an arbitrary number of signal or background trees
   
   factory->AddSignalTree    ( inputTree,     1.0     );
   factory->AddBackgroundTree( background, 1.0 );
 factory->SetWeightExpression( "weight" );

 //factory->SetInputTrees( inputTree, signalCut, backgroundCut );
   
   // To give different trees for training and testing, do as follows:
   //    factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
   //    factory->AddSignalTree( signalTestTree,     signalTestWeight,  "Test" );
   
   // Use the following code instead of the above two or four lines to add signal and background
   // training and test events "by hand"
   // NOTE that in this case one should not give expressions (such as "var1+var2") in the input
   //      variable definition, but simply compute the expression before adding the event
   //
   //     // --- begin ----------------------------------------------------------
   //     std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
   //     Float_t  treevars[4], weight;
   //     
   //     // Signal
   //     for (UInt_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   //     for (UInt_t i=0; i<signal->GetEntries(); i++) {
   //        signal->GetEntry(i);
   //        for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //        // add training and test events; here: first half is training, second is testing
   //        // note that the weight can also be event-wise
   //        if (i < signal->GetEntries()/2.0) factory->AddSignalTrainingEvent( vars, signalWeight );
   //        else                              factory->AddSignalTestEvent    ( vars, signalWeight );
   //     }
   //   
   //     // Background (has event weights)
   //     background->SetBranchAddress( "weight", &weight );
   //     for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   //     for (UInt_t i=0; i<background->GetEntries(); i++) {
   //        background->GetEntry(i);
   //        for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //        // add training and test events; here: first half is training, second is testing
   //        // note that the weight can also be event-wise
   //        if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight*weight );
   //        else                                factory->AddBackgroundTestEvent    ( vars, backgroundWeight*weight );
   //     }
         // --- end ------------------------------------------------------------
   //
   // --- end of tree registration 

   // Set individual event weights (the variables must exist in the original TTree)
   //    for signal    : factory->SetSignalWeightExpression    ("weight1*weight2");
   //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
   
   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = "selected==1"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = "selected==1"; // for example: TCut mycutb = "abs(var1)<0.5";

   // Tell the factory how to use the training and testing events
   //
   // If no numbers of events are given, half of the events in the tree are used 
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut,
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
  
  std::stringstream indexes;  
  indexes.str("");
  indexes << "nTrain_Signal="  << num_pass << ":nTrain_Background=" << num_fail << ":SplitMode=Random:NormMode=None:!V"; 
  std::string input_opt=indexes.str();
  
  std::cout << "Options are " << input_opt << std::endl;
  factory->PrepareTrainingAndTestTree( mycuts, mycutb, input_opt);
                                        //"nTrain_Signal="+num_pass+":nTrain_Background="+num_fail+":SplitMode=Random:NormMode=None:!V" );

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
  
   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=50:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   
   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","GA");

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   //factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
  // factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
示例#27
0
TString autoencoder (std::string inputFileName) 
{

    std::string tmstr (now ());
    TString tmstmp (tmstr.c_str ());
   
  
    std::cout << "==> Start Autoencoder " << std::endl;
    std::cout << "-------------------- open input file ---------------- " << std::endl;
    TString fname = pathToData + TString (inputFileName.c_str ()) + TString (".root");
    TFile *input = TFile::Open( fname );

    std::cout << "-------------------- get tree ---------------- " << std::endl;
    TTree *tree     = (TTree*)input->Get("data");
   
    TString outfileName( "TMVAAutoEnc__" );
    outfileName += TString (inputFileName.c_str ()) + TString ("__") + tmstmp + TString (".root");

    std::cout << "-------------------- open output file ---------------- " << std::endl;
    TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

    std::cout << "-------------------- prepare factory ---------------- " << std::endl;
    TMVA::Factory *factory = new TMVA::Factory( "TMVAAutoencoder", outputFile,
						"AnalysisType=Regression:Color:DrawProgressBar" );
    std::cout << "-------------------- add variables ---------------- " << std::endl;


    for (auto varname : variableNames+additionalVariableNames)
    {
	factory->AddVariable (varname.c_str (), 'F');
	factory->AddTarget   (varname.c_str (), 'F');
    }



    std::cout << "-------------------- add tree ---------------- " << std::endl;
    // global event weights per tree (see below for setting event-wise weights)
    Double_t regWeight  = 1.0;   
    factory->AddRegressionTree (tree, regWeight);

   
    std::cout << "-------------------- prepare ---------------- " << std::endl;
    TCut mycut = ""; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1";
    factory->PrepareTrainingAndTestTree( mycut, 
					 "nTrain_Regression=0:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents:!V" );


    /* // This would set individual event weights (the variables defined in the  */
    /* // expression need to exist in the original TTree) */
    /* factory->SetWeightExpression( "var1", "Regression" ); */


    if (true)
    {
	TString layoutString ("Layout=TANH|100,TANH|20,TANH|40,LINEAR");

	TString training0 ("LearningRate=1e-5,Momentum=0.5,Repetitions=1,ConvergenceSteps=500,BatchSize=50,TestRepetitions=7,WeightDecay=0.01,Regularization=NONE,DropConfig=0.5+0.5+0.5+0.5,DropRepetitions=2");
	TString training1 ("LearningRate=1e-5,Momentum=0.9,Repetitions=1,ConvergenceSteps=500,BatchSize=30,TestRepetitions=7,WeightDecay=0.01,Regularization=L2,DropConfig=0.1+0.1+0.1,DropRepetitions=1");
	TString training2 ("LearningRate=1e-4,Momentum=0.3,Repetitions=1,ConvergenceSteps=10,BatchSize=40,TestRepetitions=7,WeightDecay=0.1,Regularization=L2");
	TString training3 ("LearningRate=1e-5,Momentum=0.1,Repetitions=1,ConvergenceSteps=10,BatchSize=10,TestRepetitions=7,WeightDecay=0.001,Regularization=NONE");

	TString trainingStrategyString ("TrainingStrategy=");
	trainingStrategyString += training0 + "|" + training1 + "|" + training2 ; //+ "|" + training3;

       
	//       TString trainingStrategyString ("TrainingStrategy=LearningRate=1e-1,Momentum=0.3,Repetitions=3,ConvergenceSteps=20,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,L1=false,DropFraction=0.0,DropRepetitions=5");

	TString nnOptions ("!H:V:ErrorStrategy=SUMOFSQUARES:VarTransform=N:WeightInitialization=XAVIERUNIFORM");
	//       TString nnOptions ("!H:V:VarTransform=Normalize:ErrorStrategy=CHECKGRADIENTS");
	nnOptions.Append (":"); nnOptions.Append (layoutString);
	nnOptions.Append (":"); nnOptions.Append (trainingStrategyString);

	factory->BookMethod( TMVA::Types::kNN, TString("NN_")+tmstmp, nnOptions ); // NN
    }


   
    // --------------------------------------------------------------------------------------------------
    factory->TrainAllMethods();
    factory->TestAllMethods();
    factory->EvaluateAllMethods();

    outputFile->Close();

//    TMVA::TMVARegGui (outfileName);
   
    delete factory;
    return TString("NN_")+tmstmp;
}