Exemplo n.º 1
0
void process(const std::vector<std::string>& inputFiles, const std::string& name, const std::string& outputFile) {
  TChain* signal = loadChain(inputFiles, "signal");
  TChain* background = loadChain(inputFiles, "background");

  TFile* output = TFile::Open(outputFile.c_str(), "recreate");

  TMVA::Factory* factory = new TMVA::Factory(name.c_str(), output, "V");
  factory->AddSignalTree(signal, 1.);
  factory->AddBackgroundTree(background, 1.);

  //{
    //factory->AddVariable("lightJet1p2_Pt");
    //factory->AddVariable("leptonic_B_Pt");
    //factory->AddVariable("leptonic_Top_Pt");
    //factory->AddVariable("leptonic_Top_M");
    //factory->AddVariable("hadronic_B_Pt");
    //factory->AddVariable("hadronic_W_M");
    //factory->AddVariable("hadronic_Top_Pt");
    //factory->AddVariable("hadronic_Top_M");
    //factory->AddVariable("delta_R_tops");
    //factory->AddVariable("delta_R_lightjets");
    //factory->AddVariable("leptonic_B_CSV");
    //factory->AddVariable("hadronic_B_CSV");
  //}


  // chi^2 style
  {
    factory->AddVariable("leptonic_Top_M");
    factory->AddVariable("hadronic_W_M");
    factory->AddVariable("hadronic_Top_M");
    factory->AddVariable("ht_fraction");
  }

  factory->SetWeightExpression("weight");

  factory->PrepareTrainingAndTestTree("", "", "V:VerboseLevel=Info:nTrain_Signal=100000:nTrain_Background=100000:nTest_Signal=100000:nTest_Background=100000");

  factory->BookMethod(TMVA::Types::kBDT, "BDT", "V:BoostType=AdaBoost:nCuts=20:VarTransform=D");
  factory->BookMethod(TMVA::Types::kMLP, "NN", "V:VarTransform=D");
  //factory->BookMethod(TMVA::Types::kPDERS, "PDERS", "V");

  factory->TrainAllMethods();
  factory->TestAllMethods();
  factory->EvaluateAllMethods();

  output->Close();
  delete output;

  delete signal;
  delete background;
}
Exemplo n.º 2
0
void test_train(TString signalName = "WW",
		TString bkgName = "DY")
{
  TFile *outFile = new TFile("myAnalysisFile.root","RECREATE");
  
  TMVA::Factory *factory = new TMVA::Factory(signalName, outFile,"");
  
  TString directory = "../rootFiles/SF/MediumIDTighterIP/";
  //signalName = directory + signalName;
  
  //defining WW signal
  TFile *MySignalFile = new TFile("../rootFiles/SF/MediumIDTighterIP/WW.root","READ");
  TTree* sigTree = (TTree*)MySignalFile->Get("nt");
  factory->AddSignalTree(sigTree,1);
  
  //defining DY background
  TFile *MyBkgFile = new TFile("../rootFiles/SF/MediumIDTighterIP/DY.root","READ");
  TTree* bkgTree = (TTree*)MyBkgFile->Get("nt");
  factory->AddBackgroundTree(bkgTree,1);

  factory->SetWeightExpression("baseW");

  //************************************ FACTORY  
  
  factory->AddVariable("fullpmet");
  factory->AddVariable("trkpmet");
  factory->AddVariable("ratioMet");
  factory->AddVariable("ptll");
  factory->AddVariable("mth");
  factory->AddVariable("jetpt1");
  factory->AddVariable("ptWW");
  factory->AddVariable("dphilljet");
  factory->AddVariable("dphillmet");
  factory->AddVariable("dphijet1met");
  factory->AddVariable("nvtx");

  factory->PrepareTrainingAndTestTree("",500,500,500,500);
  cout<<"I've prepared trees"<<endl;
  //factory->BookMethod(TMVA::Types::kFisher, "Fisher","");
  factory->BookMethod(TMVA::Types::kBDT, "BDT","");
  
  cout<<"I've booked method"<<endl;
  factory->TrainAllMethods();
  factory->TestAllMethods();
  cout<<"I've tested all methods"<<endl;
  factory->EvaluateAllMethods();
  cout<<"I've evaluated all methods"<<endl;
  
}
Exemplo n.º 3
0
//------------------------------------------------------------------------------
// MVATrain
//------------------------------------------------------------------------------
void MVATrain(TString signal)
{
  TFile* outputfile = TFile::Open(trainingdir + signal + ".root", "recreate");


  // Factory
  //----------------------------------------------------------------------------
  TMVA::Factory* factory = new TMVA::Factory(signal, outputfile,    
					     "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification");


  // Get the trees
  //----------------------------------------------------------------------------
  _mctree.clear();

  AddProcess("signal", signal);
  AddProcess("background", "HZJ_HToWW_M125");
  AddProcess("background", "ggZH_HToWW_M125");

  //  AddProcess("background", "14_HZ");
  //  AddProcess("background", "10_HWW");
  //  AddProcess("background", "06_WW");
  //  AddProcess("background", "02_WZTo3LNu");
  //  AddProcess("background", "03_ZZ");
  //  AddProcess("background", "11_Wg");
  //  AddProcess("background", "07_ZJets");
  //  AddProcess("background", "09_TTV");
  //  AddProcess("background", "05_ST");
  //  AddProcess("background", "00_Fakes");

  Double_t weight = 1.0;

  factory->AddSignalTree(_signaltree, weight);

  for (UInt_t i=0; i<_mctree.size(); i++) factory->AddBackgroundTree(_mctree[i], weight);
  
  factory->SetWeightExpression("eventW");


  // Add variables
  //----------------------------------------------------------------------------
  // Be careful with the order: it must be respected at the reading step
  // factory->AddVariable("<var1>+<var2>", "pretty title", "unit", 'F');

  //  factory->AddVariable("channel",        "", "", 'F');
  factory->AddVariable("metPfType1",     "", "", 'F');
  factory->AddVariable("m2l",            "", "", 'F');
  //  factory->AddVariable("njet",           "", "", 'F');
  //  factory->AddVariable("nbjet20cmvav2l", "", "", 'F');
  factory->AddVariable("lep1pt",         "", "", 'F');
  factory->AddVariable("lep2pt",         "", "", 'F');
  //  factory->AddVariable("jet1pt",         "", "", 'F');
  factory->AddVariable("jet2pt",         "", "", 'F');
  factory->AddVariable("mtw1",           "", "", 'F');
  factory->AddVariable("dphill",         "", "", 'F');
  factory->AddVariable("dphilep1jet1",   "", "", 'F');
  //  factory->AddVariable("dphilep1jet2",   "", "", 'F');
  //  factory->AddVariable("dphilmet1",      "", "", 'F');
  //  factory->AddVariable("dphilep2jet1",   "", "", 'F');
  //  factory->AddVariable("dphilep2jet2",   "", "", 'F');
  //  factory->AddVariable("dphilmet2",      "", "", 'F');
  //  factory->AddVariable("dphijj",         "", "", 'F');
  //  factory->AddVariable("dphijet1met",    "", "", 'F');
  //  factory->AddVariable("dphijet2met",    "", "", 'F');
  factory->AddVariable("dphillmet",      "", "", 'F');


  // Preselection cuts and preparation
  //----------------------------------------------------------------------------
  factory->PrepareTrainingAndTestTree("", ":nTrain_Signal=0:nTest_Signal=0:nTrain_Background=0:nTest_Background=0:SplitMode=Alternate:MixMode=Random:!V");


  // Book MVA
  //----------------------------------------------------------------------------
  factory->BookMethod(TMVA::Types::kMLP, "MLP",
		      "H:!V:NeuronType=sigmoid:VarTransform=N:NCycles=600:HiddenLayers=25,10:TestRate=5:!UseRegulator");


  // Train, test and evaluate MVA
  //----------------------------------------------------------------------------
  factory->TrainAllMethods();     // Train using the set of training events
  factory->TestAllMethods();      // Evaluate using the set of test events
  factory->EvaluateAllMethods();  // Evaluate and compare performance


  // Save the output
  //----------------------------------------------------------------------------
  outputfile->Close();

  delete factory;
}
Exemplo n.º 4
0
void TMVAClassification( TString fname = "./tmva_class_example.root")
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   // if you like to use a method via the plugin mechanism, we recommend using
   //
   // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
   // (an example is given for using the BDT as plugin (see below),
   // but of course the real application is when you write your own
   // method based)

   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // to get access to the GUI and all tmva macros
   TString tmva_dir(TString(gRootDir) + "/tmva");
   if(gSystem->Getenv("TMVASYS"))
      tmva_dir = TString(gSystem->Getenv("TMVASYS"));
   gROOT->SetMacroPath(tmva_dir + "/test/:" + gROOT->GetMacroPath() );
   gROOT->ProcessLine(".L TMVAGui.C");

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   
   Use["KNN"]             = 1; // k-nearest neighbour method
   
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

  

   // --------------------------------------------------------------------------------------------------

   // --- Here the preparation phase begins

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory is 
   // the only TMVA object you have to interact with
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification" );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   factory->AddVariable( "pt_eH", 'D' );
   factory->AddVariable( "max(pt_jet_eH,pt_eH)", 'D' );
   factory->AddVariable( "njets", 'I' );
   
   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
  /// factory->AddSpectator( "spec1 := var1*2",  "Spectator 1", "units", 'F' );
  /// factory->AddSpectator( "spec2 := var1*3",  "Spectator 2", "units", 'F' );

   // Read training and test data
   // (it is also possible to use ASCII format as input -> see TMVA Users Guide)
  
   TFile *input(0);
   if (gSystem->AccessPathName( fname )){  // file does not exist in local directory
      gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root");
	  fname = "./tmva_class_example.root";
  }else{
	input= TFile::Open( fname );
  }
  if (!input) {
     std::cout << "ERROR: could not open data file " << fname << std::endl;
     exit(1);
  }
   
   std::cout << "--- TMVAClassification       : Using input file: " << input->GetName() << std::endl;
   
   // --- Register the training and test trees

   TTree *inputTree     = (TTree*)input->Get("FakeTreeSig");
   TTree *background = (TTree*)input->Get("FakeTreeBG");
   
   // global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;
   
   // cuts for signal and background
   
   //~ TCut signalCut = "selected==1 && id_iso_eleH==1";
   //~ TCut backgroundCut = "selected==1 && id_iso_eleH==0";
   //~ 
   //~ std::cout << " THe signal cut is " << signalCut.GetTitle() << " bg cut is " << backgroundCut.GetTitle() << std::endl;
   
   Int_t num_pass    = inputTree->GetEntries();
   Int_t num_fail    = background->GetEntries();
   
   std::cout << num_pass << " " << num_fail << std::endl;
   
   // You can add an arbitrary number of signal or background trees
   
   factory->AddSignalTree    ( inputTree,     1.0     );
   factory->AddBackgroundTree( background, 1.0 );
 factory->SetWeightExpression( "weight" );

 //factory->SetInputTrees( inputTree, signalCut, backgroundCut );
   
   // To give different trees for training and testing, do as follows:
   //    factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
   //    factory->AddSignalTree( signalTestTree,     signalTestWeight,  "Test" );
   
   // Use the following code instead of the above two or four lines to add signal and background
   // training and test events "by hand"
   // NOTE that in this case one should not give expressions (such as "var1+var2") in the input
   //      variable definition, but simply compute the expression before adding the event
   //
   //     // --- begin ----------------------------------------------------------
   //     std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
   //     Float_t  treevars[4], weight;
   //     
   //     // Signal
   //     for (UInt_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   //     for (UInt_t i=0; i<signal->GetEntries(); i++) {
   //        signal->GetEntry(i);
   //        for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //        // add training and test events; here: first half is training, second is testing
   //        // note that the weight can also be event-wise
   //        if (i < signal->GetEntries()/2.0) factory->AddSignalTrainingEvent( vars, signalWeight );
   //        else                              factory->AddSignalTestEvent    ( vars, signalWeight );
   //     }
   //   
   //     // Background (has event weights)
   //     background->SetBranchAddress( "weight", &weight );
   //     for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   //     for (UInt_t i=0; i<background->GetEntries(); i++) {
   //        background->GetEntry(i);
   //        for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //        // add training and test events; here: first half is training, second is testing
   //        // note that the weight can also be event-wise
   //        if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight*weight );
   //        else                                factory->AddBackgroundTestEvent    ( vars, backgroundWeight*weight );
   //     }
         // --- end ------------------------------------------------------------
   //
   // --- end of tree registration 

   // Set individual event weights (the variables must exist in the original TTree)
   //    for signal    : factory->SetSignalWeightExpression    ("weight1*weight2");
   //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
   
   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = "selected==1"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = "selected==1"; // for example: TCut mycutb = "abs(var1)<0.5";

   // Tell the factory how to use the training and testing events
   //
   // If no numbers of events are given, half of the events in the tree are used 
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut,
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
  
  std::stringstream indexes;  
  indexes.str("");
  indexes << "nTrain_Signal="  << num_pass << ":nTrain_Background=" << num_fail << ":SplitMode=Random:NormMode=None:!V"; 
  std::string input_opt=indexes.str();
  
  std::cout << "Options are " << input_opt << std::endl;
  factory->PrepareTrainingAndTestTree( mycuts, mycutb, input_opt);
                                        //"nTrain_Signal="+num_pass+":nTrain_Background="+num_fail+":SplitMode=Random:NormMode=None:!V" );

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
  
   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=50:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   
   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","GA");

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   //factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
  // factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
Exemplo n.º 5
0
void classifyBDT(TString inputVariables = "trainingVars.txt",
                 TString signalName = "/mnt/hscratch/dabercro/skims2/BDT_Signal.root",
                 TString backName = "/mnt/hscratch/dabercro/skims2/BDT_Background.root") {
  TMVA::Tools::Instance();
  std::cout << "==> Start TMVAClassification" << std::endl;

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName( "TMVA/TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationCategory", outputFile,
					       "!V:!Silent:Color:DrawProgressBar:Transformations=I;N" );

   // A very simple MVA (feel free to uncomment and comment what you like) => as a rule of thumb 10-20 variables is where people start to get worried about total number

   ifstream configFile;
   configFile.open(inputVariables.Data());
   TString tempFormula;

   configFile >> tempFormula;  // Is the name of the BDT
   while(!configFile.eof()){
     configFile >> tempFormula;
     if(tempFormula != ""){
       factory->AddVariable(tempFormula,'F');
     }
   }

   TString lVars;

   // TCut lCut   = "jet1qg2<2.&&jet1pt>250.&&jet1pullAngle>-5.";// < 10 && jet1mass_m2 > 60 && jet1mass_m2 < 120";
   // TCut lCut = "passZ > 3  && fjet1pt > 250 && fjet1MassPruned < 120 && fatjetid < 2";
   TCut lCut   = "abs(fjet1PartonId)!=24&&abs(fjet1PartonId)!=23";
   // std::string lEventCut = "event % 2 == 1";
   // lCut += lEventCut.c_str();

   // TCut lSCut = "passT > 0   && fjet1pt > 250 && fjet1MassPruned < 120 && abs(fjet1PartonId) == 24&& fatjetid < 2";
   TCut lSCut   = "abs(fjet1PartonId)==24||abs(fjet1PartonId)==23";
   // lSCut += lEventCut.c_str();

   TCut cleanCut = "fjet1QGtagSub2 > -10 && fjet1PullAngle > -4 && abs(fjet1pt/fjet1MassTrimmed)<200 && abs(fjet1pt/fjet1MassPruned)<200";

   TFile *lSAInput = TFile::Open(signalName);
   TTree   *lSASignal    = (TTree*)lSAInput    ->Get("DMSTree"); 
   TFile *lSBInput = TFile::Open(backName);
   TTree   *lSBSignal    = (TTree*)lSBInput    ->Get("DMSTree"); 
   
   Double_t lSWeight = 1.0;
   Double_t lBWeight = 1.0;
   gROOT->cd( outfileName+TString(":/") );   
   factory->AddSignalTree    ( lSASignal, lSWeight );
   
   gROOT->cd( outfileName+TString(":/") );   
   factory->AddBackgroundTree( lSBSignal, lBWeight );
   
   factory->SetWeightExpression("weight");
   std::stringstream pSignal,pBackground;
   pSignal << "nTrain_Signal="<< lSASignal->GetEntries() << ":nTrain_Background=" << lSBSignal->GetEntries();
   // factory->PrepareTrainingAndTestTree( lSCut, lCut,(pSignal.str()+":SplitMode=Block:NormMode=NumEvents:!V").c_str() );
   factory->PrepareTrainingAndTestTree(lSCut&&cleanCut,lCut&&cleanCut,"nTrain_Signal=0:nTrain_Background=0:SplitMode=Alternate:NormMode=NumEvents:!V");
   std::string lName = "alpha_VBF";
   TString lBDTDef   = "!H:!V:NTrees=400:BoostType=Grad:Shrinkage=0.1:UseBaggedGrad=F:nCuts=2000:NNodesMax=10000:MaxDepth=5:UseYesNoLeaf=F:nEventsMin=200";
//    TString lBDTDef   = "!H:!V:NTrees=400:BoostType=Grad:Shrinkage=0.1:UseBaggedGrad=F:nCuts=2000:MaxDepth=5:UseYesNoLeaf=F:MinNodeSize=0.086:NegWeightTreatment=IgnoreNegWeightsInTraining";
   factory->BookMethod(TMVA::Types::kBDT,"BDT_simple_alpha",lBDTDef);   
   factory->TrainAllMethods();
   factory->TestAllMethods();
   factory->EvaluateAllMethods();
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;
   delete factory;
   //if (!gROOT->IsBatch()) TMVAGui( outfileName );
   //TString lBDTDef   = "!H:!V:NTrees=100:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad=F:nCuts=2000:NNodesMax=10000:MaxDepth=3:SeparationType=GiniIndex";
}
Exemplo n.º 6
0
void TMVARegression( TString myMethodList = "" ) 
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the 
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVARegression.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //

   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Mutidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]           = 0;
   Use["PDEFoam"]         = 1; 
   Use["KNN"]             = 1;
   // 
   // --- Linear Discriminant Analysis
   Use["LD"]		        = 1;
   // 
   // --- Function Discriminant analysis
   Use["FDA_GA"]          = 1;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   // 
   // --- Neural Network
   Use["MLP"]             = 1; 
   // 
   // --- Support Vector Machine 
   Use["SVM"]             = 0;
   // 
   // --- Boosted Decision Trees
   Use["BDT"]             = 0;
   Use["BDTG"]            = 1;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVARegression" << std::endl;

   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // --------------------------------------------------------------------------------------------------

   // --- Here the preparation phase begins

   // Create a new root output file
   TString outfileName( "TMVAReg.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory will
   // then run the performance analysis for you.
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/ 
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in 
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVARegression", outputFile, 
                                               "!V:!Silent:Color:DrawProgressBar" );

   // If you wish to modify default settings 
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   factory->AddVariable( "var1", "Variable 1", "units", 'F' );
   factory->AddVariable( "var2", "Variable 2", "units", 'F' );

   // You can add so-called "Spectator variables", which are not used in the MVA training, 
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
   // input variables, the response values of all trained MVAs, and the spectator variables
   factory->AddSpectator( "spec1:=var1*2",  "Spectator 1", "units", 'F' );
   factory->AddSpectator( "spec2:=var1*3",  "Spectator 2", "units", 'F' );

   // Add the variable carrying the regression target
   factory->AddTarget( "fvalue" ); 

   // It is also possible to declare additional targets for multi-dimensional regression, ie:
   // -- factory->AddTarget( "fvalue2" );
   // BUT: this is currently ONLY implemented for MLP

   // Read training and test data (see TMVAClassification for reading ASCII files)
   // load the signal and background event samples from ROOT trees
   TFile *input(0);
   TString fname = "./tmva_reg_example.root";
   if (!gSystem->AccessPathName( fname )) 
      input = TFile::Open( fname ); // check if file in local directory exists
   else 
      input = TFile::Open( "http://root.cern.ch/files/tmva_reg_example.root" ); // if not: download from ROOT server
   
   if (!input) {
      std::cout << "ERROR: could not open data file" << std::endl;
      exit(1);
   }
   std::cout << "--- TMVARegression           : Using input file: " << input->GetName() << std::endl;

   // --- Register the regression tree

   TTree *regTree = (TTree*)input->Get("TreeR");

   // global event weights per tree (see below for setting event-wise weights)
   Double_t regWeight  = 1.0;   

   // You can add an arbitrary number of regression trees
   factory->AddRegressionTree( regTree, regWeight );

   // This would set individual event weights (the variables defined in the 
   // expression need to exist in the original TTree)
   factory->SetWeightExpression( "var1", "Regression" );

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycut = ""; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1";

   // tell the factory to use all remaining events in the trees after training for testing:
   factory->PrepareTrainingAndTestTree( mycut, 
                                        "nTrain_Regression=0:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // If no numbers of events are given, half of the events in the tree are used 
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );  

   // ---- Book MVA methods
   //
   // please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // PDE - RS method
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS", 
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" );
   // And the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );   
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );   

   if (Use["PDEFoam"])
       factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", 
			    "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN", 
                           "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // Linear discriminant
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", 
                           "!H:!V:VarTransform=None" );

	// Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"]) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                          "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" );
   
   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. the formula of this example is good for parabolas
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" );

   if (Use["FDA_MT"]) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"]) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   // Neural network (MLP)
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" );

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDT"])
     factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=100:MinNodeSize=1.0%:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" );

   if (Use["BDTG"])
     factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=1000::BoostType=Grad:Shrinkage=0.1:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=3:MaxDepth=4" );
   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();    

   // --------------------------------------------------------------
   
   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVARegression is done!" << std::endl;      

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVARegGui( outfileName );
}
Exemplo n.º 7
0
void TMVAClassificationElecTau(std::string ordering_ = "Pt", std::string bkg_ = "qqH115vsWZttQCD") {

    TMVA::Tools::Instance();

    TString outfileName( "TMVAElecTau"+ordering_+"Ord_"+bkg_+".root" );
    TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

    TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationElecTau"+ordering_+"Ord_"+bkg_, outputFile,
            "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" );
    factory->AddVariable( "pt1", "pT-tag1", "GeV/c"         , 'F'  );
    factory->AddVariable( "pt2", "pT-tag2", "GeV/c"         , 'F'  );
    factory->AddVariable( "Deta","|y-tag1 - y-tag2|",""     , 'F'  );
    //factory->AddVariable( "opposite:=abs(eta1*eta2)/eta1/eta2","sign1*sign2",""             , 'F'  );
    //factory->AddVariable( "Dphi", "#Delta#phi" ,""             , 'F'  );
    factory->AddVariable( "Mjj", "M(tag1,tag2)", "GeV/c^{2}"  , 'F'  );

    factory->AddSpectator( "eta1",  "#eta_{tag1}" , 'F' );
    factory->AddSpectator( "eta2",  "#eta_{tag2}" , 'F' );

    factory->SetWeightExpression( "sampleWeight" );

    TString fSignalName              = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleVBFH115-powheg-PUS1_Open_ElecTauStream.root";
    TString fBackgroundNameDYJets    = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleZjets-alpgen-PUS1_Open_ElecTauStream.root";
    TString fBackgroundNameWJets     = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleWJets-madgraph-PUS1_Open_ElecTauStream.root";
    TString fBackgroundNameQCD       = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleQCD_Open_ElecTauStream.root";
    TString fBackgroundNameTTbar     = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleTTJets-madgraph-PUS1_Open_ElecTauStream.root";


    TFile *fSignal(0);
    TFile *fBackgroundDYJets(0);
    TFile *fBackgroundWJets(0);
    TFile *fBackgroundQCD(0);
    TFile *fBackgroundTTbar(0);

    fSignal           = TFile::Open( fSignalName );
    fBackgroundDYJets = TFile::Open( fBackgroundNameDYJets );
    fBackgroundWJets  = TFile::Open( fBackgroundNameWJets );
    fBackgroundQCD    = TFile::Open( fBackgroundNameQCD );
    fBackgroundTTbar  = TFile::Open( fBackgroundNameTTbar );

    if(!fSignal || !fBackgroundDYJets || !fBackgroundWJets || !fBackgroundQCD || !fBackgroundTTbar) {
        std::cout << "ERROR: could not open files" << std::endl;
        exit(1);
    }

    TString tree = "outTree"+ordering_+"Ord";

    TCut mycuts = "";
    TCut mycutb = "";

    TCut cutA  = "pt1>0 && tightestHPSWP>0";
    TCut cutB  = "pt1>0 && combRelIsoLeg1<0.1";
    TCut cutBl = "pt1>0 && combRelIsoLeg1<0.3";
    TCut cutC  = "pt1>0 && diTauCharge==0";
    TCut cutD  = "pt1>0 && MtLeg1<40";

    // select events for training
    TFile* dummy = new TFile("dummy.root","RECREATE");
    TH1F* allEvents = new TH1F("allEvents","",1,-10,10);
    float totalEvents, cutEvents;

    // signal: all
    TTree *signal           = ((TTree*)(fSignal->Get(tree)))->CopyTree(cutA&&cutB&&cutC&&cutD);
    cout << "Copied signal tree with full selection: " << ((TTree*)(fSignal->Get(tree)))->GetEntries() << " --> "  << signal->GetEntries()  << endl;
    allEvents->Reset();
    signal->Draw("eta1>>allEvents","sampleWeight");
    cutEvents  = allEvents->Integral();
    Double_t signalWeight =   1.0;
    cout << "Signal: expected yield " << cutEvents << " -- weight " << signalWeight << endl;

    // Z+jets: all
    TTree *backgroundDYJets = ((TTree*)(fBackgroundDYJets->Get(tree)))->CopyTree(cutA&&cutB&&cutC&&cutD);
    cout << "Copied DYJets tree with full selection: " << ((TTree*)(fBackgroundDYJets->Get(tree)))->GetEntries() << " --> "  << backgroundDYJets->GetEntries()  << endl;
    allEvents->Reset();
    backgroundDYJets->Draw("eta1>>allEvents","sampleWeight");
    cutEvents  = allEvents->Integral();
    Double_t backgroundDYJetsWeight = 1.0;
    cout << "ZJets: expected yield " << cutEvents << " -- weight " << backgroundDYJetsWeight << endl;

    // W+jets: iso+Mt
    TTree *backgroundWJets  = ((TTree*)(fBackgroundWJets->Get(tree)))->CopyTree(cutB&&cutD);
    cout << "Copied WJets tree with iso+Mt selection: " << ((TTree*)(fBackgroundWJets->Get(tree)))->GetEntries() << " --> "  << backgroundWJets->GetEntries()  << endl;
    allEvents->Reset();
    backgroundWJets->Draw("eta1>>allEvents","sampleWeight");
    totalEvents  = allEvents->Integral();
    allEvents->Reset();
    backgroundWJets->Draw("eta1>>allEvents","sampleWeight*(tightestHPSWP>0 && diTauCharge==0)");
    cutEvents  = allEvents->Integral();
    Double_t backgroundWJetsWeight  =  cutEvents / totalEvents;
    cout << "WJets: expected yield " << cutEvents  << " -- weight " << backgroundWJetsWeight << endl;

    // QCD: Mt+loose iso
    TTree *backgroundQCD    = ((TTree*)(fBackgroundQCD->Get(tree)))->CopyTree(cutD&&cutBl);
    cout << "Copied QCD tree with Mt selection: " << ((TTree*)(fBackgroundQCD->Get(tree)))->GetEntries() << " --> "  << backgroundQCD->GetEntries()  << endl;
    allEvents->Reset();
    backgroundQCD->Draw("eta1>>allEvents","sampleWeight");
    totalEvents  = allEvents->Integral();
    allEvents->Reset();
    backgroundQCD->Draw("eta1>>allEvents","sampleWeight*(tightestHPSWP>0 && diTauCharge==0 && combRelIsoLeg1<0.1)");
    cutEvents  = allEvents->Integral();
    Double_t backgroundQCDWeight  =  cutEvents / totalEvents;
    cout << "QCD: expected yield " << cutEvents  << " -- weight "  << backgroundQCDWeight << endl;


    // TTbar: iso+Mt
    TTree *backgroundTTbar  = ((TTree*)(fBackgroundTTbar->Get(tree)))->CopyTree(cutB&&cutD);
    cout << "Copied TTbar tree with iso+Mt selection: " << ((TTree*)(fBackgroundTTbar->Get(tree)))->GetEntries() << " --> "  << backgroundTTbar->GetEntries()  << endl;
    allEvents->Reset();
    backgroundTTbar->Draw("eta1>>allEvents","sampleWeight");
    totalEvents  = allEvents->Integral();
    allEvents->Reset();
    backgroundTTbar->Draw("eta1>>allEvents","sampleWeight*(tightestHPSWP>0 && diTauCharge==0)");
    cutEvents  = allEvents->Integral();
    Double_t backgroundTTbarWeight  =  cutEvents / totalEvents;
    cout << "TTbar: expected yield "  << cutEvents  << " -- weight " << backgroundTTbarWeight << endl;


    delete allEvents;


    factory->AddSignalTree    ( signal,           signalWeight           );
    //factory->AddBackgroundTree( backgroundDYJets, backgroundDYJetsWeight );
    //factory->AddBackgroundTree( backgroundWJets,  backgroundWJetsWeight  );
    factory->AddBackgroundTree( backgroundQCD,    backgroundQCDWeight    );
    //factory->AddBackgroundTree( backgroundTTbar,  backgroundTTbarWeight  );


    factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                         "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=1:nTest_Background=1:SplitMode=Random:NormMode=NumEvents:!V" );

    factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                         "!H:!V:FitMethod=GA:EffSel:CutRangeMin[0]=25.:CutRangeMax[0]=999:CutRangeMin[1]=25.:CutRangeMax[1]=999.:CutRangeMin[2]=1.0:CutRangeMax[2]=9.:CutRangeMin[3]=100:CutRangeMax[3]=7000:VarProp=FSmart" );

    /*
    factory->BookMethod( TMVA::Types::kBDT, "BDT",
    	       "!H:!V:NTrees=200:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );
    */

    factory->TrainAllMethods();

    factory->TestAllMethods();

    factory->EvaluateAllMethods();

    outputFile->Close();

    std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
    std::cout << "==> TMVAClassification is done!" << std::endl;

    delete factory;

    //if (!gROOT->IsBatch()) TMVAGui( outfileName );

}
Exemplo n.º 8
0
void TMVAClassificationHwwNtuple( TString myMethodList = "" )
{
   // This loads the library
   TMVA::Tools::Instance();

    gROOT->ProcessLine(".L TMVAGui.C");


   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Cut optimisation
   Use["Cuts"]            = 1;
   Use["CutsD"]           = 0;
   Use["CutsPCA"]         = 0;
   Use["CutsGA"]          = 0;
   Use["CutsSA"]          = 0;
   // 
   Use["BDT"]             = 1; // uses Adaptive Boost
   Use["BDTG"]            = 0; // uses Gradient Boost
   Use["BDTB"]            = 0; // uses Bagging
   Use["BDTD"]            = 0; // decorrelation + Adaptive Boost
   Use["BDTF"]            = 0; // allow usage of fisher discriminant for node splitting 
   // 
   // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
   Use["RuleFit"]         = 0;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;
   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
	 cout<<regMethod<<" is on"<<endl;
      }
   }
   // -------------------------------------------------------------------------

   // --- Here the preparation phase begins

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // For one variable
   //TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
   //                                            "!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification" );
   // For Multiple Variables
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );
   //factory->AddVariable( "pt1",                "LeadLepton pt", "", 'F' );
   //factory->AddVariable( "pt2",                "TailLepton pt", "", 'F' );
   factory->AddVariable( "pfmet",                "MissingEt", "", 'F' );
   factory->AddVariable( "mpmet",              "Minimum Proj. Met", "", 'F' );
   factory->AddVariable( "dphill",             "DeltPhiOfLepLep", "", 'F' );
   //factory->AddVariable( "mll",                "DiLepton Mass", "", 'F' );
   factory->AddVariable( "ptll",               "DiLepton pt", "", 'F' );
   //
   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
   //factory->AddSpectator( "spec1 := var1*2",  "Spectator 1", "units", 'F' );
   //factory->AddSpectator( "spec2 := var1*3",  "Spectator 2", "units", 'F' );
   //
   //factory->AddSpectator( "mWW",                "Higgs Mass", "", 'F' );
   factory->AddSpectator( "pt1",                "LeadLepton pt", "", 'F' );
   factory->AddSpectator( "pt2",                "TailLepton pt", "", 'F' );
   factory->AddSpectator( "pfmet",                "MissingEt", "", 'F' );
   factory->AddSpectator( "mpmet",              "Minimum Proj. Met", "", 'F' );
   factory->AddSpectator( "dphill",             "DeltPhiOfLepLep", "", 'F' );
   factory->AddSpectator( "mll",                "DiLepton Mass", "", 'F' );
   factory->AddSpectator( "ptll",               "DiLepton pt", "", 'F' );
   // Read training and test data
   // (it is also possible to use ASCII format as input -> see TMVA Users Guide)
   //TString fname = "./tmva_class_example.root";
   //TString fname = "/afs/cern.ch/work/s/salee/private/HWWwidth/HWW/GGVvAnalyzer/MkNtuple/Hw1Int8TeV/MkNtuple.root";
   //TString fname = "/terranova_0/HWWwidth/HWW/GGVvAnalyzer/MkNtuple/Hw1Int8TeV/MkNtuple.root";
   
   //if (gSystem->AccessPathName( fname ))  // file does not exist in local directory
    // exit(-1);
      //gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root");
   
   //TFile *input = TFile::Open( fname );
   //TFile *SB_OnPeak = TFile::Open("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_IntOnPeak_8TeV.root");
   //TTree *SB_OnPeak_Tree = (TTree*)SB_OnPeak->Get("latino");
   
   TChain *S_Chain = new TChain("latino");
   TChain *C_Chain = new TChain("latino");
   TChain *SCI_Chain = new TChain("latino");
   TChain *qqWW_Chain = new TChain("latino");

   S_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_SigOnPeak_8TeV.root");
   S_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_SigShoulder_8TeV.root");
   S_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_SigTail_8TeV.root");
   SCI_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_IntOnPeak_8TeV.root");
   SCI_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_IntShoulder_8TeV.root");
   SCI_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_IntTail_8TeV.root");
   C_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw25_CotHead_8TeV.root");
   C_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw25_CotTail_8TeV.root");

   qqWW_Chain->Add("/afs/cern.ch/user/m/maiko/work/public/Tree/tree_skim_wwmin/nominals/latino_000_WWJets2LMad.root");
   
   // --- Register the training and test trees

   // You can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( S_Chain  );
   factory->AddBackgroundTree( qqWW_Chain );
   factory->AddBackgroundTree( C_Chain );
   // Classification training and test data in ROOT tree format with signal and background events being located in the same tree
   //factory->SetInputTrees(SCI_Chain, GenOffCut, GenOnCut);
   
   // To give different trees for training and testing, do as follows:
   //    factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
   //    factory->AddSignalTree( signalTestTree,     signalTestWeight,  "Test" );
   
   factory->SetWeightExpression          ("2.1*puW*baseW*effW*triggW*19.468");
   //factory->SetSignalWeightExpression    ("2.1*puW*baseW*effW*triggW*19.468");
   //factory->SetBackgroundWeightExpression("puW*baseW*effW*triggW*19.468");

   //factory->PrepareTrainingAndTestTree( ChanCommOff,
   //                                     "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=None:!V" );
                                        //"nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V";
   factory->PrepareTrainingAndTestTree( ChanCommOff0J,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=None:!V" );
   // ---- Book MVA methods
   //
   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:V:NTrees=850:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );
                           //"!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );

   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // -----------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

   // ---- STILL EXPERIMENTAL and only implemented for BDT's ! 
   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","FitGA");

   // -----------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   //if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
Exemplo n.º 9
0
//------------------------------------------------------------------------------
// MVATrain
//------------------------------------------------------------------------------
void MVATrain(float metPfType1_cut, float mt2ll_cut, TString signal)
{
  TFile* outputfile = TFile::Open(trainingdir + signal + ".root", "recreate");


  // Factory
  //----------------------------------------------------------------------------
  TMVA::Factory* factory = new TMVA::Factory(signal, outputfile,    
					    // "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification");
						"!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification");


  // Get the trees
  //----------------------------------------------------------------------------
  _mctree.clear();

  AddProcess("signal"    , signal);//"01_Data_reduced_1outof6"); //signal
  AddProcess("background", "04_TTTo2L2Nu");
  
  /*AddProcess("background", "14_HZ");
  AddProcess("background", "10_HWW");
  AddProcess("background", "06_WW");
  AddProcess("background", "02_WZTo3LNu");
  AddProcess("background", "03_VZ");
  AddProcess("background", "11_Wg");
  AddProcess("background", "07_ZJets");
  AddProcess("background", "09_TTV");
  AddProcess("background", "05_ST");
  AddProcess("background", "00_Fakes_reduced_1outof6");*/


  Double_t weight = 1.0;

  factory->AddSignalTree(_signaltree, weight);

  for (UInt_t i=0; i<_mctree.size(); i++) factory->AddBackgroundTree(_mctree[i], weight);

  factory->SetWeightExpression("eventW");


  // Add variables
  //----------------------------------------------------------------------------
  // Be careful with the order: it must be respected at the reading step
  // factory->AddVariable("<var1>+<var2>", "pretty title", "unit", 'F');

	factory->AddVariable("newdarkpt"       , "", "", 'F');
	//factory->AddVariable("topRecoW"     , "", "", 'F');
	//factory->AddVariable("lep1pt"       , "", "", 'F');
	//factory->AddVariable("lep1eta"      , "", "", 'F');
	//factory->AddVariable("lep1phi"      , "", "", 'F'); 
	//factory->AddVariable("lep1mass"     , "", "", 'F');
	//factory->AddVariable("lep2pt"       , "", "", 'F'); 
	//factory->AddVariable("lep2eta"      , "", "", 'F');
	//factory->AddVariable("lep2phi"      , "", "", 'F'); 
	//factory->AddVariable("lep2mass"     , "", "", 'F');
	//factory->AddVariable("jet1pt "      , "", "", 'F');
	//factory->AddVariable("jet1eta"      , "", "", 'F');
	//factory->AddVariable("jet1phi"      , "", "", 'F');
	//factory->AddVariable("jet1mass"     , "", "", 'F');
	//factory->AddVariable("jet2pt"       , "", "", 'F');
	//factory->AddVariable("jet2eta"      , "", "", 'F');
	//factory->AddVariable("jet2phi"      , "", "", 'F');
	//factory->AddVariable("jet2mass"     , "", "", 'F');
	factory->AddVariable("metPfType1"   , "", "", 'F');
	//factory->AddVariable("metPfType1Phi", "", "", 'F');
	//factory->AddVariable("m2l"          , "", "", 'F');
	factory->AddVariable("mt2ll"        , "", "", 'F');
	//factory->AddVariable("mt2lblb"      , "", "", 'F');
	//factory->AddVariable("mtw1"         , "", "", 'F');
	//factory->AddVariable("mtw2"         , "", "", 'F');
	//factory->AddVariable("ht"           , "", "", 'F');
	//factory->AddVariable("htjets"       , "", "", 'F');
	//factory->AddVariable("htnojets"     , "", "", 'F');
	//factory->AddVariable("njet"         , "", "", 'F');
	//factory->AddVariable("nbjet30csvv2l", "", "", 'F');
	//factory->AddVariable("nbjet30csvv2m", "", "", 'F');
	//factory->AddVariable("nbjet30csvv2t", "", "", 'F');
	//factory->AddVariable("dphijet1met"  , "", "", 'F');
	//factory->AddVariable("dphijet2met"  , "", "", 'F');
	//factory->AddVariable("dphijj"       , "", "", 'F');
	//factory->AddVariable("dphijjmet"    , "", "", 'F');
	//factory->AddVariable("dphill"       , "", "", 'F');
	//factory->AddVariable("dphilep1jet1" , "", "", 'F');
	//factory->AddVariable("dphilep1jet2" , "", "", 'F');
	//factory->AddVariable("dphilep2jet1" , "", "", 'F');
	//factory->AddVariable("dphilep2jet2" , "", "", 'F');
	//factory->AddVariable("dphilmet1"    , "", "", 'F');
	//factory->AddVariable("dphilmet2"    , "", "", 'F');
	factory->AddVariable("dphillmet"    , "", "", 'F');	
	//factory->AddVariable("sphericity"   , "", "", 'F');
	//factory->AddVariable("alignment"    , "", "", 'F');
	//factory->AddVariable("planarity"    , "", "", 'F');



  // Preselection cuts and preparation
  //----------------------------------------------------------------------------
  //factory->PrepareTrainingAndTestTree(Form("metPfType1>%5.2f&&mt2ll>%5.2f&&newdarkpt>0.", metPfType1_cut, mt2ll_cut), "NormMode=EqualNumEvents:nTrain_Signal=80:nTest_Signal=80:nTrain_Background=400:nTest_Background=400:!V");
  factory->PrepareTrainingAndTestTree("mt2ll>100.&&newdarkpt>0.&&metPfType1>80.", "NormMode=EqualNumEvents:nTrain_Signal=0:nTest_Signal=0:nTrain_Background=0:nTest_Background=0:!V");

  // Book MVA
  //----------------------------------------------------------------------------

    factory->BookMethod(TMVA::Types::kMLP, "MLP01",
    	      	      "H:!V:NeuronType=sigmoid:NCycles=500:VarTransform=Norm:HiddenLayers=6,3:TestRate=1:LearningRate=0.005");

  //factory->BookMethod(TMVA::Types::kMLP, "MLP01",
  //	      	      "H:!V:NeuronType=sigmoid:NCycles=500:VarTransform=Norm:HiddenLayers=4,4:TestRate=3:LearningRate=0.005");  

  //factory->BookMethod(TMVA::Types::kMLP, "MLP02",
  //		      "H:!V:NeuronType=sigmoid:NCycles=40:VarTransform=Norm:HiddenLayers=20,10:TestRate=3:LearningRate=0.005"); 
  
  //factory->BookMethod(TMVA::Types::kMLP, "MLP03",
  //		      "H:!V:NeuronType=sigmoid:NCycles=30:VarTransform=Norm:HiddenLayers=20,20:TestRate=3:LearningRate=0.005");  


  //factory->BookMethod(TMVA::Types::kBDT, "BDT04", "NTrees=50:MaxDepth=2" );
  //factory->BookMethod(TMVA::Types::kBDT, "BDT05", "NTrees=50:MaxDepth=3" );



  // Train, test and evaluate MVA
  //----------------------------------------------------------------------------
  factory->TrainAllMethods();     // Train using the set of training events
  factory->TestAllMethods();      // Evaluate using the set of test events
  factory->EvaluateAllMethods();  // Evaluate and compare performance


  // Save the output
  //----------------------------------------------------------------------------
  outputfile->Close();

  delete factory;
}