void process(const std::vector<std::string>& inputFiles, const std::string& name, const std::string& outputFile) { TChain* signal = loadChain(inputFiles, "signal"); TChain* background = loadChain(inputFiles, "background"); TFile* output = TFile::Open(outputFile.c_str(), "recreate"); TMVA::Factory* factory = new TMVA::Factory(name.c_str(), output, "V"); factory->AddSignalTree(signal, 1.); factory->AddBackgroundTree(background, 1.); //{ //factory->AddVariable("lightJet1p2_Pt"); //factory->AddVariable("leptonic_B_Pt"); //factory->AddVariable("leptonic_Top_Pt"); //factory->AddVariable("leptonic_Top_M"); //factory->AddVariable("hadronic_B_Pt"); //factory->AddVariable("hadronic_W_M"); //factory->AddVariable("hadronic_Top_Pt"); //factory->AddVariable("hadronic_Top_M"); //factory->AddVariable("delta_R_tops"); //factory->AddVariable("delta_R_lightjets"); //factory->AddVariable("leptonic_B_CSV"); //factory->AddVariable("hadronic_B_CSV"); //} // chi^2 style { factory->AddVariable("leptonic_Top_M"); factory->AddVariable("hadronic_W_M"); factory->AddVariable("hadronic_Top_M"); factory->AddVariable("ht_fraction"); } factory->SetWeightExpression("weight"); factory->PrepareTrainingAndTestTree("", "", "V:VerboseLevel=Info:nTrain_Signal=100000:nTrain_Background=100000:nTest_Signal=100000:nTest_Background=100000"); factory->BookMethod(TMVA::Types::kBDT, "BDT", "V:BoostType=AdaBoost:nCuts=20:VarTransform=D"); factory->BookMethod(TMVA::Types::kMLP, "NN", "V:VarTransform=D"); //factory->BookMethod(TMVA::Types::kPDERS, "PDERS", "V"); factory->TrainAllMethods(); factory->TestAllMethods(); factory->EvaluateAllMethods(); output->Close(); delete output; delete signal; delete background; }
// Train, test and evaluate a default-configured TMVA BDT separating a signal
// sample from a background sample.
//
// signalName : base name of the signal ROOT file inside the sample directory
//              (".root" is appended); also used as the TMVA::Factory name
// bkgName    : base name of the background ROOT file (".root" is appended)
//
// With the default arguments the inputs are
//   ../rootFiles/SF/MediumIDTighterIP/WW.root  and
//   ../rootFiles/SF/MediumIDTighterIP/DY.root
// exactly as before; previously both paths were hard-coded and bkgName was
// ignored. Each input file must contain a tree named "nt".
// Results are written to "myAnalysisFile.root".
void test_train(TString signalName = "WW", TString bkgName = "DY") {
  TFile* outFile = new TFile("myAnalysisFile.root", "RECREATE");
  if (outFile->IsZombie()) {
    std::cout << "ERROR: could not create output file myAnalysisFile.root" << std::endl;
    delete outFile;
    return;
  }

  TMVA::Factory* factory = new TMVA::Factory(signalName, outFile, "");

  const TString directory = "../rootFiles/SF/MediumIDTighterIP/";

  // Signal and background inputs. A file that failed to open is a "zombie";
  // treat that the same way as a missing tree.
  TFile* MySignalFile = new TFile(directory + signalName + ".root", "READ");
  TFile* MyBkgFile = new TFile(directory + bkgName + ".root", "READ");
  TTree* sigTree = MySignalFile->IsZombie() ? 0 : (TTree*)MySignalFile->Get("nt");
  TTree* bkgTree = MyBkgFile->IsZombie() ? 0 : (TTree*)MyBkgFile->Get("nt");

  if (!sigTree || !bkgTree) {
    std::cout << "ERROR: could not read tree \"nt\" for signal \"" << signalName
              << "\" and/or background \"" << bkgName << "\" in " << directory << std::endl;
    outFile->Close();
    delete factory;
    delete outFile;
    delete MySignalFile;
    delete MyBkgFile;
    return;
  }

  factory->AddSignalTree(sigTree, 1);
  factory->AddBackgroundTree(bkgTree, 1);
  factory->SetWeightExpression("baseW");

  //************************************ FACTORY
  factory->AddVariable("fullpmet");
  factory->AddVariable("trkpmet");
  factory->AddVariable("ratioMet");
  factory->AddVariable("ptll");
  factory->AddVariable("mth");
  factory->AddVariable("jetpt1");
  factory->AddVariable("ptWW");
  factory->AddVariable("dphilljet");
  factory->AddVariable("dphillmet");
  factory->AddVariable("dphijet1met");
  factory->AddVariable("nvtx");

  factory->PrepareTrainingAndTestTree("", 500, 500, 500, 500);
  std::cout << "I've prepared trees" << std::endl;

  //factory->BookMethod(TMVA::Types::kFisher, "Fisher","");
  factory->BookMethod(TMVA::Types::kBDT, "BDT", "");
  std::cout << "I've booked method" << std::endl;

  factory->TrainAllMethods();
  factory->TestAllMethods();
  std::cout << "I've tested all methods" << std::endl;
  factory->EvaluateAllMethods();
  std::cout << "I've evaluated all methods" << std::endl;

  // Previously nothing was closed or released: flush the output file and
  // free the factory and the input files (which own the trees).
  outFile->Close();
  delete factory;
  delete outFile;
  delete MySignalFile;
  delete MyBkgFile;
}
//------------------------------------------------------------------------------ // MVATrain //------------------------------------------------------------------------------ void MVATrain(TString signal) { TFile* outputfile = TFile::Open(trainingdir + signal + ".root", "recreate"); // Factory //---------------------------------------------------------------------------- TMVA::Factory* factory = new TMVA::Factory(signal, outputfile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"); // Get the trees //---------------------------------------------------------------------------- _mctree.clear(); AddProcess("signal", signal); AddProcess("background", "HZJ_HToWW_M125"); AddProcess("background", "ggZH_HToWW_M125"); // AddProcess("background", "14_HZ"); // AddProcess("background", "10_HWW"); // AddProcess("background", "06_WW"); // AddProcess("background", "02_WZTo3LNu"); // AddProcess("background", "03_ZZ"); // AddProcess("background", "11_Wg"); // AddProcess("background", "07_ZJets"); // AddProcess("background", "09_TTV"); // AddProcess("background", "05_ST"); // AddProcess("background", "00_Fakes"); Double_t weight = 1.0; factory->AddSignalTree(_signaltree, weight); for (UInt_t i=0; i<_mctree.size(); i++) factory->AddBackgroundTree(_mctree[i], weight); factory->SetWeightExpression("eventW"); // Add variables //---------------------------------------------------------------------------- // Be careful with the order: it must be respected at the reading step // factory->AddVariable("<var1>+<var2>", "pretty title", "unit", 'F'); // factory->AddVariable("channel", "", "", 'F'); factory->AddVariable("metPfType1", "", "", 'F'); factory->AddVariable("m2l", "", "", 'F'); // factory->AddVariable("njet", "", "", 'F'); // factory->AddVariable("nbjet20cmvav2l", "", "", 'F'); factory->AddVariable("lep1pt", "", "", 'F'); factory->AddVariable("lep2pt", "", "", 'F'); // factory->AddVariable("jet1pt", "", "", 'F'); factory->AddVariable("jet2pt", 
"", "", 'F'); factory->AddVariable("mtw1", "", "", 'F'); factory->AddVariable("dphill", "", "", 'F'); factory->AddVariable("dphilep1jet1", "", "", 'F'); // factory->AddVariable("dphilep1jet2", "", "", 'F'); // factory->AddVariable("dphilmet1", "", "", 'F'); // factory->AddVariable("dphilep2jet1", "", "", 'F'); // factory->AddVariable("dphilep2jet2", "", "", 'F'); // factory->AddVariable("dphilmet2", "", "", 'F'); // factory->AddVariable("dphijj", "", "", 'F'); // factory->AddVariable("dphijet1met", "", "", 'F'); // factory->AddVariable("dphijet2met", "", "", 'F'); factory->AddVariable("dphillmet", "", "", 'F'); // Preselection cuts and preparation //---------------------------------------------------------------------------- factory->PrepareTrainingAndTestTree("", ":nTrain_Signal=0:nTest_Signal=0:nTrain_Background=0:nTest_Background=0:SplitMode=Alternate:MixMode=Random:!V"); // Book MVA //---------------------------------------------------------------------------- factory->BookMethod(TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=sigmoid:VarTransform=N:NCycles=600:HiddenLayers=25,10:TestRate=5:!UseRegulator"); // Train, test and evaluate MVA //---------------------------------------------------------------------------- factory->TrainAllMethods(); // Train using the set of training events factory->TestAllMethods(); // Evaluate using the set of test events factory->EvaluateAllMethods(); // Evaluate and compare performance // Save the output //---------------------------------------------------------------------------- outputfile->Close(); delete factory; }
// Train a TMVA k-nearest-neighbour (KNN) classifier.
//
// fname : path of the ROOT input file. It must provide the trees
//         "FakeTreeSig" (signal) and "FakeTreeBG" (background). If the path is
//         not accessible, tmva_class_example.root is downloaded with wget into
//         the current directory and used instead.
//
// Output is written to "TMVA.root". Only training is run; the test and
// evaluation steps are currently disabled (see below). Unless ROOT runs in
// batch mode the TMVA GUI is launched at the end.
//
// The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
// if you use your private .rootrc, or run from a different directory, please copy the
// corresponding lines from .rootrc
void TMVAClassification( TString fname = "./tmva_class_example.root")
{
   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // to get access to the GUI and all tmva macros
   TString tmva_dir(TString(gRootDir) + "/tmva");
   if (gSystem->Getenv("TMVASYS")) tmva_dir = TString(gSystem->Getenv("TMVASYS"));
   gROOT->SetMacroPath(tmva_dir + "/test/:" + gROOT->GetMacroPath() );
   gROOT->ProcessLine(".L TMVAGui.C");

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;
   Use["KNN"] = 1; // k-nearest neighbour method

   // ---------------------------------------------------------------
   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   // --------------------------------------------------------------------------------------------------
   // --- Here the preparation phase begins

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
   if (!outputFile || outputFile->IsZombie()) {
      // Nothing has been allocated yet, so a plain return is enough.
      std::cout << "ERROR: could not create output file " << outfileName << std::endl;
      delete outputFile;
      return;
   }

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory is
   // the only TMVA object you have to interact with
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification" );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   factory->AddVariable( "pt_eH", 'D' );
   factory->AddVariable( "max(pt_jet_eH,pt_eH)", 'D' );
   factory->AddVariable( "njets", 'I' );

   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
   /// factory->AddSpectator( "spec1 := var1*2",  "Spectator 1", "units", 'F' );
   /// factory->AddSpectator( "spec2 := var1*3",  "Spectator 2", "units", 'F' );

   // Read training and test data
   // (it is also possible to use ASCII format as input -> see TMVA Users Guide)
   if (gSystem->AccessPathName( fname )) {
      // file does not exist in local directory: fetch the example file
      gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root");
      fname = "./tmva_class_example.root";
   }
   // Open the file in both cases. Previously the freshly downloaded file was
   // never opened, so the download branch always ended in the error below.
   TFile *input = TFile::Open( fname );

   if (!input) {
      std::cout << "ERROR: could not open data file " << fname << std::endl;
      exit(1);
   }
   std::cout << "--- TMVAClassification       : Using input file: " << input->GetName() << std::endl;

   // --- Register the training and test trees
   TTree *inputTree  = (TTree*)input->Get("FakeTreeSig");
   TTree *background = (TTree*)input->Get("FakeTreeBG");
   if (!inputTree || !background) {
      // GetEntries() below would dereference a null pointer otherwise.
      std::cout << "ERROR: could not find trees \"FakeTreeSig\" and/or \"FakeTreeBG\" in "
                << input->GetName() << std::endl;
      outputFile->Close();
      delete factory;
      return;
   }

   // global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;

   // cuts for signal and background
   //~ TCut signalCut = "selected==1 && id_iso_eleH==1";
   //~ TCut backgroundCut = "selected==1 && id_iso_eleH==0";
   //~
   //~ std::cout << " THe signal cut is " << signalCut.GetTitle() << " bg cut is " << backgroundCut.GetTitle() << std::endl;

   // TTree::GetEntries() returns a 64-bit count; keep it in a 64-bit type.
   Long64_t num_pass = inputTree->GetEntries();
   Long64_t num_fail = background->GetEntries();

   std::cout << num_pass << " " << num_fail << std::endl;

   // You can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( inputTree,  signalWeight     );
   factory->AddBackgroundTree( background, backgroundWeight );

   factory->SetWeightExpression( "weight" );
   //factory->SetInputTrees( inputTree, signalCut, backgroundCut );

   // To give different trees for training and testing, do as follows:
   //    factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
   //    factory->AddSignalTree( signalTestTree,     signalTestWeight,  "Test" );

   // Use the following code instead of the above two or four lines to add signal and background
   // training and test events "by hand"
   // NOTE that in this case one should not give expressions (such as "var1+var2") in the input
   //      variable definition, but simply compute the expression before adding the event
   //
   //     // --- begin ----------------------------------------------------------
   //     std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
   //     Float_t  treevars[4], weight;
   //
   //     // Signal
   //     for (UInt_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   //     for (UInt_t i=0; i<signal->GetEntries(); i++) {
   //        signal->GetEntry(i);
   //        for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //        // add training and test events; here: first half is training, second is testing
   //        // note that the weight can also be event-wise
   //        if (i < signal->GetEntries()/2.0) factory->AddSignalTrainingEvent( vars, signalWeight );
   //        else                              factory->AddSignalTestEvent    ( vars, signalWeight );
   //     }
   //
   //     // Background (has event weights)
   //     background->SetBranchAddress( "weight", &weight );
   //     for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   //     for (UInt_t i=0; i<background->GetEntries(); i++) {
   //        background->GetEntry(i);
   //        for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //        // add training and test events; here: first half is training, second is testing
   //        // note that the weight can also be event-wise
   //        if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight*weight );
   //        else                                factory->AddBackgroundTestEvent    ( vars, backgroundWeight*weight );
   //     }
   //     // --- end ------------------------------------------------------------
   //
   // --- end of tree registration

   // Set individual event weights (the variables must exist in the original TTree)
   //    for signal    : factory->SetSignalWeightExpression    ("weight1*weight2");
   //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = "selected==1"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = "selected==1"; // for example: TCut mycutb = "abs(var1)<0.5";

   // Tell the factory how to use the training and testing events
   //
   // If no numbers of events are given, half of the events in the tree are used
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut,
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
   //
   // NOTE(review): the requested training sizes are the tree entry counts
   // taken before the "selected==1" cuts are applied - confirm this is intended.
   std::stringstream indexes;
   indexes.str("");
   indexes << "nTrain_Signal=" << num_pass << ":nTrain_Background=" << num_fail << ":SplitMode=Random:NormMode=None:!V";
   std::string input_opt = indexes.str();
   std::cout << "Options are " << input_opt << std::endl;
   factory->PrepareTrainingAndTestTree( mycuts, mycutb, input_opt );

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=50:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","GA");

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events (disabled)
   //factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs (disabled)
   // factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
// Train, test and evaluate a gradient-boosted decision tree
// ("BDT_simple_alpha") with TMVA.
//
// inputVariables : whitespace-separated text file. The first token is the
//                  name of the BDT and is skipped; every following token is
//                  registered as a float ('F') input variable/expression.
// signalName     : ROOT file providing the signal tree "DMSTree"
// backName       : ROOT file providing the background tree "DMSTree"
//
// Output goes to "TMVA/TMVA.root". If the variable list, the output file or
// either input tree cannot be opened, an error is printed and the function
// returns without training.
void classifyBDT(TString inputVariables = "trainingVars.txt",
                 TString signalName = "/mnt/hscratch/dabercro/skims2/BDT_Signal.root",
                 TString backName = "/mnt/hscratch/dabercro/skims2/BDT_Background.root") {
  TMVA::Tools::Instance();
  std::cout << "==> Start TMVAClassification" << std::endl;

  // Open the variable list before touching the output file, so a missing
  // list does not truncate an existing result. The old
  // `while(!configFile.eof())` loop never terminated when this open failed,
  // because eofbit is never set on a stream that could not be opened.
  std::ifstream configFile;
  configFile.open(inputVariables.Data());
  if (!configFile.is_open()) {
    std::cout << "ERROR: could not open variable list " << inputVariables << std::endl;
    return;
  }

  // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
  TString outfileName( "TMVA/TMVA.root" );
  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
  if (!outputFile || outputFile->IsZombie()) {
    std::cout << "ERROR: could not create output file " << outfileName << std::endl;
    delete outputFile;
    return;
  }

  TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationCategory", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;N" );

  // A very simple MVA (feel free to uncomment and comment what you like)
  // => as a rule of thumb 10-20 variables is where people start to get worried about total number
  std::string token;
  configFile >> token;  // Is the name of the BDT
  while (configFile >> token) {
    factory->AddVariable(token.c_str(), 'F');
  }
  configFile.close();

  // Background selection.
  // TCut lCut   = "jet1qg2<2.&&jet1pt>250.&&jet1pullAngle>-5.";// < 10 && jet1mass_m2 > 60 && jet1mass_m2 < 120";
  // TCut lCut   = "passZ > 3 && fjet1pt > 250 && fjet1MassPruned < 120 && fatjetid < 2";
  TCut lCut = "abs(fjet1PartonId)!=24&&abs(fjet1PartonId)!=23";
  // std::string lEventCut = "event % 2 == 1";
  // lCut += lEventCut.c_str();

  // Signal selection.
  // TCut lSCut   = "passT > 0 && fjet1pt > 250 && fjet1MassPruned < 120 && abs(fjet1PartonId) == 24&& fatjetid < 2";
  TCut lSCut = "abs(fjet1PartonId)==24||abs(fjet1PartonId)==23";
  // lSCut += lEventCut.c_str();

  // Applied on top of both selections.
  TCut cleanCut = "fjet1QGtagSub2 > -10 && fjet1PullAngle > -4 && abs(fjet1pt/fjet1MassTrimmed)<200 && abs(fjet1pt/fjet1MassPruned)<200";

  // TFile::Open may hand back a null pointer, and Get() returns null when the
  // tree is missing; check both before handing the trees to the factory.
  TFile *lSAInput = TFile::Open(signalName);
  TFile *lSBInput = TFile::Open(backName);
  TTree *lSASignal = lSAInput ? (TTree*)lSAInput->Get("DMSTree") : 0;
  TTree *lSBSignal = lSBInput ? (TTree*)lSBInput->Get("DMSTree") : 0;
  if (!lSASignal || !lSBSignal) {
    std::cout << "ERROR: could not read tree \"DMSTree\" from " << signalName
              << " and/or " << backName << std::endl;
    outputFile->Close();
    delete factory;
    delete lSAInput;
    delete lSBInput;
    return;
  }

  Double_t lSWeight = 1.0;
  Double_t lBWeight = 1.0;

  // Make the output file the current directory before each tree is added.
  gROOT->cd( outfileName+TString(":/") );
  factory->AddSignalTree    ( lSASignal, lSWeight );
  gROOT->cd( outfileName+TString(":/") );
  factory->AddBackgroundTree( lSBSignal, lBWeight );

  factory->SetWeightExpression("weight");

  factory->PrepareTrainingAndTestTree(lSCut&&cleanCut, lCut&&cleanCut, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Alternate:NormMode=NumEvents:!V");

  TString lBDTDef = "!H:!V:NTrees=400:BoostType=Grad:Shrinkage=0.1:UseBaggedGrad=F:nCuts=2000:NNodesMax=10000:MaxDepth=5:UseYesNoLeaf=F:nEventsMin=200";
  // TString lBDTDef   = "!H:!V:NTrees=400:BoostType=Grad:Shrinkage=0.1:UseBaggedGrad=F:nCuts=2000:MaxDepth=5:UseYesNoLeaf=F:MinNodeSize=0.086:NegWeightTreatment=IgnoreNegWeightsInTraining";
  // TString lBDTDef   = "!H:!V:NTrees=100:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad=F:nCuts=2000:NNodesMax=10000:MaxDepth=3:SeparationType=GiniIndex";
  factory->BookMethod(TMVA::Types::kBDT, "BDT_simple_alpha", lBDTDef);

  factory->TrainAllMethods();
  factory->TestAllMethods();
  factory->EvaluateAllMethods();

  outputFile->Close();
  std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
  std::cout << "==> TMVAClassification is done!" << std::endl;

  delete factory;

  // The input files were previously left open; release them once the factory
  // no longer refers to their trees.
  delete lSAInput;
  delete lSBInput;

  //if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
void TMVARegression( TString myMethodList = "" ) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVARegression.C\(\"myMethod1,myMethod2,myMethod3\"\) // //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Mutidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDEFoam"] = 1; Use["KNN"] = 1; // // --- Linear Discriminant Analysis Use["LD"] = 1; // // --- Function Discriminant analysis Use["FDA_GA"] = 1; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; // // --- Neural Network Use["MLP"] = 1; // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 0; Use["BDTG"] = 1; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVARegression" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. 
Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a new root output file TString outfileName( "TMVAReg.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory will // then run the performance analysis for you. // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVARegression", outputFile, "!V:!Silent:Color:DrawProgressBar" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] factory->AddVariable( "var1", "Variable 1", "units", 'F' ); factory->AddVariable( "var2", "Variable 2", "units", 'F' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. 
This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables factory->AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' ); factory->AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' ); // Add the variable carrying the regression target factory->AddTarget( "fvalue" ); // It is also possible to declare additional targets for multi-dimensional regression, ie: // -- factory->AddTarget( "fvalue2" ); // BUT: this is currently ONLY implemented for MLP // Read training and test data (see TMVAClassification for reading ASCII files) // load the signal and background event samples from ROOT trees TFile *input(0); TString fname = "./tmva_reg_example.root"; if (!gSystem->AccessPathName( fname )) input = TFile::Open( fname ); // check if file in local directory exists else input = TFile::Open( "http://root.cern.ch/files/tmva_reg_example.root" ); // if not: download from ROOT server if (!input) { std::cout << "ERROR: could not open data file" << std::endl; exit(1); } std::cout << "--- TMVARegression : Using input file: " << input->GetName() << std::endl; // --- Register the regression tree TTree *regTree = (TTree*)input->Get("TreeR"); // global event weights per tree (see below for setting event-wise weights) Double_t regWeight = 1.0; // You can add an arbitrary number of regression trees factory->AddRegressionTree( regTree, regWeight ); // This would set individual event weights (the variables defined in the // expression need to exist in the original TTree) factory->SetWeightExpression( "var1", "Regression" ); // Apply additional cuts on the signal and background samples (can be different) TCut mycut = ""; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1"; // tell the factory to use all remaining events in the trees after training for testing: factory->PrepareTrainingAndTestTree( mycut, "nTrain_Regression=0:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents:!V" ); // If no numbers of 
events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // ---- Book MVA methods // // please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // PDE - RS method if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" ); // And the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // Linear discriminant if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "!H:!V:VarTransform=None" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. 
the formula of this example is good for parabolas factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); // Neural network (MLP) if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" ); // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDT"]) factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=100:MinNodeSize=1.0%:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" ); if (Use["BDTG"]) factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000::BoostType=Grad:Shrinkage=0.1:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=3:MaxDepth=4" ); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and 
compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVARegression is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVARegGui( outfileName ); }
void TMVAClassificationElecTau(std::string ordering_ = "Pt", std::string bkg_ = "qqH115vsWZttQCD") { TMVA::Tools::Instance(); TString outfileName( "TMVAElecTau"+ordering_+"Ord_"+bkg_+".root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationElecTau"+ordering_+"Ord_"+bkg_, outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" ); factory->AddVariable( "pt1", "pT-tag1", "GeV/c" , 'F' ); factory->AddVariable( "pt2", "pT-tag2", "GeV/c" , 'F' ); factory->AddVariable( "Deta","|y-tag1 - y-tag2|","" , 'F' ); //factory->AddVariable( "opposite:=abs(eta1*eta2)/eta1/eta2","sign1*sign2","" , 'F' ); //factory->AddVariable( "Dphi", "#Delta#phi" ,"" , 'F' ); factory->AddVariable( "Mjj", "M(tag1,tag2)", "GeV/c^{2}" , 'F' ); factory->AddSpectator( "eta1", "#eta_{tag1}" , 'F' ); factory->AddSpectator( "eta2", "#eta_{tag2}" , 'F' ); factory->SetWeightExpression( "sampleWeight" ); TString fSignalName = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleVBFH115-powheg-PUS1_Open_ElecTauStream.root"; TString fBackgroundNameDYJets = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleZjets-alpgen-PUS1_Open_ElecTauStream.root"; TString fBackgroundNameWJets = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleWJets-madgraph-PUS1_Open_ElecTauStream.root"; TString fBackgroundNameQCD = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleQCD_Open_ElecTauStream.root"; TString fBackgroundNameTTbar = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleTTJets-madgraph-PUS1_Open_ElecTauStream.root"; TFile *fSignal(0); TFile *fBackgroundDYJets(0); TFile *fBackgroundWJets(0); TFile *fBackgroundQCD(0); TFile *fBackgroundTTbar(0); fSignal = TFile::Open( fSignalName ); fBackgroundDYJets = TFile::Open( fBackgroundNameDYJets ); fBackgroundWJets = TFile::Open( fBackgroundNameWJets ); fBackgroundQCD = 
TFile::Open( fBackgroundNameQCD ); fBackgroundTTbar = TFile::Open( fBackgroundNameTTbar ); if(!fSignal || !fBackgroundDYJets || !fBackgroundWJets || !fBackgroundQCD || !fBackgroundTTbar) { std::cout << "ERROR: could not open files" << std::endl; exit(1); } TString tree = "outTree"+ordering_+"Ord"; TCut mycuts = ""; TCut mycutb = ""; TCut cutA = "pt1>0 && tightestHPSWP>0"; TCut cutB = "pt1>0 && combRelIsoLeg1<0.1"; TCut cutBl = "pt1>0 && combRelIsoLeg1<0.3"; TCut cutC = "pt1>0 && diTauCharge==0"; TCut cutD = "pt1>0 && MtLeg1<40"; // select events for training TFile* dummy = new TFile("dummy.root","RECREATE"); TH1F* allEvents = new TH1F("allEvents","",1,-10,10); float totalEvents, cutEvents; // signal: all TTree *signal = ((TTree*)(fSignal->Get(tree)))->CopyTree(cutA&&cutB&&cutC&&cutD); cout << "Copied signal tree with full selection: " << ((TTree*)(fSignal->Get(tree)))->GetEntries() << " --> " << signal->GetEntries() << endl; allEvents->Reset(); signal->Draw("eta1>>allEvents","sampleWeight"); cutEvents = allEvents->Integral(); Double_t signalWeight = 1.0; cout << "Signal: expected yield " << cutEvents << " -- weight " << signalWeight << endl; // Z+jets: all TTree *backgroundDYJets = ((TTree*)(fBackgroundDYJets->Get(tree)))->CopyTree(cutA&&cutB&&cutC&&cutD); cout << "Copied DYJets tree with full selection: " << ((TTree*)(fBackgroundDYJets->Get(tree)))->GetEntries() << " --> " << backgroundDYJets->GetEntries() << endl; allEvents->Reset(); backgroundDYJets->Draw("eta1>>allEvents","sampleWeight"); cutEvents = allEvents->Integral(); Double_t backgroundDYJetsWeight = 1.0; cout << "ZJets: expected yield " << cutEvents << " -- weight " << backgroundDYJetsWeight << endl; // W+jets: iso+Mt TTree *backgroundWJets = ((TTree*)(fBackgroundWJets->Get(tree)))->CopyTree(cutB&&cutD); cout << "Copied WJets tree with iso+Mt selection: " << ((TTree*)(fBackgroundWJets->Get(tree)))->GetEntries() << " --> " << backgroundWJets->GetEntries() << endl; allEvents->Reset(); 
backgroundWJets->Draw("eta1>>allEvents","sampleWeight"); totalEvents = allEvents->Integral(); allEvents->Reset(); backgroundWJets->Draw("eta1>>allEvents","sampleWeight*(tightestHPSWP>0 && diTauCharge==0)"); cutEvents = allEvents->Integral(); Double_t backgroundWJetsWeight = cutEvents / totalEvents; cout << "WJets: expected yield " << cutEvents << " -- weight " << backgroundWJetsWeight << endl; // QCD: Mt+loose iso TTree *backgroundQCD = ((TTree*)(fBackgroundQCD->Get(tree)))->CopyTree(cutD&&cutBl); cout << "Copied QCD tree with Mt selection: " << ((TTree*)(fBackgroundQCD->Get(tree)))->GetEntries() << " --> " << backgroundQCD->GetEntries() << endl; allEvents->Reset(); backgroundQCD->Draw("eta1>>allEvents","sampleWeight"); totalEvents = allEvents->Integral(); allEvents->Reset(); backgroundQCD->Draw("eta1>>allEvents","sampleWeight*(tightestHPSWP>0 && diTauCharge==0 && combRelIsoLeg1<0.1)"); cutEvents = allEvents->Integral(); Double_t backgroundQCDWeight = cutEvents / totalEvents; cout << "QCD: expected yield " << cutEvents << " -- weight " << backgroundQCDWeight << endl; // TTbar: iso+Mt TTree *backgroundTTbar = ((TTree*)(fBackgroundTTbar->Get(tree)))->CopyTree(cutB&&cutD); cout << "Copied TTbar tree with iso+Mt selection: " << ((TTree*)(fBackgroundTTbar->Get(tree)))->GetEntries() << " --> " << backgroundTTbar->GetEntries() << endl; allEvents->Reset(); backgroundTTbar->Draw("eta1>>allEvents","sampleWeight"); totalEvents = allEvents->Integral(); allEvents->Reset(); backgroundTTbar->Draw("eta1>>allEvents","sampleWeight*(tightestHPSWP>0 && diTauCharge==0)"); cutEvents = allEvents->Integral(); Double_t backgroundTTbarWeight = cutEvents / totalEvents; cout << "TTbar: expected yield " << cutEvents << " -- weight " << backgroundTTbarWeight << endl; delete allEvents; factory->AddSignalTree ( signal, signalWeight ); //factory->AddBackgroundTree( backgroundDYJets, backgroundDYJetsWeight ); //factory->AddBackgroundTree( backgroundWJets, backgroundWJetsWeight ); 
factory->AddBackgroundTree( backgroundQCD, backgroundQCDWeight ); //factory->AddBackgroundTree( backgroundTTbar, backgroundTTbarWeight ); factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=1:nTest_Background=1:SplitMode=Random:NormMode=NumEvents:!V" ); factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=GA:EffSel:CutRangeMin[0]=25.:CutRangeMax[0]=999:CutRangeMin[1]=25.:CutRangeMax[1]=999.:CutRangeMin[2]=1.0:CutRangeMax[2]=9.:CutRangeMin[3]=100:CutRangeMax[3]=7000:VarProp=FSmart" ); /* factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=200:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); */ factory->TrainAllMethods(); factory->TestAllMethods(); factory->EvaluateAllMethods(); outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; //if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
// Trains and evaluates TMVA classifiers (rectangular cuts and/or AdaBoost BDT)
// on the "latino" ntuples.
//
//   myMethodList : comma-separated list of method names to run. When empty, the
//                  defaults set in the `Use` map below apply. An unknown name
//                  prints the list of known names and returns without training.
//
// Side effects: loads TMVAGui.C into the interpreter and writes "TMVA.root" in
// the working directory.
void TMVAClassificationHwwNtuple( TString myMethodList = "" )
{
   // This loads the library
   TMVA::Tools::Instance();
   gROOT->ProcessLine(".L TMVAGui.C");

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Cut optimisation
   Use["Cuts"]    = 1;
   Use["CutsD"]   = 0;
   Use["CutsPCA"] = 0;
   Use["CutsGA"]  = 0;
   Use["CutsSA"]  = 0;
   //
   Use["BDT"]     = 1; // uses Adaptive Boost
   Use["BDTG"]    = 0; // uses Gradient Boost
   Use["BDTB"]    = 0; // uses Bagging
   Use["BDTD"]    = 0; // decorrelation + Adaptive Boost
   Use["BDTF"]    = 0; // allow usage of fisher discriminant for node splitting
   //
   // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
   Use["RuleFit"] = 0;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   // Select methods: an explicit list replaces the defaults above
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); ++it) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); ++it) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
         std::cout << regMethod << " is on" << std::endl;
      }
   }

   // -------------------------------------------------------------------------
   // --- Here the preparation phase begins

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
   if (!outputFile) {
      // Without an output file the factory has nowhere to write and the
      // Close()/GetName() calls at the end would dereference a null pointer.
      std::cout << "ERROR: could not create output file " << outfileName << std::endl;
      return;
   }

   // For one variable
   //TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
   //    "!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification" );
   // For Multiple Variables
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

   //factory->AddVariable( "pt1", "LeadLepton pt", "", 'F' );
   //factory->AddVariable( "pt2", "TailLepton pt", "", 'F' );
   factory->AddVariable( "pfmet", "MissingEt", "", 'F' );
   factory->AddVariable( "mpmet", "Minimum Proj. Met", "", 'F' );
   factory->AddVariable( "dphill", "DeltPhiOfLepLep", "", 'F' );
   //factory->AddVariable( "mll", "DiLepton Mass", "", 'F' );
   factory->AddVariable( "ptll", "DiLepton pt", "", 'F' );
   //
   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
   //factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' );
   //factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' );
   //
   //factory->AddSpectator( "mWW", "Higgs Mass", "", 'F' );
   // NOTE(review): pfmet, mpmet, dphill and ptll are registered both as training
   // variables above and as spectators here - confirm the duplication is intended.
   factory->AddSpectator( "pt1", "LeadLepton pt", "", 'F' );
   factory->AddSpectator( "pt2", "TailLepton pt", "", 'F' );
   factory->AddSpectator( "pfmet", "MissingEt", "", 'F' );
   factory->AddSpectator( "mpmet", "Minimum Proj. Met", "", 'F' );
   factory->AddSpectator( "dphill", "DeltPhiOfLepLep", "", 'F' );
   factory->AddSpectator( "mll", "DiLepton Mass", "", 'F' );
   factory->AddSpectator( "ptll", "DiLepton pt", "", 'F' );

   // Read training and test data
   // (it is also possible to use ASCII format as input -> see TMVA Users Guide)
   //TString fname = "./tmva_class_example.root";
   //TString fname = "/afs/cern.ch/work/s/salee/private/HWWwidth/HWW/GGVvAnalyzer/MkNtuple/Hw1Int8TeV/MkNtuple.root";
   //TString fname = "/terranova_0/HWWwidth/HWW/GGVvAnalyzer/MkNtuple/Hw1Int8TeV/MkNtuple.root";
   //if (gSystem->AccessPathName( fname )) // file does not exist in local directory
   //   exit(-1);
   //gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root");
   //TFile *input = TFile::Open( fname );
   //TFile *SB_OnPeak = TFile::Open("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_IntOnPeak_8TeV.root");
   //TTree *SB_OnPeak_Tree = (TTree*)SB_OnPeak->Get("latino");

   TChain *S_Chain    = new TChain("latino");
   TChain *C_Chain    = new TChain("latino");
   TChain *SCI_Chain  = new TChain("latino");
   TChain *qqWW_Chain = new TChain("latino");

   S_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_SigOnPeak_8TeV.root");
   S_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_SigShoulder_8TeV.root");
   S_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_SigTail_8TeV.root");

   // SCI_Chain is filled but only referenced by the commented-out
   // SetInputTrees() call further down.
   SCI_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_IntOnPeak_8TeV.root");
   SCI_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_IntShoulder_8TeV.root");
   SCI_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_IntTail_8TeV.root");

   C_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw25_CotHead_8TeV.root");
   C_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw25_CotTail_8TeV.root");

   qqWW_Chain->Add("/afs/cern.ch/user/m/maiko/work/public/Tree/tree_skim_wwmin/nominals/latino_000_WWJets2LMad.root");

   // --- Register the training and test trees
   // You can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( S_Chain );
   factory->AddBackgroundTree( qqWW_Chain );
   factory->AddBackgroundTree( C_Chain );

   // Classification training and test data in ROOT tree format with signal and background events being located in the same tree
   //factory->SetInputTrees(SCI_Chain, GenOffCut, GenOnCut);

   // To give different trees for training and testing, do as follows:
   //    factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
   //    factory->AddSignalTree( signalTestTree,     signalTestWeight,  "Test" );

   factory->SetWeightExpression ("2.1*puW*baseW*effW*triggW*19.468");
   //factory->SetSignalWeightExpression ("2.1*puW*baseW*effW*triggW*19.468");
   //factory->SetBackgroundWeightExpression("puW*baseW*effW*triggW*19.468");

   //factory->PrepareTrainingAndTestTree( ChanCommOff,
   //    "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=None:!V" );
   //"nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V";
   // ChanCommOff0J is a selection defined outside this function.
   factory->PrepareTrainingAndTestTree( ChanCommOff0J,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=None:!V" );

   // ---- Book MVA methods
   //
   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:V:NTrees=850:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );
   //"!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );

   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // -----------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

   // ---- STILL EXPERIMENTAL and only implemented for BDT's !
   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","FitGA");

   // -----------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // The factory no longer exists, so the input chains and the (already
   // closed) output file can be released as well.
   delete S_Chain;
   delete C_Chain;
   delete SCI_Chain;
   delete qqWW_Chain;
   delete outputFile;

   // Launch the GUI for the root macros
   //if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
//------------------------------------------------------------------------------ // MVATrain //------------------------------------------------------------------------------ void MVATrain(float metPfType1_cut, float mt2ll_cut, TString signal) { TFile* outputfile = TFile::Open(trainingdir + signal + ".root", "recreate"); // Factory //---------------------------------------------------------------------------- TMVA::Factory* factory = new TMVA::Factory(signal, outputfile, // "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"); "!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification"); // Get the trees //---------------------------------------------------------------------------- _mctree.clear(); AddProcess("signal" , signal);//"01_Data_reduced_1outof6"); //signal AddProcess("background", "04_TTTo2L2Nu"); /*AddProcess("background", "14_HZ"); AddProcess("background", "10_HWW"); AddProcess("background", "06_WW"); AddProcess("background", "02_WZTo3LNu"); AddProcess("background", "03_VZ"); AddProcess("background", "11_Wg"); AddProcess("background", "07_ZJets"); AddProcess("background", "09_TTV"); AddProcess("background", "05_ST"); AddProcess("background", "00_Fakes_reduced_1outof6");*/ Double_t weight = 1.0; factory->AddSignalTree(_signaltree, weight); for (UInt_t i=0; i<_mctree.size(); i++) factory->AddBackgroundTree(_mctree[i], weight); factory->SetWeightExpression("eventW"); // Add variables //---------------------------------------------------------------------------- // Be careful with the order: it must be respected at the reading step // factory->AddVariable("<var1>+<var2>", "pretty title", "unit", 'F'); factory->AddVariable("newdarkpt" , "", "", 'F'); //factory->AddVariable("topRecoW" , "", "", 'F'); //factory->AddVariable("lep1pt" , "", "", 'F'); //factory->AddVariable("lep1eta" , "", "", 'F'); //factory->AddVariable("lep1phi" , "", "", 'F'); //factory->AddVariable("lep1mass" , "", "", 'F'); 
//factory->AddVariable("lep2pt" , "", "", 'F'); //factory->AddVariable("lep2eta" , "", "", 'F'); //factory->AddVariable("lep2phi" , "", "", 'F'); //factory->AddVariable("lep2mass" , "", "", 'F'); //factory->AddVariable("jet1pt " , "", "", 'F'); //factory->AddVariable("jet1eta" , "", "", 'F'); //factory->AddVariable("jet1phi" , "", "", 'F'); //factory->AddVariable("jet1mass" , "", "", 'F'); //factory->AddVariable("jet2pt" , "", "", 'F'); //factory->AddVariable("jet2eta" , "", "", 'F'); //factory->AddVariable("jet2phi" , "", "", 'F'); //factory->AddVariable("jet2mass" , "", "", 'F'); factory->AddVariable("metPfType1" , "", "", 'F'); //factory->AddVariable("metPfType1Phi", "", "", 'F'); //factory->AddVariable("m2l" , "", "", 'F'); factory->AddVariable("mt2ll" , "", "", 'F'); //factory->AddVariable("mt2lblb" , "", "", 'F'); //factory->AddVariable("mtw1" , "", "", 'F'); //factory->AddVariable("mtw2" , "", "", 'F'); //factory->AddVariable("ht" , "", "", 'F'); //factory->AddVariable("htjets" , "", "", 'F'); //factory->AddVariable("htnojets" , "", "", 'F'); //factory->AddVariable("njet" , "", "", 'F'); //factory->AddVariable("nbjet30csvv2l", "", "", 'F'); //factory->AddVariable("nbjet30csvv2m", "", "", 'F'); //factory->AddVariable("nbjet30csvv2t", "", "", 'F'); //factory->AddVariable("dphijet1met" , "", "", 'F'); //factory->AddVariable("dphijet2met" , "", "", 'F'); //factory->AddVariable("dphijj" , "", "", 'F'); //factory->AddVariable("dphijjmet" , "", "", 'F'); //factory->AddVariable("dphill" , "", "", 'F'); //factory->AddVariable("dphilep1jet1" , "", "", 'F'); //factory->AddVariable("dphilep1jet2" , "", "", 'F'); //factory->AddVariable("dphilep2jet1" , "", "", 'F'); //factory->AddVariable("dphilep2jet2" , "", "", 'F'); //factory->AddVariable("dphilmet1" , "", "", 'F'); //factory->AddVariable("dphilmet2" , "", "", 'F'); factory->AddVariable("dphillmet" , "", "", 'F'); //factory->AddVariable("sphericity" , "", "", 'F'); //factory->AddVariable("alignment" , "", "", 'F'); 
//factory->AddVariable("planarity" , "", "", 'F'); // Preselection cuts and preparation //---------------------------------------------------------------------------- //factory->PrepareTrainingAndTestTree(Form("metPfType1>%5.2f&&mt2ll>%5.2f&&newdarkpt>0.", metPfType1_cut, mt2ll_cut), "NormMode=EqualNumEvents:nTrain_Signal=80:nTest_Signal=80:nTrain_Background=400:nTest_Background=400:!V"); factory->PrepareTrainingAndTestTree("mt2ll>100.&&newdarkpt>0.&&metPfType1>80.", "NormMode=EqualNumEvents:nTrain_Signal=0:nTest_Signal=0:nTrain_Background=0:nTest_Background=0:!V"); // Book MVA //---------------------------------------------------------------------------- factory->BookMethod(TMVA::Types::kMLP, "MLP01", "H:!V:NeuronType=sigmoid:NCycles=500:VarTransform=Norm:HiddenLayers=6,3:TestRate=1:LearningRate=0.005"); //factory->BookMethod(TMVA::Types::kMLP, "MLP01", // "H:!V:NeuronType=sigmoid:NCycles=500:VarTransform=Norm:HiddenLayers=4,4:TestRate=3:LearningRate=0.005"); //factory->BookMethod(TMVA::Types::kMLP, "MLP02", // "H:!V:NeuronType=sigmoid:NCycles=40:VarTransform=Norm:HiddenLayers=20,10:TestRate=3:LearningRate=0.005"); //factory->BookMethod(TMVA::Types::kMLP, "MLP03", // "H:!V:NeuronType=sigmoid:NCycles=30:VarTransform=Norm:HiddenLayers=20,20:TestRate=3:LearningRate=0.005"); //factory->BookMethod(TMVA::Types::kBDT, "BDT04", "NTrees=50:MaxDepth=2" ); //factory->BookMethod(TMVA::Types::kBDT, "BDT05", "NTrees=50:MaxDepth=3" ); // Train, test and evaluate MVA //---------------------------------------------------------------------------- factory->TrainAllMethods(); // Train using the set of training events factory->TestAllMethods(); // Evaluate using the set of test events factory->EvaluateAllMethods(); // Evaluate and compare performance // Save the output //---------------------------------------------------------------------------- outputfile->Close(); delete factory; }