// Assumed includes for standalone compilation; getNormalisationFactor() is an
// analysis-specific helper defined elsewhere in this package (signature assumed
// from the call sites below). TMVAGui() comes from the TMVAGui.C macro or
// TMVA/TMVAGui.h, depending on the ROOT version.
#include <iostream>
#include <string>
#include <vector>

#include "TCut.h"
#include "TFile.h"
#include "TROOT.h"
#include "TString.h"
#include "TTree.h"
#include "TMVA/Factory.h"
#include "TMVA/Tools.h"

float getNormalisationFactor(double lumi, const std::string& sample);

int main(int argc, char* argv[]) {
  if (argc != 3) {
    std::cout << "Usage: <path to input files> <path to output MVA file>" << std::endl;
    return 1;
  }
  std::string inPath(argv[1]);
  std::string outPath(argv[2]);
  std::cout << " -- Input path:  " << inPath << std::endl;
  std::cout << " -- Output file: " << outPath << std::endl;

  // This loads the library
  TMVA::Tools::Instance();

  // --- Here the preparation phase begins
  TString outfileName(outPath);
  TFile* outputFile = TFile::Open(outfileName, "RECREATE");

  // Create the factory object. Later you can choose the methods whose
  // performance you'd like to investigate. The factory is the only TMVA
  // object you have to interact with.
  //
  // The first argument is the base of the name of all the weight files
  // in the directory weights/.
  //
  // The second argument is the output file for the training results.
  // All TMVA output can be suppressed by removing the "!" (not) in
  // front of the "Silent" argument in the option string.
  TMVA::Factory* factory = new TMVA::Factory("TMVAClassification", outputFile,
      "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification");

  // Define the input variables that shall be used for the MVA training.
  // Note that you may also use variable expressions, such as "3*var1/var2*abs(var3)"
  // [all types of expressions that can also be parsed by TTree::Draw("expression")].
  // factory->AddVariable("jet1_pt", "jet1_pt", "", 'D');
  // factory->AddVariable("jet2_pt", "jet2_pt", "", 'D');
  factory->AddVariable("dijet_M", "dijet_M", "", 'D');
  factory->AddVariable("dijet_deta", "dijet_deta", "", 'D');
  factory->AddVariable("metnomu_significance", "metnomu_significance", "", 'D');
  factory->AddVariable("alljetsmetnomu_mindphi", "alljetsmetnomu_mindphi", "", 'D');
  // factory->AddSpectator("nPhot_presel", "nPhot_presel", "", 'F');

  std::vector<std::string> backgrounds;
  backgrounds.push_back("WJetsToLNu_HT-100to200_Tune4C_13TeV-madgraph-tauola");
  backgrounds.push_back("WJetsToLNu_HT-200to400_Tune4C_13TeV-madgraph-tauola");
  backgrounds.push_back("WJetsToLNu_HT-400to600_Tune4C_13TeV-madgraph-tauola");
  backgrounds.push_back("WJetsToLNu_HT-600toInf_Tune4C_13TeV-madgraph-tauola");
  backgrounds.push_back("ZJetsToNuNu_HT-100to200_Tune4C_13TeV-madgraph-tauola");
  backgrounds.push_back("ZJetsToNuNu_HT-200to400_Tune4C_13TeV-madgraph-tauola");
  backgrounds.push_back("ZJetsToNuNu_HT-400to600_Tune4C_13TeV-madgraph-tauola");
  backgrounds.push_back("ZJetsToNuNu_HT-600toInf_Tune4C_13TeV-madgraph-tauola");

  std::vector<std::string> signals;
  signals.push_back("VBF_HToInv_M-125_13TeV_powheg-pythia6");

  double lumiData = 10000; // in pb^-1

  for (size_t i = 0; i < signals.size(); i++) {
    float weight = getNormalisationFactor(lumiData, signals[i]);
    TFile* f = TFile::Open(Form("%s/%s.root", inPath.c_str(), signals[i].c_str()));
    TTree* sig = (TTree*)f->Get("lightTree/LightTree");
    if (!sig) {
      std::cout << "====> ERROR: Sig tree " << signals[i] << " cannot be found" << std::endl;
      continue;
    }
    factory->AddSignalTree(sig, weight);
  }

  for (size_t i = 0; i < backgrounds.size(); i++) {
    float weight = getNormalisationFactor(lumiData, backgrounds[i]);
    TFile* f = TFile::Open(Form("%s/%s.root", inPath.c_str(), backgrounds[i].c_str()));
    TTree* bkg = (TTree*)f->Get("lightTree/LightTree");
    if (!bkg) {
      std::cout << "====> ERROR: Bkg tree " << backgrounds[i] << " cannot be found" << std::endl;
      continue;
    }
    factory->AddBackgroundTree(bkg, weight);
  }

  // Apply additional cuts on the signal and background samples (can be different).
  // Preselection to get rid of QCD; here the same cut is used for both.
  TCut mycuts;
  TCut mycutb;
  mycuts = "passtrigger==1 && nvetomuons==0 && nvetoelectrons==0 && metnomuons>140 && abs(jet1_eta)<4.7 && abs(jet2_eta)<4.7 && dijet_M>700 && jet1_eta*jet2_eta<0 && metnomu_significance>4 && alljetsmetnomu_mindphi>2 && jet1_pt>50 && jet2_pt>40";
  mycutb = "passtrigger==1 && nvetomuons==0 && nvetoelectrons==0 && metnomuons>140 && abs(jet1_eta)<4.7 && abs(jet2_eta)<4.7 && dijet_M>700 && jet1_eta*jet2_eta<0 && metnomu_significance>4 && alljetsmetnomu_mindphi>2 && jet1_pt>50 && jet2_pt>40";

  factory->PrepareTrainingAndTestTree(mycuts, mycutb,
      "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V");

  // ---- Book MVA methods
  // factory->BookMethod( TMVA::Types::kCuts, "Cuts",
  //     "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp[0]=FSmart:VarProp[1]=FSmart:VarProp[2]=FSmart:VarProp[3]=FSmart:VarProp[4]=FSmart:VarProp[5]=FSmart" );
  factory->BookMethod(TMVA::Types::kCuts, "CutsGA",
      "H:!V:FitMethod=GA:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95:VarProp[0]=FSmart:VarProp[1]=FSmart:VarProp[2]=FSmart:VarProp[3]=FSmart");
  // factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
  //     "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );
  factory->BookMethod(TMVA::Types::kBDT, "BDT",
      "!H:!V:NTrees=1000:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning");

  // ---- Now you can tell the factory to train, test, and evaluate the MVAs

  // Train MVAs using the set of training events
  factory->TrainAllMethods();
  // ---- Evaluate all MVAs using the set of test events
  factory->TestAllMethods();
  // ----- Evaluate and compare performance of all configured MVAs
  factory->EvaluateAllMethods();

  // --------------------------------------------------------------
  // Save the output
  outputFile->Close();
  std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
  std::cout << "==> TMVAClassification is done!" << std::endl;
  delete factory;

  // Launch the GUI for the root macros
  if (!gROOT->IsBatch()) TMVAGui(outfileName);
  return 0;
}
void Boost() {
  TString outfileName = "boost.root";
  TFile* outputFile = TFile::Open(outfileName, "RECREATE");

  TMVA::Factory* factory = new TMVA::Factory("TMVAClassification", outputFile,
      "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D");

  factory->AddVariable("var0", 'F');
  factory->AddVariable("var1", 'F');

  TFile* input(0);
  TString fname = "./data.root";
  if (!gSystem->AccessPathName(fname)) {
    // first we try to find data.root in the local directory
    std::cout << "--- BOOST : Accessing " << fname << std::endl;
    input = TFile::Open(fname);
  }
  else {
    gROOT->LoadMacro("./createData.C");
    create_circ(20000);
    std::cout << " created data.root with signal and background arranged in half circles" << std::endl;
    input = TFile::Open(fname);
  }
  if (!input) {
    std::cout << "ERROR: could not open data file" << std::endl;
    exit(1);
  }

  TTree* signal     = (TTree*)input->Get("TreeS");
  TTree* background = (TTree*)input->Get("TreeB");

  Double_t signalWeight     = 1.0;
  Double_t backgroundWeight = 1.0;

  gROOT->cd(outfileName + TString(":/"));
  factory->AddSignalTree    (signal,     signalWeight);
  factory->AddBackgroundTree(background, backgroundWeight);

  factory->PrepareTrainingAndTestTree("", "",
      "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V");

  TString fisher = "H:!V";
  factory->BookMethod(TMVA::Types::kFisher, "Fisher",          fisher);
  factory->BookMethod(TMVA::Types::kFisher, "FisherBoost",     fisher + ":Boost_Num=100:Boost_Type=AdaBoost");
  factory->BookMethod(TMVA::Types::kFisher, "FisherBoostLog",  fisher + ":Boost_Num=100:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=1.0");
  factory->BookMethod(TMVA::Types::kFisher, "FisherBoostLog2", fisher + ":Boost_Num=100:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=2.0");
  factory->BookMethod(TMVA::Types::kFisher, "FisherBoostStep", fisher + ":Boost_Num=100:Boost_Transform=step:Boost_Type=AdaBoost:Boost_AdaBoostBeta=1.0");
  factory->BookMethod(TMVA::Types::kFisher, "FisherBoostStep2",fisher + ":Boost_Num=100:Boost_Transform=step:Boost_Type=AdaBoost:Boost_AdaBoostBeta=1.2");
  factory->BookMethod(TMVA::Types::kFisher, "FisherBoostStep3",fisher + ":Boost_Num=100:Boost_Transform=step:Boost_Type=AdaBoost:Boost_AdaBoostBeta=1.5");

  // Train MVAs using the set of training events
  factory->TrainAllMethods();
  // ---- Evaluate all MVAs using the set of test events
  factory->TestAllMethods();
  // ----- Evaluate and compare performance of all configured MVAs
  factory->EvaluateAllMethods();

  // --------------------------------------------------------------
  // Save the output
  outputFile->Close();
  std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
  std::cout << "==> TMVAClassification is done!" << std::endl;
  delete factory;

  // Launch the GUI for the root macros
  if (!gROOT->IsBatch()) TMVAGui(outfileName);
}
void testBDT() {
  //---------------------------------------------------------------
  // This loads the library
  TMVA::Tools::Instance();

  /*
  TString outfileName( "TMVA.root" );
  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
  TMVA::Factory *factory = new TMVA::Factory( "testBDT", outputFile,
      "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

  // global event weights per tree (see below for setting event-wise weights)
  //Double_t signalWeight     = 0.003582;
  //Double_t backgroundWeight = 0.0269;
  Double_t signalWeight     = 1;
  Double_t backgroundWeight = 1;

  TFile *input_sig = TFile::Open( "signal_exclusif.root" );
  TFile *input_wz  = TFile::Open( "bruit_w_z.root" );
  TTree *signal     = (TTree*)input_sig->Get("tree");
  TTree *background = (TTree*)input_wz->Get("tree");

  // You can add an arbitrary number of signal or background trees
  factory->AddSignalTree    ( signal,     signalWeight );
  factory->AddBackgroundTree( background, backgroundWeight );

  factory->AddVariable("PT_z",     'F');
  factory->AddVariable("ASYM",     'F');
  factory->AddVariable("PHI_lw_b", 'F');
  factory->AddVariable("M_top",    'F');
  */

  TString outfileName( "bdtTMVA_FCNC_tZ.root" );
  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

  TMVA::Factory *factory = new TMVA::Factory( "doBDT_FCNC_tZ", outputFile,
      "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

  // global event weights per tree (see below for setting event-wise weights)
  //Double_t signalWeight     = 0.003582;
  //Double_t backgroundWeight = 0.0269;
  Double_t signalWeight     = 1;
  Double_t backgroundWeight = 1;

  TFile *input_sig = TFile::Open( "proof.root" );
  TFile *input_wz  = TFile::Open( "proof.root" );

  TTree *signal        = (TTree*)input_sig->Get("Ttree_FCNCkut");
  TTree *background_WZ = (TTree*)input_wz->Get("Ttree_WZ");
  /*TTree *background_ZZ     = (TTree*)input_wz->Get("Ttree_ZZ");
  TTree *background_WW     = (TTree*)input_wz->Get("Ttree_WW");
  TTree *background_TTbar  = (TTree*)input_wz->Get("Ttree_TTbar");
  TTree *background_Zjets  = (TTree*)input_wz->Get("Ttree_Zjets");
  TTree *background_Wjets  = (TTree*)input_wz->Get("Ttree_Wjets");
  TTree *background_TtW    = (TTree*)input_wz->Get("Ttree_TtW");
  TTree *background_TbartW = (TTree*)input_wz->Get("Ttree_TbartW");*/

  // You can add an arbitrary number of signal or background trees
  factory->AddSignalTree    ( signal,        signalWeight );
  factory->AddBackgroundTree( background_WZ, backgroundWeight );
  /*factory->AddBackgroundTree( background_ZZ,     backgroundWeight );
  factory->AddBackgroundTree( background_WW,     backgroundWeight );
  factory->AddBackgroundTree( background_TTbar,  backgroundWeight );
  factory->AddBackgroundTree( background_Zjets,  backgroundWeight );
  factory->AddBackgroundTree( background_Wjets,  backgroundWeight );
  factory->AddBackgroundTree( background_TtW,    backgroundWeight );
  factory->AddBackgroundTree( background_TbartW, backgroundWeight );*/

  factory->AddVariable("tree_topMass",    'F');
  factory->AddVariable("tree_deltaPhilb", 'F');
  factory->AddVariable("tree_asym",       'F');
  factory->AddVariable("tree_Zpt",        'F');

  // To set per-event weights: the weight variables must exist in the tree.
  // for signal:
  factory->SetSignalWeightExpression    ("weight1*weight2");
  // for background:
  factory->SetBackgroundWeightExpression("weight1*weight2");

  // Apply additional cuts on the signal and background samples (can be different)
  TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
  TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";

  factory->PrepareTrainingAndTestTree( mycuts, mycutb,
      "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

  factory->BookMethod( TMVA::Types::kBDT, "BDT",
      "!H:!V:NTrees=100:nEventsMin=100:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );

  // Train MVAs using the set of training events
  factory->TrainAllMethods();
  // ---- Evaluate all MVAs using the set of test events
  factory->TestAllMethods();
  // ----- Evaluate and compare performance of all configured MVAs
  factory->EvaluateAllMethods();

  // --------------------------------------------------------------
  // Save the output
  outputFile->Close();
  std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
  std::cout << "==> TMVAClassification is done!" << std::endl;
  delete factory;

  // Launch the GUI for the root macros
  if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
//void TMVAClassification( TString myMethodList = "" )
void tmvaClassifier( TString myMethodList = "", TString inputDir = "~/work/ewkzp2j_5311/ll/",
                     bool minimalTrain = false, bool useQG = false )
{
  gSystem->ExpandPathName(inputDir);
  TString pf("base_weights");
  if (!minimalTrain) {
    if (useQG) pf = "full_weights";
    else       pf = "weights";
  }
  TMVA::gConfig().GetIONames().fWeightFileDir = inputDir + pf;

  // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc.
  // If you use your private .rootrc, or run from a different directory, please copy the
  // corresponding lines from .rootrc.
  //
  // The methods to be processed can be given as an argument; use the format:
  //
  //   mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
  //
  // If you like to use a method via the plugin mechanism, we recommend using
  //
  //   mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
  //
  // (an example is given for using the BDT as plugin (see below), but of course
  // the real application is when you write your own method based on the plugin mechanism).
  //---------------------------------------------------------------

  // This loads the library
  TMVA::Tools::Instance();

  // Default MVA methods to be trained + tested
  std::map<std::string,int> Use;

  // --- Cut optimisation
  Use["Cuts"]          = 0;
  Use["CutsD"]         = 0;
  Use["CutsPCA"]       = 0;
  Use["CutsGA"]        = 0;
  Use["CutsSA"]        = 0;
  //
  // --- 1-dimensional likelihood ("naive Bayes estimator")
  Use["Likelihood"]    = 0;
  Use["LikelihoodD"]   = 1; // the "D" extension indicates decorrelated input variables (see option strings)
  Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
  Use["LikelihoodKDE"] = 0;
  Use["LikelihoodMIX"] = 0;
  //
  // --- Multidimensional likelihood and nearest-neighbour methods
  Use["PDERS"]         = 0;
  Use["PDERSD"]        = 0;
  Use["PDERSPCA"]      = 0;
  Use["PDEFoam"]       = 0;
  Use["PDEFoamBoost"]  = 0; // uses generalised MVA method boosting
  Use["KNN"]           = 0; // k-nearest-neighbour method
  //
  // --- Linear Discriminant Analysis
  Use["LD"]            = 0; // Linear Discriminant identical to Fisher
  Use["Fisher"]        = 1;
  Use["FisherCat"]     = 0; // added by loic
  Use["FisherG"]       = 0;
  Use["BoostedFisher"] = 0; // uses generalised MVA method boosting
  Use["HMatrix"]       = 0;
  //
  // --- Function Discriminant analysis
  Use["FDA_GA"]        = 0; // minimisation of user-defined function using Genetic Algorithm
  Use["FDA_SA"]        = 0;
  Use["FDA_MC"]        = 0;
  Use["FDA_MT"]        = 0;
  Use["FDA_GAMT"]      = 0;
  Use["FDA_MCMT"]      = 0;
  //
  // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
  Use["MLP"]           = 0; // Recommended ANN
  Use["MLPBFGS"]       = 0; // Recommended ANN with optional training method
  Use["MLPBNN"]        = 0; // Recommended ANN with BFGS training method and Bayesian regulator
  Use["CFMlpANN"]      = 0; // Deprecated ANN from ALEPH
  Use["TMlpANN"]       = 0; // ROOT's own ANN
  //
  // --- Support Vector Machine
  Use["SVM"]           = 0;
  //
  // --- Boosted Decision Trees
  Use["BDT"]           = 0; // uses Adaptive Boost
  Use["BDTG"]          = 0; // uses Gradient Boost
  Use["BDTB"]          = 0; // uses Bagging
  Use["BDTD"]          = 1; // decorrelation + Adaptive Boost
  Use["BDTF"]          = 0; // allow usage of Fisher discriminant for node splitting
  //
  // --- Friedman's RuleFit method, i.e. an optimised series of cuts ("rules")
  Use["RuleFit"]       = 0;
  // ---------------------------------------------------------------

  std::cout << std::endl;
  std::cout << "==> Start TMVAClassification" << std::endl;

  // Select methods (don't look at this code - not of interest)
  if (myMethodList != "") {
    for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;
    std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
    for (UInt_t i = 0; i < mlist.size(); i++) {
      std::string regMethod(mlist[i]);
      if (Use.find(regMethod) == Use.end()) {
        std::cout << "Method \"" << regMethod
                  << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
        for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++)
          std::cout << it->first << " ";
        std::cout << std::endl;
        return;
      }
      Use[regMethod] = 1;
    }
  }

  // --------------------------------------------------------------------------------------------------
  // --- Here the preparation phase begins

  // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
  TString outfileName( "TMVA.root" );
  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

  // Create the factory object. Later you can choose the methods whose
  // performance you'd like to investigate. The factory is the only TMVA
  // object you have to interact with.
  //
  // The first argument is the base of the name of all the weight files
  // in the directory weights/.
  //
  // The second argument is the output file for the training results.
  // All TMVA output can be suppressed by removing the "!" (not) in
  // front of the "Silent" argument in the option string.
  TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
      "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

  // You can add so-called "Spectator variables", which are not used in the MVA training,
  // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
  // input variables, the response values of all trained MVAs, and the spectator variables.
  // factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' );
  // factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' );

  // Read training and test data
  // (it is also possible to use ASCII format as input -> see TMVA Users Guide)
  // TString fname = "./tmva_class_example.root";
  // if (gSystem->AccessPathName( fname ))  // file does not exist in local directory
  //   gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root");
  // std::cout << "--- TMVAClassification : Using input file: " << input->GetName() << std::endl;

  // --- Register the training and test trees
  TChain *signal     = new TChain("ewkzp2j");
  TChain *background = new TChain("ewkzp2j");
  TSystemDirectory dir(inputDir, inputDir);
  TList *files = dir.GetListOfFiles();
  if (files) {
    TSystemFile *file;
    TString fname;
    TIter next(files);
    while ((file = (TSystemFile*)next())) {
      fname = file->GetName();
      if (!fname.EndsWith("_summary.root")) continue;
      if (fname.Contains("Data")) continue;
      if (!fname.Contains("DY")) continue;
      bool isSignal(false);
      if (fname.Contains("JJ")) { signal->Add(fname); isSignal = true; }
      else if (fname.Contains("50toInf") && fname.Contains("DY")) background->Add(fname);
      std::cout << fname << " added as " << (isSignal ? "signal" : "background") << std::endl;
    }
  } else {
    std::cout << "[Error] no files found in " << inputDir << std::endl;
  }
  std::cout << "Signal has "     << signal->GetEntries()     << " raw events" << std::endl
            << "Background has " << background->GetEntries() << " raw events" << std::endl;

  // global event weights per tree
  Double_t signalWeight     = 1.0;
  Double_t backgroundWeight = 1.0;
  factory->AddSignalTree    ( signal,     signalWeight );
  factory->AddBackgroundTree( background, backgroundWeight );

  // event-per-event weights per tree
  factory->SetBackgroundWeightExpression( "weight/cnorm" );
  factory->SetSignalWeightExpression    ( "weight/cnorm" );

  // define variables for the training
  if (minimalTrain) {
    factory->AddVariable( "mjj",    "M_{jj}",          "GeV", 'F' );
    factory->AddVariable( "detajj", "#Delta#eta_{jj}", "",    'F' );
    factory->AddVariable( "spt",    "#Delta_{rel}",    "GeV", 'F' );
  } else {
    factory->AddVariable( "mjj",    "M_{jj}",          "GeV", 'F' );
    factory->AddVariable( "detajj", "#Delta#eta_{jj}", "",    'F' );
    factory->AddVariable( "setajj", "#Sigma#eta_{j}",  "",    'F' );
    factory->AddVariable( "eta1",   "#eta(1)",         "",    'F' );
    factory->AddVariable( "eta2",   "#eta(2)",         "",    'F' );
    factory->AddVariable( "pt1",    "p_{T}(1)",        "GeV", 'F' );
    factory->AddVariable( "pt2",    "p_{T}(2)",        "GeV", 'F' );
    factory->AddVariable( "spt",    "#Delta_{rel}",    "GeV", 'F' );
    if (useQG) factory->AddVariable( "qg1", "q/g(1)", "", 'F' );
    if (useQG) factory->AddVariable( "qg2", "q/g(2)", "", 'F' );
  }

  // Apply additional cuts on the signal and background samples (can be different)
  TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
  TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";

  factory->PrepareTrainingAndTestTree( mycuts, mycutb,
      "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

  // ---- Book MVA methods
  //
  // Please look up the various method configuration options in the corresponding cxx files,
  // e.g. src/MethodCuts.cxx, etc., or here: http://tmva.sourceforge.net/optionRef.html
  // It is possible to preset ranges in the option string in which the cut optimisation should
  // be done: "...:CutRangeMin[2]=-1:CutRangeMax[2]=1...", where [2] is the third input variable.

  // Cut optimisation
  if (Use["Cuts"])
    factory->BookMethod( TMVA::Types::kCuts, "Cuts",
        "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );
  if (Use["CutsD"])
    factory->BookMethod( TMVA::Types::kCuts, "CutsD",
        "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );
  if (Use["CutsPCA"])
    factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
        "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );
  if (Use["CutsGA"])
    factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
        "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );
  if (Use["CutsSA"])
    factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
        "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

  // Likelihood ("naive Bayes estimator")
  if (Use["Likelihood"])
    factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
        "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );
  // Decorrelated likelihood
  if (Use["LikelihoodD"])
    factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
        "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );
  // PCA-transformed likelihood
  if (Use["LikelihoodPCA"])
    factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
        "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" );
  // Use a kernel density estimator to approximate the PDFs
  if (Use["LikelihoodKDE"])
    factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
        "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" );
  // Use a variable-dependent mix of splines and kernel density estimator
  if (Use["LikelihoodMIX"])
    factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
        "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" );

  // Test the multi-dimensional probability density estimator.
  // Here are the option strings for the MinMax and RMS methods, respectively:
  //   "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3"
  //   "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3"
  if (Use["PDERS"])
    factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
        "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );
  if (Use["PDERSD"])
    factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
        "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );
  if (Use["PDERSPCA"])
    factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
        "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

  // Multi-dimensional likelihood estimator using self-adapting phase-space binning
  if (Use["PDEFoam"])
    factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
        "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );
  if (Use["PDEFoamBoost"])
    factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
        "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

  // K-Nearest Neighbour classifier (KNN)
  if (Use["KNN"])
    factory->BookMethod( TMVA::Types::kKNN, "KNN",
        "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

  // H-Matrix (chi-squared) method
  if (Use["HMatrix"])
    factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" );

  // Linear discriminant (same as Fisher discriminant)
  if (Use["LD"])
    factory->BookMethod( TMVA::Types::kLD, "LD",
        "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

  // Fisher discriminant (same as LD)
  if (Use["Fisher"])
    factory->BookMethod( TMVA::Types::kFisher, "Fisher",
        "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

  if (Use["FisherCat"]) {
    TMVA::MethodBase* fiCat = factory->BookMethod( TMVA::Types::kCategory, "FisherCat", "" );
    TMVA::MethodCategory* mcategory = dynamic_cast<TMVA::MethodCategory*>(fiCat);
    mcategory->AddMethod( "mjj<250",            "mjj:detajj:spt:", TMVA::Types::kFisher, "Fisher_Cat1",
        "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
    mcategory->AddMethod( "mjj>=250&&mjj<350",  "mjj:detajj:spt:", TMVA::Types::kFisher, "Fisher_Cat0000",
        "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
    mcategory->AddMethod( "mjj>=350&&mjj<450",  "mjj:detajj:spt:", TMVA::Types::kFisher, "Fisher_Cat0350",
        "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
    mcategory->AddMethod( "mjj>=450&&mjj<550",  "mjj:detajj:spt:", TMVA::Types::kFisher, "Fisher_Cat0450",
        "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
    mcategory->AddMethod( "mjj>=550&&mjj<750",  "mjj:detajj:spt:", TMVA::Types::kFisher, "Fisher_Cat0550",
        "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
    mcategory->AddMethod( "mjj>=750&&mjj<1000", "mjj:detajj:spt:", TMVA::Types::kFisher, "Fisher_Cat0750",
        "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
    mcategory->AddMethod( "mjj>=1000",          "mjj:detajj:spt:", TMVA::Types::kFisher, "Fisher_Cat1000",
        "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );
  }

  // Fisher with Gauss-transformed input variables
  if (Use["FisherG"])
    factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

  // Composite classifier: ensemble (tree) of boosted Fisher classifiers
  if (Use["BoostedFisher"])
    factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher",
        "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" );

  // Function discriminant analysis (FDA) -- test of various fitters; the recommended one is Minuit (or GA or SA)
  if (Use["FDA_MC"])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
        "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );
  if (Use["FDA_GA"]) // can also use the Simulated Annealing (SA) algorithm (see CutsSA options)
    factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
        "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );
  if (Use["FDA_SA"]) // can also use the Simulated Annealing (SA) algorithm (see CutsSA options)
    factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
        "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );
  if (Use["FDA_MT"])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
        "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );
  if (Use["FDA_GAMT"])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
        "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );
  if (Use["FDA_MCMT"])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
        "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

  // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
  if (Use["MLP"])
    factory->BookMethod( TMVA::Types::kMLP, "MLP",
        "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );
  if (Use["MLPBFGS"])
    factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS",
        "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );
  if (Use["MLPBNN"])
    factory->BookMethod( TMVA::Types::kMLP, "MLPBNN",
        "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with Bayesian regulators

  // CF (Clermont-Ferrand) ANN
  if (Use["CFMlpANN"])
    factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN",
        "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:...

  // Tmlp (ROOT) ANN
  if (Use["TMlpANN"])
    factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN",
        "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:...

  // Support Vector Machine
  if (Use["SVM"])
    factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

  // Boosted Decision Trees
  if (Use["BDTG"]) // Gradient Boost
    factory->BookMethod( TMVA::Types::kBDT, "BDTG",
        "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:MaxDepth=2" );
  if (Use["BDT"]) // Adaptive Boost
    factory->BookMethod( TMVA::Types::kBDT, "BDT",
        "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );
  if (Use["BDTB"]) // Bagging
    factory->BookMethod( TMVA::Types::kBDT, "BDTB",
        "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" );
  if (Use["BDTD"]) // Decorrelation + Adaptive Boost
    factory->BookMethod( TMVA::Types::kBDT, "BDTD",
        "!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=25:PruneMethod=CostComplexity:PruneStrength=25.0:VarTransform=Decorrelate" );
        //"!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );
  if (Use["BDTF"]) // Allow using the Fisher discriminant for node splitting of (strongly) linearly correlated variables
    factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher",
        "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );

  // RuleFit -- TMVA implementation of Friedman's method
  if (Use["RuleFit"])
    factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
        "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

  // For an example of the category classifier usage, see: TMVAClassificationCategory

  // --------------------------------------------------------------------------------------------------
  // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events
  // factory->OptimizeAllMethods("SigEffAt001","Scan");
  // factory->OptimizeAllMethods("ROCIntegral","FitGA");

  // --------------------------------------------------------------------------------------------------
  // ---- Now you can tell the factory to train, test, and evaluate the MVAs

  // Train MVAs using the set of training events
  factory->TrainAllMethods();
  // ---- Evaluate all MVAs using the set of test events
  factory->TestAllMethods();
  // ----- Evaluate and compare performance of all configured MVAs
  factory->EvaluateAllMethods();

  // --------------------------------------------------------------
  // Save the output
  outputFile->Close();
  std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
  std::cout << "==> TMVAClassification is done!" << std::endl;
  std::cout << " ==> Weights are stored in " << TMVA::gConfig().GetIONames().fWeightFileDir << std::endl;
  delete factory;

  // Launch the GUI for the root macros
  // if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
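// --------------------------------------------------------------------------
// A small sketch (not from the original code) showing how the TMVA.root file
// written by the factory above can be inspected without the GUI. Directory
// and histogram names follow the usual TMVA layout, Method_<type>/<title>/
// MVA_<title>_*; the defaults below match the BDTD method booked above, but
// the exact names may differ between TMVA versions, so adjust as needed.
void inspectTMVAOutput(const char* fileName = "TMVA.root",
                       const char* type = "BDT", const char* title = "BDTD") {
  TFile* f = TFile::Open(fileName);
  if (!f) return;
  // background rejection vs. signal efficiency, and the response distributions
  TH1* roc = (TH1*)f->Get(Form("Method_%s/%s/MVA_%s_rejBvsS", type, title, title));
  TH1* sig = (TH1*)f->Get(Form("Method_%s/%s/MVA_%s_S",       type, title, title));
  TH1* bkg = (TH1*)f->Get(Form("Method_%s/%s/MVA_%s_B",       type, title, title));
  if (roc) // the mean bin content of rejBvsS is a rough ROC-integral estimate
    std::cout << title << " approx. ROC integral: "
              << roc->Integral() / roc->GetNbinsX() << std::endl;
  if (sig && bkg) {
    sig->SetLineColor(kBlue); sig->Draw("hist");
    bkg->SetLineColor(kRed);  bkg->Draw("hist same");
  }
}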
void Reg() {
  TMVA::Tools::Instance();
  std::cout << "==> Start TMVARegression" << std::endl;

  // Read the cut upper bounds (99-percentile values) from a text file;
  // the loop keeps the values of the last line.
  std::ifstream myfile;
  myfile.open("99per.txt");
  std::ostringstream xcS, xcH, xcP, xcC, xcN;
  double xS, xH, xC, xN, xP;
  if (myfile.is_open()) {
    while (myfile >> xS >> xH >> xC >> xN >> xP) {}
  }
  xcS << xS; xcH << xH; xcC << xC; xcN << xN; xcP << xP;

  // Output file
  TString outfileName( "Ex1out_FullW_def.root" );
  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

  // Declaring the factory
  TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
      "!V:!Silent:Color:DrawProgressBar" );

  // Declaring input variables
  factory->AddVariable( "Sieie", 'F' );
  factory->AddVariable( "ToE",   'F' );
  factory->AddVariable( "isoC",  'F' );
  factory->AddVariable( "isoN",  'F' );
  factory->AddVariable( "isoP",  'F' );

  TString fname = "../../CutTMVATrees_Barrel.root";
  TFile *input = TFile::Open( fname );

  // --- Register the signal and background trees
  TTree *signal     = (TTree*)input->Get("t_S");
  TTree *background = (TTree*)input->Get("t_B");

  // Global event weights per tree
  Double_t signalWeight     = 1.0;
  Double_t backgroundWeight = 1.0;

  // You can add an arbitrary number of signal or background trees
  factory->AddSignalTree    ( signal,     signalWeight );
  factory->AddBackgroundTree( background, backgroundWeight );

  TCut mycuts = "";
  TCut mycutb = "";
  // factory->PrepareTrainingAndTestTree( mycuts, mycutb,
  //     "nTrain_Signal=9000:nTrain_Background=9000:nTest_Signal=10000:nTest_Background=10000" );
  factory->SetBackgroundWeightExpression("weightPT*weightXS");
  factory->SetSignalWeightExpression    ("weightPT*weightXS");

  TString methodName    = "Cuts_FullsampleW_def";
  TString methodOptions = "!H:!V:FitMethod=GA:EffMethod=EffSel";
  methodOptions += ":VarProp[0]=FMin:VarProp[1]=FMin:VarProp[2]=FMin:VarProp[3]=FMin:VarProp[4]=FMin";
  methodOptions += (":CutRangeMax[0]=" + xcS.str()).c_str();
  methodOptions += (":CutRangeMax[1]=" + xcH.str()).c_str();
  methodOptions += (":CutRangeMax[2]=" + xcC.str()).c_str();
  methodOptions += (":CutRangeMax[3]=" + xcN.str()).c_str();
  methodOptions += (":CutRangeMax[4]=" + xcP.str()).c_str();

  //************
  factory->BookMethod( TMVA::Types::kCuts, methodName, methodOptions );

  factory->TrainAllMethods();
  factory->TestAllMethods();
  factory->EvaluateAllMethods();

  // --------------------------------------------------------------
  // Save the output
  outputFile->Close();
  std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
  std::cout << "==> TMVARegression is done!" << std::endl;
  delete factory;
}
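// --------------------------------------------------------------------------
// Sketch (not in the original): applying the rectangular-cut classifier
// trained by Reg(). For the Cuts method, TMVA::Reader returns a per-event
// pass/fail decision at a chosen target signal efficiency. The weight-file
// path follows the TMVA convention for the factory and method names used
// above; the variable values here are left to the caller to fill.
void applyCuts(Double_t targetEffS = 0.9) {
  Float_t Sieie, ToE, isoC, isoN, isoP;
  TMVA::Reader reader("!Color:!Silent");
  reader.AddVariable("Sieie", &Sieie);
  reader.AddVariable("ToE",   &ToE);
  reader.AddVariable("isoC",  &isoC);
  reader.AddVariable("isoN",  &isoN);
  reader.AddVariable("isoP",  &isoP);
  reader.BookMVA("Cuts", "weights/TMVAClassification_Cuts_FullsampleW_def.weights.xml");

  // ... fill the five variables for one photon candidate, then:
  Bool_t passed = reader.EvaluateMVA("Cuts", targetEffS) > 0.5;
  std::cout << "candidate passes cuts at effS=" << targetEffS << " : " << passed << std::endl;
}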
int TestBDT_forreal_test(TString sig) {
  // This loads the library
  TMVA::Tools::Instance(); // to get access to the GUI and all tmva macros

  //TString thisdir = gSystem->DirName(gInterpreter->GetCurrentMacroName()); // was not commented, but does not work anymore
  //gROOT->SetMacroPath(thisdir + ":" + gROOT->GetMacroPath());              // was not commented, but cannot work anymore
  //gROOT->ProcessLine(".L TMVAGui.C");

  TString outfileName( "rootfiles/TMVA/resultTMVA_" + sig + "VsTTbar2l.root" );
  TString weightname ( "weightsTMVA_" + sig + "VsTTbar2l" );
  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

  TMVA::Factory *factory = new TMVA::Factory( weightname, outputFile,
      "!V:!Silent:Color:DrawProgressBar" );

  vector<TString> histonames; histonames.clear();
  map<string, float> value;

  //histonames.push_back("MT2W");
  //histonames.push_back("MT2_lb_b");
  //histonames.push_back("MT2_lb_bqq");
  //histonames.push_back("MT2_lb_b_mass");
  //histonames.push_back("MT2_lb_bqq_mass");
  histonames.push_back("Mlb_lead_bdiscr");
  //histonames.push_back("Mjjj");
  //histonames.push_back("topness");
  //histonames.push_back("topnessMod");
  histonames.push_back("pfmet");
  //histonames.push_back("ak4_HT");
  //histonames.push_back("MET_over_sqrtHT");
  //histonames.push_back("ak4_htratiom");
  histonames.push_back("dR_lep_leadb");
  //histonames.push_back("hadronic_top_chi2");
  //histonames.push_back("ngoodbtags");
  histonames.push_back("ngoodjets");
  //histonames.push_back("mindphi_met_j1_j2");
  //histonames.push_back("lep1_pt");
  histonames.push_back("ak4pfjets_leadMEDbjet_p4_Pt");

  for (unsigned int b = 0; b < histonames.size(); ++b) {
    factory->AddVariable(histonames[b], 'F');
  }

  TString signame  = "/nfs-7/userdata/stopRun2/testMVA/" + sig + ".root";
  TString bkgname1 = "/nfs-7/userdata/stopRun2/testMVA/TTJets_DiLept_madgraph_25ns_1.root";
  TString bkgname2 = "/nfs-7/userdata/stopRun2/testMVA/TTJets_DiLept_madgraph_25ns_2.root";
  /*
  TString signame  = "/hadoop/cms/store/user/haweber/forBDT/" + sig + ".root";
  TString bkgname1 = "/hadoop/cms/store/user/haweber/forBDT/TTJets_DiLept_madgraph_25ns_1.root";
  TString bkgname2 = "/hadoop/cms/store/user/haweber/forBDT/TTJets_DiLept_madgraph_25ns_2.root";
  */
  cout << "signame " << signame << endl;

  TFile *inputSig  = TFile::Open( signame );
  TFile *inputBkg1 = TFile::Open( bkgname1 );
  TFile *inputBkg2 = TFile::Open( bkgname2 );

  TTree *signal      = (TTree*)inputSig->Get("t");
  TTree *background1 = (TTree*)inputBkg1->Get("t");
  TTree *background2 = (TTree*)inputBkg2->Get("t");

  // global event weights per tree (see below for setting event-wise weights)
  Double_t signalWeight     = 1.0;
  Double_t backgroundWeight = 1.0;

  // You can add an arbitrary number of signal or background trees
  factory->AddSignalTree    ( signal,      signalWeight );
  factory->AddBackgroundTree( background1, backgroundWeight );
  factory->AddBackgroundTree( background2, backgroundWeight );
  //factory->SetBackgroundWeightExpression( "weight" );

  // Apply additional cuts on the signal and background samples (can be different)
  //TCut mycuts = "MT2W>200&&mindphi_met_j1_j2>0.8"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
  //TCut mycutb = "MT2W>200&&mindphi_met_j1_j2>0.8"; // for example: TCut mycutb = "abs(var1)<0.5";
  //if (sig.Contains("T2tt_425_325") || sig.Contains("T2tt_500_325")) { mycuts = "mindphi_met_j1_j2>0.8"; mycutb = "mindphi_met_j1_j2>0.8"; }
  TCut mycuts = "";
  TCut mycutb = "";

  factory->PrepareTrainingAndTestTree( mycuts, mycutb,
      "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

  factory->BookMethod( TMVA::Types::kBDT, "BDT",
      "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );

  // Train MVAs using the set of training events
  cout << "Train methods" << endl;
  factory->TrainAllMethods();
  // ---- Evaluate all MVAs using the set of test events
  cout << "Test methods" << endl;
  factory->TestAllMethods();
  // ----- Evaluate and compare performance of all configured MVAs
  cout << "Evaluate methods" << endl;
  factory->EvaluateAllMethods();

  // --------------------------------------------------------------
  // Save the output
  outputFile->Close();
  std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
  std::cout << "==> TMVAClassification is done!" << std::endl;
  delete factory;

  // Launch the GUI for the root macros
  //if (!gROOT->IsBatch()) TMVA::TMVAGui( outfileName );
  return 0;
}
void TMVATrainer() {
  // This loads the library
  TMVA::Tools::Instance();

  // --- Here the preparation phase begins

  // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
  TString outfileName = "TMVATrainingResults_fat_BBvsGSP.root";
  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

  // Create the factory object. Later you can choose the methods whose
  // performance you'd like to investigate. The factory is the only TMVA
  // object you have to interact with.
  //
  // The first argument is the base of the name of all the weight files
  // in the directory weights/.
  //
  // The second argument is the output file for the training results.
  // All TMVA output can be suppressed by removing the "!" (not) in
  // front of the "Silent" argument in the option string.
  TMVA::Factory *factory = new TMVA::Factory( "TMVATrainer", outputFile,
      "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

  // If you wish to modify default settings
  // (please check "src/Config.h" to see all available global options)
  // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
  // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

  // Define the input variables that shall be used for the MVA training.
  // Note that you may also use variable expressions, such as "3*var1/var2*abs(var3)"
  // [all types of expressions that can also be parsed by TTree::Draw("expression")].
  factory->AddVariable("TagVarCSV_vertexCategory","TagVarCSV_vertexCategory","units",'F');
  factory->AddVariable("TagVarCSV_jetNTracks","TagVarCSV_jetNTracks","units",'F');
  //factory->AddVariable("TagVarCSV_trackSip2dSig_0","TagVarCSV_trackSip2dSig_0","units",'F');
  //factory->AddVariable("TagVarCSV_trackSip2dSig_1","TagVarCSV_trackSip2dSig_1","units",'F');
  //factory->AddVariable("TagVarCSV_trackSip2dSig_2","TagVarCSV_trackSip2dSig_2","units",'F');
  //factory->AddVariable("TagVarCSV_trackSip2dSig_3","TagVarCSV_trackSip2dSig_3","units",'F');
  factory->AddVariable("TagVarCSV_trackSip3dSig_0","TagVarCSV_trackSip3dSig_0","units",'F');
  factory->AddVariable("TagVarCSV_trackSip3dSig_1","TagVarCSV_trackSip3dSig_1","units",'F');
  factory->AddVariable("TagVarCSV_trackSip3dSig_2","TagVarCSV_trackSip3dSig_2","units",'F');
  factory->AddVariable("TagVarCSV_trackSip3dSig_3","TagVarCSV_trackSip3dSig_3","units",'F');
  //factory->AddVariable("TagVarCSV_trackPtRel_0","TagVarCSV_trackPtRel_0","units",'F');
  //factory->AddVariable("TagVarCSV_trackPtRel_1","TagVarCSV_trackPtRel_1","units",'F');
  //factory->AddVariable("TagVarCSV_trackPtRel_2","TagVarCSV_trackPtRel_2","units",'F');
  //factory->AddVariable("TagVarCSV_trackPtRel_3","TagVarCSV_trackPtRel_3","units",'F');
  factory->AddVariable("TagVarCSV_trackSip2dSigAboveCharm","TagVarCSV_trackSip2dSigAboveCharm","units",'F');
  //factory->AddVariable("TagVarCSV_trackSip3dSigAboveCharm","TagVarCSV_trackSip3dSigAboveCharm","units",'F');
  //factory->AddVariable("TagVarCSV_trackSumJetEtRatio","TagVarCSV_trackSumJetEtRatio","units",'F');
  //factory->AddVariable("TagVarCSV_trackSumJetDeltaR","TagVarCSV_trackSumJetDeltaR","units",'F');
  factory->AddVariable("TagVarCSV_jetNTracksEtaRel","TagVarCSV_jetNTracksEtaRel","units",'F');
  factory->AddVariable("TagVarCSV_trackEtaRel_0","TagVarCSV_trackEtaRel_0","units",'F');
  factory->AddVariable("TagVarCSV_trackEtaRel_1","TagVarCSV_trackEtaRel_1","units",'F');
  factory->AddVariable("TagVarCSV_trackEtaRel_2","TagVarCSV_trackEtaRel_2","units",'F');
  factory->AddVariable("TagVarCSV_jetNSecondaryVertices","TagVarCSV_jetNSecondaryVertices","units",'F');
  factory->AddVariable("TagVarCSV_vertexMass","TagVarCSV_vertexMass","units",'F');
  factory->AddVariable("TagVarCSV_vertexNTracks","TagVarCSV_vertexNTracks","units",'F');
  factory->AddVariable("TagVarCSV_vertexEnergyRatio","TagVarCSV_vertexEnergyRatio","units",'F');
  factory->AddVariable("TagVarCSV_vertexJetDeltaR","TagVarCSV_vertexJetDeltaR","units",'F');
  factory->AddVariable("TagVarCSV_flightDistance2dSig","TagVarCSV_flightDistance2dSig","units",'F');
  //factory->AddVariable("TagVarCSV_flightDistance3dSig","TagVarCSV_flightDistance3dSig","units",'F');

  // You can add so-called "Spectator variables", which are not used in the MVA training,
  // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
  // input variables, the response values of all trained MVAs, and the spectator variables.
  factory->AddSpectator("Jet_pt","Jet_pt","units",'F');
  factory->AddSpectator("Jet_eta","Jet_eta","units",'F');
  factory->AddSpectator("Jet_phi","Jet_phi","units",'F');
  factory->AddSpectator("Jet_mass","Jet_mass","units",'F');
  factory->AddSpectator("Jet_massGroomed","Jet_massGroomed","units",'F');
  factory->AddSpectator("Jet_flavour","Jet_flavour","units",'F');
  factory->AddSpectator("Jet_nbHadrons","Jet_nbHadrons","units",'F');
  factory->AddSpectator("Jet_JP","Jet_JP","units",'F');
  factory->AddSpectator("Jet_JBP","Jet_JBP","units",'F');
  factory->AddSpectator("Jet_CSV","Jet_CSV","units",'F');
  factory->AddSpectator("Jet_CSVIVF","Jet_CSVIVF","units",'F');
  factory->AddSpectator("Jet_tau1","Jet_tau1","units",'F');
  factory->AddSpectator("Jet_tau2","Jet_tau2","units",'F');
  factory->AddSpectator("SubJet1_CSVIVF","SubJet1_CSVIVF","units",'F');
  factory->AddSpectator("SubJet2_CSVIVF","SubJet2_CSVIVF","units",'F');

  // Read training and test data
  // (it is also possible to use ASCII format as input -> see TMVA Users Guide)
  TString fnameSig = "RadionToHH_4b_M-800_TuneZ2star_8TeV-Madgraph_pythia6_JetTaggingVariables_training.root";
  TString fnameBkg = "QCD_Pt-300to470_TuneZ2star_8TeV_pythia6_JetTaggingVariables_training.root";
  TFile *inputSig = TFile::Open( fnameSig );
  TFile *inputBkg = TFile::Open( fnameBkg );
  std::cout << "--- TMVAClassification : Using input files: "
            << inputSig->GetName() << std::endl
            << inputBkg->GetName() << std::endl;

  // --- Register the training and test trees
  TTree *sigTree = (TTree*)inputSig->Get("tagVars/ttree");
  TTree *bkgTree = (TTree*)inputBkg->Get("tagVars/ttree");

  // global event weights per tree (see below for setting event-wise weights)
  Double_t signalWeight     = 1.0;
  Double_t backgroundWeight = 1.0;

  // factory->SetInputTrees( tree, signalCut, backgroundCut );
  factory->AddSignalTree    ( sigTree, signalWeight );
  factory->AddBackgroundTree( bkgTree, backgroundWeight );

  // Apply additional cuts on the signal and background samples (can be different)
  TCut signalCut     = "Jet_massGroomed>80 && Jet_massGroomed<150";
  TCut backgroundCut = "abs(Jet_flavour)==5 && Jet_nbHadrons>1 && Jet_massGroomed>80 && Jet_massGroomed<150";

  // Tell the factory how to use the training and testing events
  factory->PrepareTrainingAndTestTree( signalCut, backgroundCut,
      "nTrain_Signal=22000:nTest_Signal=20000:nTrain_Background=22000:nTest_Background=2730:SplitMode=Random:!V" );

  // Gradient Boost
  factory->BookMethod( TMVA::Types::kBDT, "BDTG_T1000D3_fat_BBvsGSP",
      "!H:!V:NTrees=1000:MaxDepth=3:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );
  //factory->BookMethod( TMVA::Types::kBDT, "BDTG_T1000D5_fat_BBvsGSP",
  //    "!H:!V:NTrees=1000:MaxDepth=5:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20" );
  // // Adaptive Boost
  // factory->BookMethod( TMVA::Types::kBDT, "BDT",
  //     "!H:!V:NTrees=1000:MaxDepth=5:MinNodeSize=2.5%:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );
  // // Bagging
  // factory->BookMethod( TMVA::Types::kBDT, "BDTB",
  //     "!H:!V:NTrees=1000:MaxDepth=5:MinNodeSize=2.5%:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" );
  // // Decorrelation + Adaptive Boost
  // factory->BookMethod( TMVA::Types::kBDT, "BDTD",
  //     "!H:!V:NTrees=1000:MaxDepth=5:MinNodeSize=2.5%:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );

  // ---- Now you can tell the factory to train, test, and evaluate the MVAs

  // Train MVAs using the set of training events
  factory->TrainAllMethods();
  // ---- Evaluate all MVAs using the set of test events
  factory->TestAllMethods();
  // ----- Evaluate and compare performance of all configured MVAs
  factory->EvaluateAllMethods();

  // --------------------------------------------------------------
  // Save the output
  outputFile->Close();
  std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
  std::cout << "==> TMVAClassification is done!" << std::endl;
  delete factory;

  // Launch the GUI for the root macros
  if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
void TMVATraining_ch4() {
  TFile* outputFile = TFile::Open( "TMVA_ch4.root", "RECREATE" );
  TMVA::Factory *factory = new TMVA::Factory( "MVAnalysis", outputFile, "!V" );

  TFile *signal     = TFile::Open("../production/BGx0/Prod2_iptubeK0/B0_etapr-eta-3pi2pi_KS-pi+pi-_output_signal_iptubeK0.root");
  TFile *background = TFile::Open("../production/BGx0/Prod2_iptubeK0/B0_etapr-eta-3pi2pi_KS-pi+pi-_output_ccbar_iptubeK0.root");

  factory->AddSignalTree    ( (TTree*)signal->Get("B0"),     1.0 );
  factory->AddBackgroundTree( (TTree*)background->Get("B0"), 1.0 );

  TCut sigCut = TCut("B0__isContinuumEvent==0");
  TCut bgCut  = TCut("B0__isContinuumEvent==1");

  factory->AddVariable("B0_ThrustB", 'F');
  factory->AddVariable("B0_ThrustO", 'F');
  factory->AddVariable("B0_CosTBTO", 'F');
  factory->AddVariable("B0_CosTBz",  'F');
  factory->AddVariable("B0_R2",      'F');
  factory->AddVariable("B0_cc1",     'F');
  factory->AddVariable("B0_cc2",     'F');
  factory->AddVariable("B0_cc3",     'F');
  factory->AddVariable("B0_cc4",     'F');
  factory->AddVariable("B0_cc5",     'F');
  factory->AddVariable("B0_cc6",     'F');
  factory->AddVariable("B0_cc7",     'F');
  factory->AddVariable("B0_cc8",     'F');
  factory->AddVariable("B0_cc9",     'F');
  factory->AddVariable("B0_mm2",     'F');
  factory->AddVariable("B0_et",      'F');
  factory->AddVariable("B0_hso00",   'F');
  // factory->AddVariable("B0_hso01", 'F');
  factory->AddVariable("B0_hso02",   'F');
  //factory->AddVariable("B0_hso03",  'F');
  factory->AddVariable("B0_hso04",   'F');
  factory->AddVariable("B0_hso10",   'F');
  factory->AddVariable("B0_hso12",   'F');
  factory->AddVariable("B0_hso14",   'F');
  factory->AddVariable("B0_hso20",   'F');
  factory->AddVariable("B0_hso22",   'F');
  factory->AddVariable("B0_hso24",   'F');
  factory->AddVariable("B0_hoo0",    'F');
  factory->AddVariable("B0_hoo1",    'F');
  factory->AddVariable("B0_hoo2",    'F');
  factory->AddVariable("B0_hoo3",    'F');
  factory->AddVariable("B0_hoo4",    'F');

  factory->PrepareTrainingAndTestTree( sigCut, bgCut,
      "!V:nTrain_Signal=10000:nTest_Signal=10000:nTrain_Background=10000:nTest_Background=10000:SplitMode=Random:NormMode=NumEvents" );

  //factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
  //    "H:V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" );
  //factory->BookMethod( TMVA::Types::kMLP, "MLP", "!V:NCycles=200:HiddenLayers=N+1,N:TestRate=5" );
  factory->BookMethod( TMVA::Types::kMLP, "MLPBNN",
      "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" );
  factory->BookMethod( TMVA::Types::kBDT, "BDT",
      "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );
  factory->BookMethod( TMVA::Types::kSVM, "SVM", "!H:!V:Gamma=0.25:Tol=0.001:VarTransform=Norm" );
  //factory->BookMethod( TMVA::Types::kBDT, "FastBDT",
  //    "!H:!V:CreateMVAPdfs:NbinsMVAPdf=40:NTrees=100:Shrinkage=0.10" ); //:RandRatio=0.5:NCutLevel=8:NTreeLayers=3"

  factory->TrainAllMethods();
  factory->TestAllMethods();
  factory->EvaluateAllMethods();

  outputFile->Close();
  delete factory;

  // Launch the GUI for the root macros
  if (!gROOT->IsBatch()) TMVA::TMVAGui( "TMVA_ch4.root" );
}
// Assumed includes for standalone compilation of this (older-API) example.
#include <cstdio>
#include <iostream>

#include "TChain.h"
#include "TFile.h"
#include "TMVA/Factory.h"

int main ()
{
  TFile * outputfile = TFile::Open ("outputTMVA.root", "RECREATE") ;
  TMVA::Factory * TMVAtest = new TMVA::Factory ("TMVAtest", outputfile, "S") ;

  //PG get the signal and deliver it to the TMVA factory
  TChain signalTree ("sample") ;
  signalTree.Add ("data/sig_0.root") ;
  std::cout << "READ " << signalTree.GetEntries () << " signal events\n" ;
  TMVAtest->AddSignalTree (&signalTree, 1) ;

  //PG get the bkg and deliver it to the TMVA factory
  TChain bkgTree ("sample") ;
  bkgTree.Add ("data/bkg_0.root") ;
  std::cout << "READ " << bkgTree.GetEntries () << " bkg events\n" ;
  TMVAtest->AddBackgroundTree (&bkgTree, 1) ;

  //PG get the training and test samples and deliver them to the TMVA factory
  TChain signalTrainTree ("sample") ;
  signalTrainTree.Add ("data/sig_1.root") ;
  std::cout << "READ " << signalTrainTree.GetEntries () << " signal train events\n" ;
  TChain bkgTrainTree ("sample") ;
  bkgTrainTree.Add ("data/bkg_1.root") ;
  std::cout << "READ " << bkgTrainTree.GetEntries () << " bkg train events\n" ;
  TMVAtest->SetInputTrees (signalTrainTree.GetTree (), bkgTrainTree.GetTree (), 1., 1.) ;

  //PG variables to be used for the selection
  //PG must be defined in the TTrees
  TMVAtest->AddVariable ("vars.x", 'F') ;
  TMVAtest->AddVariable ("vars.y", 'F') ;

  int signalNumTrain = signalTrainTree.GetEntries () * 4 / 5 ;
  int bkgNumTrain    = bkgTrainTree.GetEntries () * 4 / 5 ;
  int signalNumTest  = signalTrainTree.GetEntries () - signalNumTrain ;
  int bkgNumTest     = bkgTrainTree.GetEntries () - bkgNumTrain ;
  char trainOptions[120] ;
  sprintf (trainOptions, "NSigTrain=%d:NBkgTrain=%d:NSigTest=%d:NBkgTest=%d",
           signalNumTrain, bkgNumTrain, signalNumTest, bkgNumTest) ;
  // the second call overrides the first: zeros tell TMVA to use all available events
  sprintf (trainOptions, "NSigTrain=%d:NBkgTrain=%d:NSigTest=%d:NBkgTest=%d",
           0, 0, 0, 0) ;
  std::cout << "TRAINING CONFIGURATION : " << trainOptions << "\n" ;
  TMVAtest->PrepareTrainingAndTestTree ("", trainOptions) ;

  //PG prepare the classifier
  //PG cut-based, default params
  TMVAtest->BookMethod (TMVA::Types::kCuts, "Cuts") ;

  TMVAtest->TrainAllMethods () ;
  TMVAtest->TestAllMethods () ;
  TMVAtest->EvaluateAllMethods () ;

  delete TMVAtest ;
  delete outputfile ;
  return 0 ;
}
// Assumed includes for standalone compilation; InputFileReader is an
// analysis-specific helper (declared elsewhere in this package) that builds
// a TChain from a file list.
#include <iostream>
#include <string>

#include "TChain.h"
#include "TFile.h"
#include "TMVA/Factory.h"

int main(int argc, char * argv[])
{
  // Processing input options
  std::string outFname("QualityNaF.root");

  // Open input files, get the trees
  TChain *mc = InputFileReader("FileListNtuples_ext.txt", "parametri_geo");

  // Preparing options for the TMVA::Factory
  std::string options(
      "!V:"
      "!Silent:"
      "Color:"
      "DrawProgressBar:"
      "Transformations=I;D;P;G,D:"
      "AnalysisType=Classification" );

  // Creating the factory
  TFile * ldFile = new TFile(outFname.c_str(), "RECREATE");
  TMVA::Factory * factory = new TMVA::Factory("QualityNaF", ldFile, options.c_str());

  // Preparing variables
  // general
  /*factory->AddVariable("Chisquare", 'F');
  factory->AddVariable("Layernonusati", 'I');
  factory->AddVariable("NTofUsed", 'I');
  factory->AddVariable("diffR", 'F');
  factory->AddVariable("TOF_Up_Down", 'F');*/
  // TOF
  //factory->AddVariable("TOFchisq_s", 'F');
  //factory->AddVariable("TOFchisq_t", 'F');
  // RICH
  factory->AddVariable("Richtotused", 'F');
  factory->AddVariable("RichPhEl", 'F');
  factory->AddVariable("RICHprob", 'F');
  factory->AddVariable("RICHcollovertotal");
  factory->AddVariable("RICHLipBetaConsistency");
  factory->AddVariable("RICHTOFBetaConsistency");
  factory->AddVariable("RICHChargeConsistency");
  factory->AddVariable("RICHPmts");
  factory->AddVariable("RICHgetExpected");
  factory->AddVariable("tot_hyp_p_uncorr");
  factory->AddVariable("Bad_ClusteringRICH");
  factory->AddVariable("NSecondariesRICHrich");
  //factory->AddVariable("HitHValldir");
  //factory->AddVariable("HitHVallrefl");
  //factory->AddVariable("HVBranchCheck := (HitHValldir - HitHVoutdir) - (HitHVallrefl - HitHVoutrefl)");
  factory->AddVariable("HitHVoutdir");
  factory->AddVariable("HitHVoutrefl");

  // Spectator variables
  factory->AddSpectator("R", 'F');
  factory->AddSpectator("BetaRICH_new", 'F');

  // Preselection cuts
  std::string PreSelection = "qL1>0&&(joinCutmask&187)==187&&qL1<1.75&&R>0";
  std::string ChargeCut    = "qUtof>0.8&&qUtof<1.3&&qLtof>0.8&&qLtof<1.3";
  std::string VelocityCut  = /*"Beta<0.8";*/ "((joinCutmask>>11))==1024&&BetaRICH_new>0&&BetaRICH_new<0.975";
  std::string signalCut    = /*"(R/Beta)*(1-Beta^2)^0.5>1.65&&GenMass>1&&GenMass<2";*/ "(R/BetaRICH_new)*(1-BetaRICH_new^2)^0.5>0.5&&(R/BetaRICH_new)*(1-BetaRICH_new^2)^0.5<1.5";
  std::string bkgndCut     = /*"(R/Beta)*(1-Beta^2)^0.5>1.65&&GenMass>0&&GenMass<1";*/ "(R/BetaRICH_new)*(1-BetaRICH_new^2)^0.5>3";

  factory->AddTree(mc, "Signal",     1, (PreSelection + "&&" + ChargeCut + "&&" + VelocityCut + "&&" + signalCut).c_str());
  factory->AddTree(mc, "Background", 1, (PreSelection + "&&" + ChargeCut + "&&" + VelocityCut + "&&" + bkgndCut).c_str());

  // Preparing
  std::string preselection = "";
  std::string inputparams(
      "SplitMode=Random:"
      "NormMode=NumEvents:"
      "!V" );
  factory->PrepareTrainingAndTestTree(preselection.c_str(), inputparams.c_str());

  // Training
  std::string trainparams = "!H:!V:MaxDepth=3";
  factory->BookMethod(TMVA::Types::kBDT, "BDT", trainparams.c_str());

  trainparams = "!H:!V";
  factory->BookMethod(TMVA::Types::kLikelihood, "Likelihood", trainparams.c_str());

  trainparams = "!H:!V:VarTransform=Decorrelate";
  //factory->BookMethod(TMVA::Types::kLikelihood, "LikelihoodD", trainparams.c_str());
  trainparams = "!H:!V";
  //factory->BookMethod(TMVA::Types::kCuts, "Cuts", trainparams.c_str());

  factory->TrainAllMethods();
  factory->TestAllMethods();
  factory->EvaluateAllMethods();

  // Close the output file so the TMVA results are fully written to disk
  // (the original version omitted this).
  ldFile->Close();
  delete factory;
  return 0;
}
void TMVAClassificationCategory()
{
   //---------------------------------------------------------------
   std::cout << std::endl << "==> Start TMVAClassificationCategory" << std::endl;

   bool batchMode(false);
   bool UseOffsetMethod = true;  // choose the toy dataset: offset (true) or variable-offset (false), see fname below

   // Create a new root output file.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory will
   // then run the performance analysis for you.
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   std::string factoryOptions( "!V:!Silent:Transformations=I;D;P;G,D" );
   if (batchMode) factoryOptions += ":!Color:!DrawProgressBar";
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationCategory", outputFile, factoryOptions );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   factory->AddVariable( "var1", 'F' );
   factory->AddVariable( "var2", 'F' );
   factory->AddVariable( "var3", 'F' );
   factory->AddVariable( "var4", 'F' );

   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
   factory->AddSpectator( "eta" );

   // load the signal and background event samples from ROOT trees
   TFile *input(0);
   TString fname( "" );
   if (UseOffsetMethod) fname = "../execs/data/toy_sigbkg_categ_offset.root";
   else                 fname = "../execs/data/toy_sigbkg_categ_varoff.root";
   if (!gSystem->AccessPathName( fname )) {
      // first we try to find the data file in the local directory
      std::cout << "--- TMVAClassificationCategory: Accessing " << fname << std::endl;
      input = TFile::Open( fname );
   }
   if (!input) {
      std::cout << "ERROR: could not open data file: " << fname << std::endl;
      exit(1);
   }

   TTree *signal     = (TTree*)input->Get("TreeS");
   TTree *background = (TTree*)input->Get("TreeB");

   /// global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;

   /// you can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( signal,     signalWeight );
   factory->AddBackgroundTree( background, backgroundWeight );

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";

   // tell the factory to use all remaining events in the trees after training for testing:
   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // Fisher discriminant
   factory->BookMethod( TMVA::Types::kFisher, "Fisher", "!H:!V:Fisher" );

   // Likelihood
   factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                        "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // Categorised classifier
   TMVA::MethodCategory* mcat = 0;

   // the variable sets
   TString theCat1Vars = "var1:var2:var3:var4";
   TString theCat2Vars = (UseOffsetMethod ? "var1:var2:var3:var4" : "var1:var2:var3");

   // the Fisher
   TMVA::MethodBase* fiCat = factory->BookMethod( TMVA::Types::kCategory, "FisherCat","" );
   mcat = dynamic_cast<TMVA::MethodCategory*>(fiCat);
   mcat->AddMethod("abs(eta)<=1.3",theCat1Vars, TMVA::Types::kFisher,"Category_Fisher_1","!H:!V:Fisher");
   mcat->AddMethod("abs(eta)>1.3", theCat2Vars, TMVA::Types::kFisher,"Category_Fisher_2","!H:!V:Fisher");

   // the Likelihood
   TMVA::MethodBase* liCat = factory->BookMethod( TMVA::Types::kCategory, "LikelihoodCat","" );
   mcat = dynamic_cast<TMVA::MethodCategory*>(liCat);
   mcat->AddMethod("abs(eta)<=1.3",theCat1Vars, TMVA::Types::kLikelihood,"Category_Likelihood_1","!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50");
   mcat->AddMethod("abs(eta)>1.3", theCat2Vars, TMVA::Types::kLikelihood,"Category_Likelihood_2","!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50");

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassificationCategory is done!" << std::endl;

   // Clean up
   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
void WWTMVAClassification( TString myMethodList = "", double mH = 400.,
                           int njets = 2,   // default assumed: the cut tables below cover the 2- and 3-jet bins
                           TString chan = "el" )
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   // if you like to use a method via the plugin mechanism, we recommend using
   //
   // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
   // (an example is given for using the BDT as plugin (see below),
   // but of course the real application is when you write your own
   // method based)

   // this loads the library
   TMVA::Tools::Instance();

   //---------------------------------------------------------------
   // default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   Use["Cuts"]           = 0;
   Use["CutsD"]          = 0;
   Use["CutsPCA"]        = 0;
   Use["CutsGA"]         = 0;
   Use["CutsSA"]         = 0;
   // ---
   Use["Likelihood"]     = 1;
   Use["LikelihoodD"]    = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"]  = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"]  = 0;
   Use["LikelihoodMIX"]  = 0;
   // ---
   Use["PDERS"]          = 0;
   Use["PDERSD"]         = 0;
   Use["PDERSPCA"]       = 0;
   Use["PDERSkNN"]       = 0; // deprecated until further notice
   Use["PDEFoam"]        = 0;
   // --
   Use["KNN"]            = 0;
   // ---
   Use["HMatrix"]        = 0;
   Use["Fisher"]         = 0;
   Use["FisherG"]        = 0;
   Use["BoostedFisher"]  = 0;
   Use["LD"]             = 1;
   // ---
   Use["FDA_GA"]         = 0;
   Use["FDA_SA"]         = 0;
   Use["FDA_MC"]         = 0;
   Use["FDA_MT"]         = 0;
   Use["FDA_GAMT"]       = 0;
   Use["FDA_MCMT"]       = 0;
   // ---
   Use["MLP"]            = 0; // this is the recommended ANN
   Use["MLPBFGS"]        = 0; // recommended ANN with optional training method
   Use["CFMlpANN"]       = 0; // *** missing
   Use["TMlpANN"]        = 0;
   // ---
   Use["SVM"]            = 0;
   // ---
   Use["BDT"]            = 1;
   Use["BDTD"]           = 0;
   Use["BDTG"]           = 0;
   Use["BDTB"]           = 0;
   // ---
   Use["RuleFit"]        = 0;
   // ---
   Use["Plugin"]         = 0;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;
      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);
         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // Create a new root output file.
   ///////TString outfileName( "TMVA.root" );
   char outfileName[192];
   sprintf(outfileName,"TMVA_%3.0f_nJ%i_%s.root",mH,njets,chan.Data());
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory will
   // then run the performance analysis for you.
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   char classifierName[192];
   sprintf(classifierName,"TMVAClassification_%3.0f_nJ%i_%s",mH,njets,chan.Data());
   TMVA::Factory *factory = new TMVA::Factory( classifierName, outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" );
   //TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
   //                                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // leptonic W
   factory->AddVariable("WWpt := ptlvjj", 'F');
   factory->AddVariable("WWy := ylvjj", 'F');
   //factory->AddVariable("Wpt := W_pt", 'F');
   //factory->AddVariable("MET := event_met_pfmet", 'F');
   if (chan == "mu"){
      factory->AddVariable("LepCharge := W_muon_charge", 'F');
   }
   else if (chan == "el"){
      factory->AddVariable("LepCharge := W_electron_charge", 'F');
   }
   else{
      std::cout << "Invalid channel!" << std::endl;
      return;
   }
   // factory->AddVariable("J1QGL := JetPFCor_QGLikelihood[0]", 'F');
   // factory->AddVariable("J2QGL := JetPFCor_QGLikelihood[1]", 'F');
   factory->AddVariable("costheta1 := ang_ha", 'F');
   factory->AddVariable("costheta2 := ang_hb", 'F');
   factory->AddVariable("costhetaS := ang_hs", 'F');
   factory->AddVariable("Phi := ang_phi", 'F');
   factory->AddVariable("Phi2 := ang_phib", 'F');

   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
   factory->AddSpectator("run := event_runNo", "I");
   factory->AddSpectator("lumi := event_lumi", "I");
   factory->AddSpectator("event := event_evtNo", "I");
   factory->AddSpectator("mjj := Mass2j_PFCor", "F");
   factory->AddSpectator("mlvjj := MassV2j_PFCor", "F");
   factory->AddSpectator("masslvjj := masslvjj", "F");
   //factory->AddSpectator("ggdevt := ggdevt", "F");
   //factory->AddSpectator("fit_mlvjj := fit_mlvjj", "F");

   // read training and test data
   char signalOutputName[192];
   sprintf(signalOutputName,"/uscms_data/d2/kalanand/WjjTrees/Full2011DataFall11MC/ReducedTree/RD_%s_HWWMH%3.0f_CMSSW428.root",chan.Data(),mH);
   TFile *input1 = TFile::Open( signalOutputName );
   //TFile *input1 = TFile::Open( "/uscms_data/d2/kalanand/WjjTrees/Full2011DataFall11MC/ReducedTree/RD_mu_HWWMH400_CMSSW428.root");
   char backgroundOutputName[192];
   sprintf(backgroundOutputName,"/uscms_data/d2/kalanand/WjjTrees/Full2011DataFall11MC/ReducedTree/RD_%s_WpJ_CMSSW428.root",chan.Data());
   TFile *input2 = TFile::Open( backgroundOutputName );

   std::cout << "--- TMVAClassification : Using input file: " << input1->GetName() << std::endl;

   TTree *signal     = (TTree*)input1->Get("WJet");
   TTree *background = (TTree*)input2->Get("WJet");

   // global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;

   // ====== register trees ====================================================
   //
   // the following method is the preferred one:
   // you can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( signal,     signalWeight );
   factory->AddBackgroundTree( background, backgroundWeight );

   // To give different trees for training and testing, do as follows:
   //    factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
   //    factory->AddSignalTree( signalTestTree,     signalTestWeight,  "Test" );

   // Use the following code instead of the above two or four lines to add signal and background
   // training and test events "by hand"
   // NOTE that in this case one should not give expressions (such as "var1+var2") in the input
   // variable definition, but simply compute the expression before adding the event
   //
   //    // --- begin ----------------------------------------------------------
   //    std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
   //    Float_t  treevars[4];
   //    for (Int_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   //    for (Int_t i=0; i<signal->GetEntries(); i++) {
   //       signal->GetEntry(i);
   //       for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //       // add training and test events; here: first half is training, second is testing
   //       // note that the weight can also be event-wise
   //       if (i < signal->GetEntries()/2) factory->AddSignalTrainingEvent( vars, signalWeight );
   //       else                            factory->AddSignalTestEvent    ( vars, signalWeight );
   //    }
   //
   //    for (Int_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   //    for (Int_t i=0; i<background->GetEntries(); i++) {
   //       background->GetEntry(i);
   //       for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //       // add training and test events; here: first half is training, second is testing
   //       // note that the weight can also be event-wise
   //       if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight );
   //       else                                factory->AddBackgroundTestEvent    ( vars, backgroundWeight );
   //    }
   //    // --- end ------------------------------------------------------------
   //
   // ====== end of register trees ==============================================

   // This would set individual event weights (the variables defined in the
   // expression need to exist in the original TTree)
   //    for signal    : factory->SetSignalWeightExpression("weight1*weight2");
   //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
   // factory->SetBackgroundWeightExpression("weight");

   // Apply additional cuts on the signal and background samples (can be different)
   // TCut mycuts = "abs(eta)>1.5"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   // TCut mycutb = "abs(eta)>1.5"; // for example: TCut mycutb = "abs(var1)<0.5";

   const char * mass4bodycut = "";  // string literals are read-only, hence const
   if(njets==2) {
      if(chan.Contains("mu")) {
         if(mH==170.) mass4bodycut = "(fit_mlvjj>176 && fit_mlvjj<262)"; // 2j170mu =====
         if(mH==180.) mass4bodycut = "(fit_mlvjj>179 && fit_mlvjj<256)"; // 2j180mu
         if(mH==190.) mass4bodycut = "(fit_mlvjj>186 && fit_mlvjj<214)"; // 2j190mu
         if(mH==200.) mass4bodycut = "(fit_mlvjj>191 && fit_mlvjj<226)"; // 2j200mu
         if(mH==250.) mass4bodycut = "(fit_mlvjj>226 && fit_mlvjj<287)"; // 2j250mu
         if(mH==300.) mass4bodycut = "(fit_mlvjj>265 && fit_mlvjj<347)"; // 2j300mu
         if(mH==350.) mass4bodycut = "(fit_mlvjj>308 && fit_mlvjj<401)"; // 2j350mu
         if(mH==400.) mass4bodycut = "(fit_mlvjj>346 && fit_mlvjj<457)"; // 2j400mu
         if(mH==450.) mass4bodycut = "(fit_mlvjj>381 && fit_mlvjj<512)"; // 2j450mu
         if(mH==500.) mass4bodycut = "(fit_mlvjj>415 && fit_mlvjj<568)"; // 2j500mu
         if(mH==550.) mass4bodycut = "(fit_mlvjj>440 && fit_mlvjj<617)"; // 2j550mu
         if(mH==600.) mass4bodycut = "(fit_mlvjj>462 && fit_mlvjj<663)"; // 2j600mu
      }
      if(chan.Contains("el")) {
         if(mH==170.) mass4bodycut = "(fit_mlvjj>176 && fit_mlvjj<262)"; // 2j170el =====
         if(mH==180.) mass4bodycut = "(fit_mlvjj>179 && fit_mlvjj<256)"; // 2j180el
         if(mH==190.) mass4bodycut = "(fit_mlvjj>186 && fit_mlvjj<214)"; // 2j190el
         if(mH==200.) mass4bodycut = "(fit_mlvjj>191 && fit_mlvjj<226)"; // 2j200el
         if(mH==250.) mass4bodycut = "(fit_mlvjj>226 && fit_mlvjj<287)"; // 2j250el
         if(mH==300.) mass4bodycut = "(fit_mlvjj>265 && fit_mlvjj<347)"; // 2j300el
         if(mH==350.) mass4bodycut = "(fit_mlvjj>308 && fit_mlvjj<401)"; // 2j350el
         if(mH==400.) mass4bodycut = "(fit_mlvjj>346 && fit_mlvjj<457)"; // 2j400el
         if(mH==450.) mass4bodycut = "(fit_mlvjj>381 && fit_mlvjj<512)"; // 2j450el
         if(mH==500.) mass4bodycut = "(fit_mlvjj>415 && fit_mlvjj<568)"; // 2j500el
         if(mH==550.) mass4bodycut = "(fit_mlvjj>440 && fit_mlvjj<617)"; // 2j550el
         if(mH==600.) mass4bodycut = "(fit_mlvjj>462 && fit_mlvjj<663)"; // 2j600el
      }
   }
   if(njets==3) {
      if(chan.Contains("mu")) {
         if(mH==170.) mass4bodycut = "(fit_mlvjj>150 && fit_mlvjj<271)"; // 3j170mu =====
         if(mH==180.) mass4bodycut = "(fit_mlvjj>175 && fit_mlvjj<284)"; // 3j180mu
         if(mH==190.) mass4bodycut = "(fit_mlvjj>185 && fit_mlvjj<290)"; // 3j190mu
         if(mH==200.) mass4bodycut = "(fit_mlvjj>188 && fit_mlvjj<293)"; // 3j200mu
         if(mH==250.) mass4bodycut = "(fit_mlvjj>216 && fit_mlvjj<300)"; // 3j250mu
         if(mH==300.) mass4bodycut = "(fit_mlvjj>241 && fit_mlvjj<355)"; // 3j300mu
         if(mH==350.) mass4bodycut = "(fit_mlvjj>269 && fit_mlvjj<407)"; // 3j350mu
         if(mH==400.) mass4bodycut = "(fit_mlvjj>300 && fit_mlvjj<465)"; // 3j400mu
         if(mH==450.) mass4bodycut = "(fit_mlvjj>332 && fit_mlvjj<518)"; // 3j450mu
         if(mH==500.) mass4bodycut = "(fit_mlvjj>362 && fit_mlvjj<569)"; // 3j500mu
         if(mH==550.) mass4bodycut = "(fit_mlvjj>398 && fit_mlvjj<616)"; // 3j550mu
         if(mH==600.) mass4bodycut = "(fit_mlvjj>419 && fit_mlvjj<660)"; // 3j600mu
      }
      if(chan.Contains("el")) {
         if(mH==170.) mass4bodycut = "(fit_mlvjj>150 && fit_mlvjj<271)"; // 3j170el =====
         if(mH==180.) mass4bodycut = "(fit_mlvjj>175 && fit_mlvjj<284)"; // 3j180el
         if(mH==190.) mass4bodycut = "(fit_mlvjj>185 && fit_mlvjj<290)"; // 3j190el
         if(mH==200.) mass4bodycut = "(fit_mlvjj>188 && fit_mlvjj<293)"; // 3j200el
         if(mH==250.) mass4bodycut = "(fit_mlvjj>216 && fit_mlvjj<300)"; // 3j250el
         if(mH==300.) mass4bodycut = "(fit_mlvjj>241 && fit_mlvjj<355)"; // 3j300el
         if(mH==350.) mass4bodycut = "(fit_mlvjj>269 && fit_mlvjj<407)"; // 3j350el
         if(mH==400.) mass4bodycut = "(fit_mlvjj>300 && fit_mlvjj<465)"; // 3j400el
         if(mH==450.) mass4bodycut = "(fit_mlvjj>332 && fit_mlvjj<518)"; // 3j450el
         if(mH==500.) mass4bodycut = "(fit_mlvjj>362 && fit_mlvjj<569)"; // 3j500el
         if(mH==550.) mass4bodycut = "(fit_mlvjj>398 && fit_mlvjj<616)"; // 3j550el
         if(mH==600.)
mass4bodycut = "(fit_mlvjj>419 && fit_mlvjj<660)"; // 3j600el } } char mycutschar[1000]; sprintf(mycutschar,"ggdevt == %i &&(Mass2j_PFCor>65 && Mass2j_PFCor<95) && %s", njets, mass4bodycut); TCut mycuts (mycutschar); // tell the factory to use all remaining events in the trees after training for testing: factory->PrepareTrainingAndTestTree( mycuts, mycuts, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // If no numbers of events are given, half of the events in the tree are used for training, and // the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); // ---- Book MVA methods // // please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // test the decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // test the new kernel density estimator if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // test the mixed splines and kernel density estimator (depending on which variable) if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // test the multi-dimensional probability density 
estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSkNN"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSkNN", "!H:!V:VolumeRangeMode=kNN:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:CutNmin=T:Nmin=100:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" ); // Fisher discriminant if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=60:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2"); // Linear discriminant (same as Fisher) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", 
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=N+5:TestRate=10:EpochMonitoring" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=N+5:TestRate=10:TrainingMethod=BFGS:!EpochMonitoring" ); // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // As an example how to use the ROOT plugin mechanism, book BDT via // plugin mechanism if (Use["Plugin"]) { // // first the plugin has to be defined, which can happen either through the following line in the local or global .rootrc: // // # plugin handler plugin name(regexp) class to be instanciated library constructor format // Plugin.TMVA@@MethodBase: ^BDT TMVA::MethodBDT TMVA.1 "MethodBDT(TString,TString,DataSet&,TString)" // // or 
by telling the global plugin manager directly gPluginMgr->AddHandler("TMVA@@MethodBase", "BDT", "TMVA::MethodBDT", "TMVA.1", "MethodBDT(TString,TString,DataSet&,TString)"); factory->BookMethod( TMVA::Types::kPlugins, "BDT", "!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=50" ); } // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
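// ---------------------------------------------------------------------------
// Since WWTMVAClassification is parameterised in (mH, njets, chan), a full
// scan over the mass points and channels covered by the cut tables above is
// just a loop. A hypothetical driver sketch (it assumes the macro above is
// saved as WWTMVAClassification.C in the working directory; the method list
// "BDT,LD" matches the defaults enabled above):
void RunAllWW()
{
   gROOT->LoadMacro( "WWTMVAClassification.C" );
   const double masses[12] = { 170., 180., 190., 200., 250., 300.,
                               350., 400., 450., 500., 550., 600. };
   const char* chans[2] = { "mu", "el" };
   for (int im = 0; im < 12; im++)
      for (int nj = 2; nj <= 3; nj++)          // 2- and 3-jet bins
         for (int ic = 0; ic < 2; ic++)
            gROOT->ProcessLine( Form("WWTMVAClassification(\"BDT,LD\",%f,%d,\"%s\")",
                                     masses[im], nj, chans[ic]) );
}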
/********************************************************************************** * Project : TMVA - a ROOT-integrated toolkit for multivariate data analysis * * Package : TMVA * * Root Macro: TMVAClassification * * * * This macro provides examples for the training and testing of the * * TMVA classifiers. * * * * As input data is used a toy-MC sample consisting of four Gaussian-distributed * * and linearly correlated input variables. * * * * The methods to be used can be switched on and off by means of booleans, or * * via the prompt command, for example: * * * * root -l ./TMVAClassification.C\(\"Fisher,Likelihood\"\) * * * * (note that the backslashes are mandatory) * * If no method given, a default set of classifiers is used. * * * * The output file "TMVA.root" can be analysed with the use of dedicated * * macros (simply say: root -l <macro.C>), which can be conveniently * * invoked through a GUI that will appear at the end of the run of this macro. * * Launch the GUI via the command: * * * * root -l ./TMVAGui.C * * * **********************************************************************************/ void TMVAClassification( TString myMethodList = "") { TTree *signal = (TTree *)gDirectory->Get("VertexG"); if (! signal) { std::cout << "No signal TTree" << std::endl; return;} TTree *background = (TTree *)gDirectory->Get("VertexB"); if (! background) { std::cout << "No background TTree" << std::endl; return;} //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // Default MVA methods to be trained + tested std::map<std::string, int> Use; // --- Cut optimisation Use["Cuts"] = 1; Use["CutsD"] = 1; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // // --- 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 1; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 1; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // --- Mutidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 1; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 1; Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting Use["KNN"] = 1; // k-nearest neighbour method // // --- Linear Discriminant Analysis Use["LD"] = 1; // Linear Discriminant identical to Fisher Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 1; // minimisation of user-defined function using Genetics Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 0; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 1; // Recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN // // --- Support Vector Machine Use["SVM"] = 1; // // --- Boosted Decision Trees Use["BDT"] = 1; // uses Adaptive Boost Use["BDTG"] = 0; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of fisher discriminant for node splitting Use["myBDTD"] = 1; // mine // // --- Friedman's RuleFit method, ie, an optimised 
series of cuts ("rules")
   Use["RuleFit"]        = 1;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string, int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;
      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (size_t i = 0; i < mlist.size(); i++) {
         std::string regMethod(mlist[i]);
         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string, int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // --------------------------------------------------------------------------------------------------
   // --- Here the preparation phase begins

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName( "TMVA.root" );
   TFile *outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory is
   // the only TMVA object you have to interact with
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // load the signal and background event samples from ROOT trees
   std::cout << " starts ... " << std::endl;

   // global event weights per tree (see below for setting event-wise weights)
   // Float_t w;
   double signalWeight     = 1.0;
   double backgroundWeight = 1.0;
   std::cout << " signalWeight = " << signalWeight << " backWeight = " << backgroundWeight << std::endl;

   factory->AddSignalTree( signal, signalWeight );
   factory->AddBackgroundTree( background, backgroundWeight );

   // vnames is a colon-separated list of input branch names that must be
   // defined (e.g. as a global) before this macro is executed
   TString separator(":");
   TString Vnames(vnames);
   TObjArray *array = Vnames.Tokenize(separator);

   std::vector<std::string> inputVars;
   TIter next(array);
   TObjString *objs;
   while ((objs = (TObjString *) next())) {
      // std::cout << objs->GetString() << std::endl;
      TString name(objs->GetString());
      if (name == "BEMC")   continue;
      if (name == "noBEMC") continue;
      factory->AddVariable(name, 'F');
   }

   // This would set individual event weights (the variables defined in the
   // expression need to exist in the original TTree)
   //    for signal    : factory->SetSignalWeightExpression("weight1*weight2");
   //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
   // commented JB : 04/26 ??
//factory->dSetBackgroundWeightExpression("weight"); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; TCut mycutb = ""; // Tell the factory how to use the training and testing events // // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: //factory->PrepareTrainingAndTestTree( mycuts,mycutb,"NSigTrain=9000:NBkgTrain=50000:NSigTest=9000:NBkgTest=50000:SplitMode=Random:!V" ); factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=4900:nTrain_Background=49000:nTest_Signal=4900:nTest_Background=49000:SplitMode=Random:!V"); // for KFVertex // factory->PrepareTrainingAndTestTree( mycuts, mycutb,"nTrain_Signal=20000:nTrain_Background=40000:nTest_Signal=20000:nTest_Background=40000:SplitMode=Random:!V"); // for PPV // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood ("naive Bayes estimator") if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density estimator to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", 
"!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", 
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... 
// Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); if (Use["myBDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTDTEST", "!H:!V:NTrees=1000:nEventsMin=400:MaxDepth=6:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", "!H:!V:NTrees=50:nEventsMin=150:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier usage, see: TMVAClassificationCategory // TMVA::IMethod* category = factory->BookMethod( TMVA::Types::kCategory,"Category","" ); // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events #if 0 factory->OptimizeAllMethods("SigEffAt001", "Scan"); factory->OptimizeAllMethods("ROCIntegral", "GA"); #endif // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; }
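// ---------------------------------------------------------------------------
// The Tokenize loop in the macro above consumes a global `vnames` string that
// has to exist before the macro runs (e.g. defined by the code that produced
// the VertexG and VertexB trees): a colon-separated list of branch names, in
// which the BEMC/noBEMC entries act as flags and are skipped. The branch names
// below are purely hypothetical placeholders illustrating the expected format:
// const char* vnames = "zVertex:ranking:nTracks:sumPt:BEMC";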
void TMVAClassificationCategory()
{
   //---------------------------------------------------------------
   // Example for usage of different event categories with classifiers

   std::cout << std::endl << "==> Start TMVAClassificationCategory" << std::endl;

   bool batchMode = false;
   bool UseOffsetMethod = true;  // choose the toy dataset: offset (true) or variable-offset (false), see fname below

   // Create a new root output file.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object (see TMVAClassification.C for more information)
   std::string factoryOptions( "!V:!Silent:Transformations=I;D;P;G,D" );
   if (batchMode) factoryOptions += ":!Color:!DrawProgressBar";
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationCategory", outputFile, factoryOptions );

   // Define the input variables used for the MVA training
   factory->AddVariable( "var1", 'F' );
   factory->AddVariable( "var2", 'F' );
   factory->AddVariable( "var3", 'F' );
   factory->AddVariable( "var4", 'F' );

   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
   factory->AddSpectator( "eta" );

   // Load the signal and background event samples from ROOT trees
   TFile *input(0);
   TString fname( "" );
   if (UseOffsetMethod) fname = "data/toy_sigbkg_categ_offset.root";
   else                 fname = "data/toy_sigbkg_categ_varoff.root";
   if (!gSystem->AccessPathName( fname )) {
      // first we try to find the data file in the local directory
      std::cout << "--- TMVAClassificationCategory: Accessing " << fname << std::endl;
      input = TFile::Open( fname );
   }
   if (!input) {
      std::cout << "ERROR: could not open data file: " << fname << std::endl;
      exit(1);
   }

   TTree *signal     = (TTree*)input->Get("TreeS");
   TTree *background = (TTree*)input->Get("TreeB");

   /// Global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;

   /// You can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( signal,     signalWeight );
   factory->AddBackgroundTree( background, backgroundWeight );

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";

   // Tell the factory how to use the training and testing events
   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // ---- Book MVA methods

   // Fisher discriminant
   factory->BookMethod( TMVA::Types::kFisher, "Fisher", "!H:!V:Fisher" );

   // Likelihood
   factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                        "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // --- Categorised classifier
   TMVA::MethodCategory* mcat = 0;

   // The variable sets
   TString theCat1Vars = "var1:var2:var3:var4";
   TString theCat2Vars = (UseOffsetMethod ? "var1:var2:var3:var4" : "var1:var2:var3");

   // Fisher with categories
   TMVA::MethodBase* fiCat = factory->BookMethod( TMVA::Types::kCategory, "FisherCat","" );
   mcat = dynamic_cast<TMVA::MethodCategory*>(fiCat);
   mcat->AddMethod( "abs(eta)<=1.3", theCat1Vars, TMVA::Types::kFisher, "Category_Fisher_1","!H:!V:Fisher" );
   mcat->AddMethod( "abs(eta)>1.3",  theCat2Vars, TMVA::Types::kFisher, "Category_Fisher_2","!H:!V:Fisher" );

   // Likelihood with categories
   TMVA::MethodBase* liCat = factory->BookMethod( TMVA::Types::kCategory, "LikelihoodCat","" );
   mcat = dynamic_cast<TMVA::MethodCategory*>(liCat);
   mcat->AddMethod( "abs(eta)<=1.3", theCat1Vars, TMVA::Types::kLikelihood, "Category_Likelihood_1","!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );
   mcat->AddMethod( "abs(eta)>1.3",  theCat2Vars, TMVA::Types::kLikelihood, "Category_Likelihood_2","!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassificationCategory is done!" << std::endl;

   // Clean up
   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
void TMVAClassificationHwwNtuple( TString myMethodList = "" )
{
   // This loads the library
   TMVA::Tools::Instance();
   gROOT->ProcessLine(".L TMVAGui.C");

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Cut optimisation
   Use["Cuts"]    = 1;
   Use["CutsD"]   = 0;
   Use["CutsPCA"] = 0;
   Use["CutsGA"]  = 0;
   Use["CutsSA"]  = 0;
   //
   Use["BDT"]     = 1; // uses Adaptive Boost
   Use["BDTG"]    = 0; // uses Gradient Boost
   Use["BDTB"]    = 0; // uses Bagging
   Use["BDTD"]    = 0; // decorrelation + Adaptive Boost
   Use["BDTF"]    = 0; // allow usage of fisher discriminant for node splitting
   //
   // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
   Use["RuleFit"] = 0;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;
      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);
         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
         std::cout << regMethod << " is on" << std::endl;
      }
   }

   // -------------------------------------------------------------------------
   // --- Here the preparation phase begins

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // For one variable
   //TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
   //                                            "!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification" );
   // For Multiple Variables
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

   //factory->AddVariable( "pt1", "LeadLepton pt", "", 'F' );
   //factory->AddVariable( "pt2", "TailLepton pt", "", 'F' );
   factory->AddVariable( "pfmet",  "MissingEt",         "", 'F' );
   factory->AddVariable( "mpmet",  "Minimum Proj. Met", "", 'F' );
   factory->AddVariable( "dphill", "DeltaPhiOfLepLep",  "", 'F' );
   //factory->AddVariable( "mll", "DiLepton Mass", "", 'F' );
   factory->AddVariable( "ptll",   "DiLepton pt",       "", 'F' );

   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
   //factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' );
   //factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' );
   //
   //factory->AddSpectator( "mWW", "Higgs Mass", "", 'F' );
   factory->AddSpectator( "pt1",    "LeadLepton pt",     "", 'F' );
   factory->AddSpectator( "pt2",    "TailLepton pt",     "", 'F' );
   factory->AddSpectator( "pfmet",  "MissingEt",         "", 'F' );
   factory->AddSpectator( "mpmet",  "Minimum Proj. Met", "", 'F' );
   factory->AddSpectator( "dphill", "DeltaPhiOfLepLep",  "", 'F' );
   factory->AddSpectator( "mll",    "DiLepton Mass",     "", 'F' );
   factory->AddSpectator( "ptll",   "DiLepton pt",       "", 'F' );

   // Read training and test data
   // (it is also possible to use ASCII format as input -> see TMVA Users Guide)
   //TString fname = "./tmva_class_example.root";
   //TString fname = "/afs/cern.ch/work/s/salee/private/HWWwidth/HWW/GGVvAnalyzer/MkNtuple/Hw1Int8TeV/MkNtuple.root";
   //TString fname = "/terranova_0/HWWwidth/HWW/GGVvAnalyzer/MkNtuple/Hw1Int8TeV/MkNtuple.root";
   //if (gSystem->AccessPathName( fname )) // file does not exist in local directory
   //   exit(-1);
   //gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root");
   //TFile *input = TFile::Open( fname );
   //TFile *SB_OnPeak = TFile::Open("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_IntOnPeak_8TeV.root");
   //TTree *SB_OnPeak_Tree = (TTree*)SB_OnPeak->Get("latino");

   TChain *S_Chain    = new TChain("latino");
   TChain *C_Chain    = new TChain("latino");
   TChain *SCI_Chain  = new TChain("latino");
   TChain *qqWW_Chain = new TChain("latino");

   S_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_SigOnPeak_8TeV.root");
   S_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_SigShoulder_8TeV.root");
   S_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_SigTail_8TeV.root");
   SCI_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_IntOnPeak_8TeV.root");
   SCI_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_IntShoulder_8TeV.root");
   SCI_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_IntTail_8TeV.root");
   C_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw25_CotHead_8TeV.root");
   C_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw25_CotTail_8TeV.root");
   qqWW_Chain->Add("/afs/cern.ch/user/m/maiko/work/public/Tree/tree_skim_wwmin/nominals/latino_000_WWJets2LMad.root");

   // --- Register the training and test trees
   // You can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( S_Chain );
   factory->AddBackgroundTree( qqWW_Chain );
   factory->AddBackgroundTree( C_Chain );

   // Classification training and test data in ROOT tree format with signal and background events being located in the same tree
   //factory->SetInputTrees(SCI_Chain, GenOffCut, GenOnCut);

   // To give different trees for training and testing, do as follows:
   //    factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
   //    factory->AddSignalTree( signalTestTree,     signalTestWeight,  "Test" );

   factory->SetWeightExpression ("2.1*puW*baseW*effW*triggW*19.468");
   //factory->SetSignalWeightExpression ("2.1*puW*baseW*effW*triggW*19.468");
   //factory->SetBackgroundWeightExpression("puW*baseW*effW*triggW*19.468");

   //factory->PrepareTrainingAndTestTree( ChanCommOff,
   //                                     "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=None:!V" );
   //"nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V";

   // NB: ChanCommOff0J is a TCut that must be defined before this macro is run
   factory->PrepareTrainingAndTestTree( ChanCommOff0J,
"nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=None:!V" ); // ---- Book MVA methods // // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:V:NTrees=850:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); //"!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" ); // For an example of the category classifier usage, see: TMVAClassificationCategory // ----------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // ---- STILL EXPERIMENTAL and only implemented for BDT's ! // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","FitGA"); // ----------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros //if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
int main(int argc, char* argv[]){
  // Configurable parameters
  // int max_events;       // Maximum number of events to process
  // string filelist;      // The file containing a list of files to use as input
  // string input_prefix;  // A prefix that will be added to the path of each input file
  string folder;
  string output_name;      // Name of the output ROOT file
  string output_folder;    // Folder to write the output in
  string paramfile;
  string paramfile2;
  string classname;
  bool twotag;
  bool onetag;

  po::options_description config("Configuration");
  po::variables_map vm;
  config.add_options()
    ("folder",        po::value<string>(&folder)->default_value("output/Paper_2012/"))
    // ("input_prefix", po::value<string>(&input_prefix)->default_value(""))
    ("output_name",   po::value<string>(&output_name)->default_value("test_tmva.root"))
    ("output_folder", po::value<string>(&output_folder)->default_value(""))
    ("paramfile",     po::value<string>(&paramfile)->default_value("./scripts/Paper_params_2012.dat"))
    ("paramfile2",    po::value<string>(&paramfile2)->default_value("./scripts/TMVAinputshad.dat"))
    ("classname",     po::value<string>(&classname)->default_value("HhhMVA"))
    ("twotag",        po::value<bool>(&twotag)->default_value(true))
    ("onetag",        po::value<bool>(&onetag)->default_value(false));
  po::store(po::command_line_parser(argc, argv).options(config).allow_unregistered().run(), vm);
  po::notify(vm); // notify only after the options have been stored

  std::cout << "-------------------------------------" << std::endl;
  std::cout << "Train MVA" << std::endl;
  std::cout << "-------------------------------------" << std::endl;
  string param_fmt = "%-25s %-40s\n";

  std::vector<string> bckglist;
  bckglist.push_back("TTJetsFullLept");
  bckglist.push_back("TTJetsSemiLept");
  bckglist.push_back("TTJetsHadronicExt");
  // bckglist.push_back("WWJetsTo2L2Nu");
  // bckglist.push_back("WZJetsTo2L2Q");
  // bckglist.push_back("WZJetsTo3LNu");
  // bckglist.push_back("ZZJetsTo2L2Nu");
  // bckglist.push_back("ZZJetsTo2L2Q");
  // bckglist.push_back("ZZJetsTo4L");
  // bckglist.push_back("DYJetsToTauTauSoup");
  // bckglist.push_back("DYJetsToLLSoup");
  // bckglist.push_back("DYJetsToTauTau");
  // bckglist.push_back("DYJetsToLL");
  // bckglist.push_back("T-tW");
  // bckglist.push_back("Tbar-tW");

  std::vector<string> signallist;
  signallist.push_back("GluGluToHTohhTo2Tau2B_mH-300");

  sample_names_.reserve(bckglist.size()+signallist.size());
  sample_names_.insert(sample_names_.end(),bckglist.begin(),bckglist.end());
  sample_names_.insert(sample_names_.end(),signallist.begin(),signallist.end());

  std::vector<TFile*> BackgroundSamples;
  for(unsigned int iter=0;iter<bckglist.size();++iter){
    BackgroundSamples.push_back(TFile::Open((folder+bckglist.at(iter)+"_mt_2012.root").c_str()));
  }
  std::vector<TFile*> SignalSamples;
  for(unsigned int sigIter=0;sigIter<signallist.size();++sigIter){
    SignalSamples.push_back(TFile::Open((folder+signallist.at(sigIter)+"_mt_2012.root").c_str()));
  }
  std::vector<TTree*> backgroundTrees;
  for(unsigned int iter2=0;iter2<BackgroundSamples.size();++iter2){
    backgroundTrees.push_back(dynamic_cast<TTree*>(BackgroundSamples.at(iter2)->Get("ntuple")));
  }
  std::vector<TTree*> signalTrees;
  for(unsigned int sigIter2=0;sigIter2<SignalSamples.size();++sigIter2){
    signalTrees.push_back(dynamic_cast<TTree*>(SignalSamples.at(sigIter2)->Get("ntuple")));
  }

  TFile *outfile = new TFile((output_folder+output_name).c_str(),"RECREATE");
  TMVA::Factory *factory = new TMVA::Factory(classname,outfile,"!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification");

  // Read the list of training variables (one per line) and register them
  std::vector<std::string> vars;
  std::ifstream parafile(paramfile2.c_str());
  std::cout<<paramfile2.c_str()<<std::endl;
  string line;
  while(getline(parafile,line)){
    vars.push_back(line);
  }
  parafile.close();
  std::cout<<(vars.at(0)).c_str()<<std::endl;
  for(unsigned int variter=0;variter<vars.size();++variter){
    factory->AddVariable((vars.at(variter)).c_str(),(vars.at(variter)).c_str(),"",'F');
  }
  factory->AddSpectator("mt_1","mt_1","",'F');
  factory->AddSpectator("n_prebjets","n_prebjets","",'I');
  factory->AddSpectator("prebjetbcsv_1","prebjetbcsv_1","",'F');
  factory->AddSpectator("prebjetbcsv_2","prebjetbcsv_2","",'F');

  // Per-sample weights: cross section divided by the number of generated events
  double weightval_=0;
  ParseParamFile(paramfile);
  for(unsigned int bckgit=0;bckgit<backgroundTrees.size();++bckgit){
    auto it = sample_info_.find(bckglist.at(bckgit));
    if(it!=sample_info_.end()){
      double evt = it->second.first;
      double xs  = it->second.second;
      weightval_ = xs/evt;
      std::cout<<weightval_<<std::endl;
    }
    factory->AddBackgroundTree(backgroundTrees.at(bckgit),weightval_);
  }
  for(unsigned int sgit=0;sgit<signalTrees.size();++sgit){
    auto it = sample_info_.find(signallist.at(sgit));
    if(it!=sample_info_.end()){
      double evt = it->second.first;
      double xs  = it->second.second;
      weightval_ = xs/evt;
    }
    std::cout<<weightval_<<std::endl;
    factory->AddSignalTree(signalTrees.at(sgit),weightval_);
  }
  factory->SetBackgroundWeightExpression("wt");
  factory->SetSignalWeightExpression("wt");

  TCut mycutb, mycuts;
  if(twotag){
    mycutb="n_prebjets>1&&mt_1<30&&prebjetbcsv_1>0.679&&prebjetbcsv_2>0.679";
    mycuts="n_prebjets>1&&mt_1<30&&prebjetbcsv_1>0.679&&prebjetbcsv_2>0.679";
  }
  else if(onetag){
    mycutb="n_prebjets>1&&mt_1<30&&prebjetbcsv_1>0.679&&prebjetbcsv_2<0.679";
    mycuts="n_prebjets>1&&mt_1<30&&prebjetbcsv_1>0.679&&prebjetbcsv_2<0.679";
  }
  else{
    mycutb="n_prebjets>1&&mt_1<30";
    mycuts="n_prebjets>1&&mt_1<30";
  }
  //TCut mycutb="";
  //TCut mycuts="";

  factory->PrepareTrainingAndTestTree( mycuts, mycutb,"SplitMode=Random:!V");
  factory->BookMethod( TMVA::Types::kBDT, "BDT","!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );
  factory->TrainAllMethods();
  factory->TestAllMethods();
  factory->EvaluateAllMethods();
  outfile->Close();
  delete factory;
  return 0;
}
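// ParseParamFile() and sample_info_ are defined elsewhere in this package;
// from the usage above, sample_info_ maps a sample name to a pair of
// (generated events, cross section), and paramfile2 lists one training
// variable per line. A minimal sketch of a compatible ParseParamFile,
// assuming a whitespace-separated "name events xs" layout (the real file
// format may differ):
#include <fstream>
#include <map>
#include <string>
#include <utility>

std::map<std::string, std::pair<double, double> > sample_info_;

void ParseParamFile(const std::string& path)
{
  std::ifstream in(path.c_str());
  std::string name;
  double events, xs;
  // each entry yields the per-event weight xs/events used when adding the trees
  while (in >> name >> events >> xs)
    sample_info_[name] = std::make_pair(events, xs);
}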
void TMVAClassify_SepSSFromOS( TString myMethodList = "" ) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // to get access to the GUI and all tmva macros TString tmva_dir(TString(gRootDir) + "/tmva"); if(gSystem->Getenv("TMVASYS")) tmva_dir = TString(gSystem->Getenv("TMVASYS")); gROOT->SetMacroPath(tmva_dir + "/test/:" + gROOT->GetMacroPath() ); gROOT->ProcessLine(".L TMVAGui.C"); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Cut optimisation Use["Cuts"] = 0; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // // --- 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 0; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // --- Mutidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 0; Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting Use["KNN"] = 0; // k-nearest neighbour method // // --- Linear Discriminant Analysis Use["LD"] = 0; // Linear Discriminant identical to Fisher Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 0; // minimisation of user-defined function using Genetics Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 0; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 0; // Recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 0; // uses Adaptive Boost Use["BDTG"] = 1; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of fisher discriminant for node splitting // // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules") Use["RuleFit"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t 
i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName( "../../classifiers/D02KPi/TMVA/TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] factory->AddVariable( "track_angletod", "Angle to D0", "", 'F' ); factory->AddVariable( "track_angletochild1", "Angle to kaon from D0", "", 'F' ); factory->AddVariable( "track_angletochild2", "Angle to pion from D0", "", 'F' ); factory->AddVariable( "log(track_devdist)", "log(DOCA to D0 vertex)", "", 'F' ); //factory->AddVariable( "log(track_docatochild1)", "log(DOCA to kaon from D0)", "", 'F' ); //factory->AddVariable( "log(track_docatochild2)", "log(DOCA to pion from D0)", "", 'F' ); //factory->AddVariable( "log(track_docatod)", "Distance to D0 trajectory", "", 'F' ); factory->AddVariable( "track_ptratiod", "Ratio track PT to D0 PT ", "", 'F' ); factory->AddVariable( "track_ptratiochild1", "Ratio track PT to kaon PT", "", 'F' ); factory->AddVariable( "track_ptratiochild2", "Ratio track PT to pion PT", "", 'F' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA.
This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables //factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' ); //factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' ); // Read training and test data // (it is also possible to use ASCII format as input -> see TMVA Users Guide) TFile *input_ss = TFile::Open( "../../data/mcd02kpi_ss_forsepssfromos.root" ); TFile *input_os = TFile::Open( "../../data/mcd02kpi_os_forsepssfromos.root" ); // --- Register the training and test trees TTree *intree_ss = (TTree*)input_ss->Get("DecayTree"); TTree *intree_os = (TTree*)input_os->Get("DecayTree"); // global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; factory->AddSignalTree ( intree_os, signalWeight ); factory->AddBackgroundTree( intree_ss, backgroundWeight ); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; // Tell the factory how to use the training and testing events factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=80000:nTrain_Background=80000:nTest_Signal=80000:nTest_Background=80000:SplitMode=Random:NormMode=NumEvents:!V" ); // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood ("naive Bayes estimator") if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density estimator to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, 
"LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" 
); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=N+2:TestRate=5:!UseRegulator" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... 
// Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=4000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=3" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" ); if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier usage, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // ---- STILL EXPERIMENTAL and only implemented for BDT's ! // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","FitGA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
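// To inspect the trained BDTG response without launching the GUI, the output
// file can be read back directly. A minimal sketch, assuming the classic
// (pre-DataLoader) TMVA layout in which the file holds a TestTree with one
// branch per booked method plus classID (0 = signal, 1 = background):
void PlotBDTGResponse()
{
   TFile* f = TFile::Open( "../../classifiers/D02KPi/TMVA/TMVA.root" );
   TTree* test = (TTree*)f->Get( "TestTree" );
   test->Draw( "BDTG>>hSig(40,-1,1)", "classID==0" );         // signal response
   test->Draw( "BDTG>>hBkg(40,-1,1)", "classID==1", "same" ); // background response
}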
void TMVAClassification_cuts( TString myMethodList = "" ) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Cut optimisation Use["Cuts"] = 1; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // // --- 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 0; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // --- Mutidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 0; Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting Use["KNN"] = 0; // k-nearest neighbour method // // --- Linear Discriminant Analysis Use["LD"] = 0; // Linear Discriminant identical to Fisher Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 0; // minimisation of user-defined function using Genetics Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 0; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 0; // Recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 0; // uses Adaptive Boost Use["BDTG"] = 0; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of fisher discriminant for node splitting // // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules") Use["RuleFit"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. 
Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] //factory->AddVariable( "myvar1 := var1+var2", 'F' ); //factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' ); factory->AddVariable( "CSV1", "CSV 1", "", 'F' ); factory->AddVariable( "CSV2", "CSV 2", "", 'F' ); factory->AddVariable( "nJets", "nJets", "", 'I' ); // factory->AddVariable( "Zmass", "Zmass", "", 'F' ); // factory->AddVariable( "Hmass", "Hmass", "", 'F' ); factory->AddVariable( "DeltaPhiHV", "Deltaphi HB", "", 'F' ); factory->AddVariable( "Hpt", "p_{T} Higgs", "", 'F' ); factory->AddVariable( "Zpt", "p_{T} Z Boson", "", 'F' ); factory->AddVariable( "mu1pt", "#mu1 p_{T}", "", 'F' ); factory->AddVariable( "Ht", "H_{T}", "", 'F' ); factory->AddVariable( "EtaStandDev", "Standard Deviation #eta", "", 'F' ); factory->AddVariable( "UnweightedEta", "unweighted #eta", "", 'F' ); factory->AddVariable( "EvntShpCircularity", "Circularity", "", 'F' ); factory->AddVariable( "alpha_j", "A-P #alpha jets", "", 'F' ); factory->AddVariable( "qtb1", "q_{T} b1", "", 'F' ); /// factory->AddVariable( "nSV", "nSV", "", 'I' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. 
This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables // factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' ); // factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' ); // Read training and test data // (it is also possible to use ASCII format as input -> see TMVA Users Guide) TString ZZname = "ZZ.root"; TString DYname = "DY.root"; TString WZname = "WZ.root"; TString TTJetsname = "TTJets.root"; TString sname = "H115.root"; //if (gSystem->AccessPathName( fname )) // file does not exist in local directory // gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root"); TFile *H115_file = TFile::Open( sname ); TFile *ZZ_file = TFile::Open( ZZname ); TFile *DY_file = TFile::Open( DYname ); TFile *WZ_file = TFile::Open( WZname ); TFile *TTJets_file = TFile::Open( TTJetsname ); std::cout << "--- TMVAClassification : Using input file: " << H115_file->GetName() << std::endl; // --- Register the training and test trees TTree *signal = (TTree*)H115_file->Get("TMVA_tree"); TTree *ZZ_tree = (TTree*)ZZ_file->Get("TMVA_tree"); TTree *DY_tree = (TTree*)DY_file->Get("TMVA_tree"); TTree *WZ_tree = (TTree*)WZ_file->Get("TMVA_tree"); TTree *TTJets_tree = (TTree*)TTJets_file->Get("TMVA_tree"); // global event weights per tree: cross section x luminosity / N generated (see below for setting event-wise weights) Double_t signalWeight = 0.4107*0.704*0.101*10000/219999.0; Double_t ZZ_weight = 7.41*10000/4157882.0; Double_t DY_weight = 3151.864553*10000/ 36217940.0; Double_t WZ_weight = 18.2*10000/4145240.0; Double_t TTJets_weight = 157.5*10000/ 3611944.0; // You can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight); factory->AddBackgroundTree( ZZ_tree, ZZ_weight ); factory->AddBackgroundTree( DY_tree, DY_weight ); factory->AddBackgroundTree( WZ_tree, WZ_weight ); factory->AddBackgroundTree( TTJets_tree, TTJets_weight ); // To give different trees for training and testing, do as follows: // factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ); // factory->AddSignalTree( signalTestTree, signalTestWeight, "Test" ); // Use the following code instead of the above two or four lines to add signal and background // training and test events "by hand" // NOTE that in this case one should not give expressions (such as "var1+var2") in the input // variable definition, but simply compute the expression before adding the event // // // --- begin ---------------------------------------------------------- // std::vector<Double_t> vars( 4 ); // vector has size of number of input variables // Float_t treevars[4], weight; // // // Signal // for (UInt_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (UInt_t i=0; i<signal->GetEntries(); i++) { // signal->GetEntry(i); // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < signal->GetEntries()/2.0) factory->AddSignalTrainingEvent( vars, signalWeight ); // else factory->AddSignalTestEvent ( vars, signalWeight ); // } // // // Background (has event weights) // background->SetBranchAddress( "weight", &weight ); // for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (UInt_t i=0; i<background->GetEntries(); i++) { // background->GetEntry(i); // for (UInt_t
ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight*weight ); // else factory->AddBackgroundTestEvent ( vars, backgroundWeight*weight ); // } // --- end ------------------------------------------------------------ // // --- end of tree registration // Set individual event weights (the variables must exist in the original TTree) // for signal : factory->SetSignalWeightExpression ("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("weight1*weight2"); //factory->SetBackgroundWeightExpression( "weight" ); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = "CSV1 > -1 && CSV2 > -1 && Zmass > 70 && Zmass < 110"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut nocut = "CSV1 > -1"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; //TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; // Tell the factory how to use the training and testing events // // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: factory->PrepareTrainingAndTestTree(nocut, "SplitMode=random:!V"); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); //factory->PrepareTrainingAndTestTree( mycuts, mycutb, // "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=GA:EffSel:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood ("naive Bayes estimator") if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( 
TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density estimator to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" ); // Function discrimination 
analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... 
// Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", "!H:!V:NTrees=50:nEventsMin=150:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier usage, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","GA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
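// Once the cut optimisation has run, the resulting rectangular cut window can
// be read back for any target signal efficiency, following the standard
// TMVAClassificationApplication example. A minimal sketch: the variables must
// be registered in the same order as in the training above, and the
// weight-file path assumes the default weights/ directory.
void PrintOptimalCuts( Double_t effS = 0.7 )
{
   const Int_t nVar = 13;
   const char* names[nVar] = { "CSV1", "CSV2", "nJets", "DeltaPhiHV", "Hpt",
                               "Zpt", "mu1pt", "Ht", "EtaStandDev", "UnweightedEta",
                               "EvntShpCircularity", "alpha_j", "qtb1" };
   Float_t v[nVar];
   TMVA::Reader* reader = new TMVA::Reader( "!Color:!Silent" );
   for (Int_t i = 0; i < nVar; i++) reader->AddVariable( names[i], &v[i] );
   reader->BookMVA( "Cuts", "weights/TMVAClassification_Cuts.weights.xml" );

   TMVA::MethodCuts* mcuts = reader->FindCutsMVA( "Cuts" );
   std::vector<Double_t> cutsMin, cutsMax;
   mcuts->GetCuts( effS, cutsMin, cutsMax ); // cut window at signal efficiency effS
   for (UInt_t i = 0; i < cutsMin.size(); i++)
      std::cout << names[i] << " : [" << cutsMin[i] << ", " << cutsMax[i] << "]" << std::endl;
}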
void TMVAClassificationElecTau(std::string ordering_ = "Pt", std::string bkg_ = "qqH115vsWZttQCD") { TMVA::Tools::Instance(); TString outfileName( "TMVAElecTau"+ordering_+"Ord_"+bkg_+".root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationElecTau"+ordering_+"Ord_"+bkg_, outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" ); factory->AddVariable( "pt1", "pT-tag1", "GeV/c" , 'F' ); factory->AddVariable( "pt2", "pT-tag2", "GeV/c" , 'F' ); factory->AddVariable( "Deta","|y-tag1 - y-tag2|","" , 'F' ); //factory->AddVariable( "opposite:=abs(eta1*eta2)/eta1/eta2","sign1*sign2","" , 'F' ); //factory->AddVariable( "Dphi", "#Delta#phi" ,"" , 'F' ); factory->AddVariable( "Mjj", "M(tag1,tag2)", "GeV/c^{2}" , 'F' ); factory->AddSpectator( "eta1", "#eta_{tag1}" , 'F' ); factory->AddSpectator( "eta2", "#eta_{tag2}" , 'F' ); factory->SetWeightExpression( "sampleWeight" ); TString fSignalName = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleVBFH115-powheg-PUS1_Open_ElecTauStream.root"; TString fBackgroundNameDYJets = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleZjets-alpgen-PUS1_Open_ElecTauStream.root"; TString fBackgroundNameWJets = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleWJets-madgraph-PUS1_Open_ElecTauStream.root"; TString fBackgroundNameQCD = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleQCD_Open_ElecTauStream.root"; TString fBackgroundNameTTbar = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleTTJets-madgraph-PUS1_Open_ElecTauStream.root"; TFile *fSignal(0); TFile *fBackgroundDYJets(0); TFile *fBackgroundWJets(0); TFile *fBackgroundQCD(0); TFile *fBackgroundTTbar(0); fSignal = TFile::Open( fSignalName ); fBackgroundDYJets = TFile::Open( fBackgroundNameDYJets ); fBackgroundWJets = TFile::Open( fBackgroundNameWJets ); fBackgroundQCD = TFile::Open( fBackgroundNameQCD ); fBackgroundTTbar = TFile::Open( fBackgroundNameTTbar ); if(!fSignal || !fBackgroundDYJets || !fBackgroundWJets || !fBackgroundQCD || !fBackgroundTTbar) { std::cout << "ERROR: could not open files" << std::endl; exit(1); } TString tree = "outTree"+ordering_+"Ord"; TCut mycuts = ""; TCut mycutb = ""; TCut cutA = "pt1>0 && tightestHPSWP>0"; TCut cutB = "pt1>0 && combRelIsoLeg1<0.1"; TCut cutBl = "pt1>0 && combRelIsoLeg1<0.3"; TCut cutC = "pt1>0 && diTauCharge==0"; TCut cutD = "pt1>0 && MtLeg1<40"; // select events for training TFile* dummy = new TFile("dummy.root","RECREATE"); TH1F* allEvents = new TH1F("allEvents","",1,-10,10); float totalEvents, cutEvents; // signal: all TTree *signal = ((TTree*)(fSignal->Get(tree)))->CopyTree(cutA&&cutB&&cutC&&cutD); cout << "Copied signal tree with full selection: " << ((TTree*)(fSignal->Get(tree)))->GetEntries() << " --> " << signal->GetEntries() << endl; allEvents->Reset(); signal->Draw("eta1>>allEvents","sampleWeight"); cutEvents = allEvents->Integral(); Double_t signalWeight = 1.0; cout << "Signal: expected yield " << cutEvents << " -- weight " << signalWeight << endl; // Z+jets: all TTree *backgroundDYJets = ((TTree*)(fBackgroundDYJets->Get(tree)))->CopyTree(cutA&&cutB&&cutC&&cutD); cout << "Copied DYJets tree with full selection: " << ((TTree*)(fBackgroundDYJets->Get(tree)))->GetEntries() << " --> " << backgroundDYJets->GetEntries() << endl; allEvents->Reset(); backgroundDYJets->Draw("eta1>>allEvents","sampleWeight"); cutEvents = allEvents->Integral(); 
Double_t backgroundDYJetsWeight = 1.0; cout << "ZJets: expected yield " << cutEvents << " -- weight " << backgroundDYJetsWeight << endl; // W+jets: iso+Mt TTree *backgroundWJets = ((TTree*)(fBackgroundWJets->Get(tree)))->CopyTree(cutB&&cutD); cout << "Copied WJets tree with iso+Mt selection: " << ((TTree*)(fBackgroundWJets->Get(tree)))->GetEntries() << " --> " << backgroundWJets->GetEntries() << endl; allEvents->Reset(); backgroundWJets->Draw("eta1>>allEvents","sampleWeight"); totalEvents = allEvents->Integral(); allEvents->Reset(); backgroundWJets->Draw("eta1>>allEvents","sampleWeight*(tightestHPSWP>0 && diTauCharge==0)"); cutEvents = allEvents->Integral(); Double_t backgroundWJetsWeight = cutEvents / totalEvents; cout << "WJets: expected yield " << cutEvents << " -- weight " << backgroundWJetsWeight << endl; // QCD: Mt+loose iso TTree *backgroundQCD = ((TTree*)(fBackgroundQCD->Get(tree)))->CopyTree(cutD&&cutBl); cout << "Copied QCD tree with Mt selection: " << ((TTree*)(fBackgroundQCD->Get(tree)))->GetEntries() << " --> " << backgroundQCD->GetEntries() << endl; allEvents->Reset(); backgroundQCD->Draw("eta1>>allEvents","sampleWeight"); totalEvents = allEvents->Integral(); allEvents->Reset(); backgroundQCD->Draw("eta1>>allEvents","sampleWeight*(tightestHPSWP>0 && diTauCharge==0 && combRelIsoLeg1<0.1)"); cutEvents = allEvents->Integral(); Double_t backgroundQCDWeight = cutEvents / totalEvents; cout << "QCD: expected yield " << cutEvents << " -- weight " << backgroundQCDWeight << endl; // TTbar: iso+Mt TTree *backgroundTTbar = ((TTree*)(fBackgroundTTbar->Get(tree)))->CopyTree(cutB&&cutD); cout << "Copied TTbar tree with iso+Mt selection: " << ((TTree*)(fBackgroundTTbar->Get(tree)))->GetEntries() << " --> " << backgroundTTbar->GetEntries() << endl; allEvents->Reset(); backgroundTTbar->Draw("eta1>>allEvents","sampleWeight"); totalEvents = allEvents->Integral(); allEvents->Reset(); backgroundTTbar->Draw("eta1>>allEvents","sampleWeight*(tightestHPSWP>0 && diTauCharge==0)"); cutEvents = allEvents->Integral(); Double_t backgroundTTbarWeight = cutEvents / totalEvents; cout << "TTbar: expected yield " << cutEvents << " -- weight " << backgroundTTbarWeight << endl; delete allEvents; factory->AddSignalTree ( signal, signalWeight ); //factory->AddBackgroundTree( backgroundDYJets, backgroundDYJetsWeight ); //factory->AddBackgroundTree( backgroundWJets, backgroundWJetsWeight ); factory->AddBackgroundTree( backgroundQCD, backgroundQCDWeight ); //factory->AddBackgroundTree( backgroundTTbar, backgroundTTbarWeight ); factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=1:nTest_Background=1:SplitMode=Random:NormMode=NumEvents:!V" ); factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=GA:EffSel:CutRangeMin[0]=25.:CutRangeMax[0]=999:CutRangeMin[1]=25.:CutRangeMax[1]=999.:CutRangeMin[2]=1.0:CutRangeMax[2]=9.:CutRangeMin[3]=100:CutRangeMax[3]=7000:VarProp=FSmart" ); /* factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=200:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); */ factory->TrainAllMethods(); factory->TestAllMethods(); factory->EvaluateAllMethods(); outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; //if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
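// The per-tree weights computed above are cut efficiencies rather than
// cross-section weights: WJets, QCD and TTbar are handed to the factory with
// looser control selections (iso+Mt, or Mt plus loose isolation) to retain
// training statistics, and each tree is weighted by the fraction of its
// expected yield that survives the remaining signal-region cuts, so the
// training mixture still reflects the background composition after the full
// selection. Signal and DYJets already carry the full selection, hence weight 1.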
//------------------------------------------------------------------------------ // MVATrain //------------------------------------------------------------------------------ void MVATrain(float metPfType1_cut, float mt2ll_cut, TString signal) { TFile* outputfile = TFile::Open(trainingdir + signal + ".root", "recreate"); // Factory //---------------------------------------------------------------------------- TMVA::Factory* factory = new TMVA::Factory(signal, outputfile, // "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"); "!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification"); // Get the trees //---------------------------------------------------------------------------- _mctree.clear(); AddProcess("signal" , signal);//"01_Data_reduced_1outof6"); //signal AddProcess("background", "04_TTTo2L2Nu"); /*AddProcess("background", "14_HZ"); AddProcess("background", "10_HWW"); AddProcess("background", "06_WW"); AddProcess("background", "02_WZTo3LNu"); AddProcess("background", "03_VZ"); AddProcess("background", "11_Wg"); AddProcess("background", "07_ZJets"); AddProcess("background", "09_TTV"); AddProcess("background", "05_ST"); AddProcess("background", "00_Fakes_reduced_1outof6");*/ Double_t weight = 1.0; factory->AddSignalTree(_signaltree, weight); for (UInt_t i=0; i<_mctree.size(); i++) factory->AddBackgroundTree(_mctree[i], weight); factory->SetWeightExpression("eventW"); // Add variables //---------------------------------------------------------------------------- // Be careful with the order: it must be respected at the reading step // factory->AddVariable("<var1>+<var2>", "pretty title", "unit", 'F'); factory->AddVariable("newdarkpt" , "", "", 'F'); //factory->AddVariable("topRecoW" , "", "", 'F'); //factory->AddVariable("lep1pt" , "", "", 'F'); //factory->AddVariable("lep1eta" , "", "", 'F'); //factory->AddVariable("lep1phi" , "", "", 'F'); //factory->AddVariable("lep1mass" , "", "", 'F'); //factory->AddVariable("lep2pt" , "", "", 'F'); //factory->AddVariable("lep2eta" , "", "", 'F'); //factory->AddVariable("lep2phi" , "", "", 'F'); //factory->AddVariable("lep2mass" , "", "", 'F'); //factory->AddVariable("jet1pt " , "", "", 'F'); //factory->AddVariable("jet1eta" , "", "", 'F'); //factory->AddVariable("jet1phi" , "", "", 'F'); //factory->AddVariable("jet1mass" , "", "", 'F'); //factory->AddVariable("jet2pt" , "", "", 'F'); //factory->AddVariable("jet2eta" , "", "", 'F'); //factory->AddVariable("jet2phi" , "", "", 'F'); //factory->AddVariable("jet2mass" , "", "", 'F'); factory->AddVariable("metPfType1" , "", "", 'F'); //factory->AddVariable("metPfType1Phi", "", "", 'F'); //factory->AddVariable("m2l" , "", "", 'F'); factory->AddVariable("mt2ll" , "", "", 'F'); //factory->AddVariable("mt2lblb" , "", "", 'F'); //factory->AddVariable("mtw1" , "", "", 'F'); //factory->AddVariable("mtw2" , "", "", 'F'); //factory->AddVariable("ht" , "", "", 'F'); //factory->AddVariable("htjets" , "", "", 'F'); //factory->AddVariable("htnojets" , "", "", 'F'); //factory->AddVariable("njet" , "", "", 'F'); //factory->AddVariable("nbjet30csvv2l", "", "", 'F'); //factory->AddVariable("nbjet30csvv2m", "", "", 'F'); //factory->AddVariable("nbjet30csvv2t", "", "", 'F'); //factory->AddVariable("dphijet1met" , "", "", 'F'); //factory->AddVariable("dphijet2met" , "", "", 'F'); //factory->AddVariable("dphijj" , "", "", 'F'); //factory->AddVariable("dphijjmet" , "", "", 'F'); //factory->AddVariable("dphill" , "", "", 'F'); //factory->AddVariable("dphilep1jet1" , "", "", 'F'); 
//factory->AddVariable("dphilep1jet2" , "", "", 'F'); //factory->AddVariable("dphilep2jet1" , "", "", 'F'); //factory->AddVariable("dphilep2jet2" , "", "", 'F'); //factory->AddVariable("dphilmet1" , "", "", 'F'); //factory->AddVariable("dphilmet2" , "", "", 'F'); factory->AddVariable("dphillmet" , "", "", 'F'); //factory->AddVariable("sphericity" , "", "", 'F'); //factory->AddVariable("alignment" , "", "", 'F'); //factory->AddVariable("planarity" , "", "", 'F'); // Preselection cuts and preparation //---------------------------------------------------------------------------- //factory->PrepareTrainingAndTestTree(Form("metPfType1>%5.2f&&mt2ll>%5.2f&&newdarkpt>0.", metPfType1_cut, mt2ll_cut), "NormMode=EqualNumEvents:nTrain_Signal=80:nTest_Signal=80:nTrain_Background=400:nTest_Background=400:!V"); factory->PrepareTrainingAndTestTree("mt2ll>100.&&newdarkpt>0.&&metPfType1>80.", "NormMode=EqualNumEvents:nTrain_Signal=0:nTest_Signal=0:nTrain_Background=0:nTest_Background=0:!V"); // Book MVA //---------------------------------------------------------------------------- factory->BookMethod(TMVA::Types::kMLP, "MLP01", "H:!V:NeuronType=sigmoid:NCycles=500:VarTransform=Norm:HiddenLayers=6,3:TestRate=1:LearningRate=0.005"); //factory->BookMethod(TMVA::Types::kMLP, "MLP01", // "H:!V:NeuronType=sigmoid:NCycles=500:VarTransform=Norm:HiddenLayers=4,4:TestRate=3:LearningRate=0.005"); //factory->BookMethod(TMVA::Types::kMLP, "MLP02", // "H:!V:NeuronType=sigmoid:NCycles=40:VarTransform=Norm:HiddenLayers=20,10:TestRate=3:LearningRate=0.005"); //factory->BookMethod(TMVA::Types::kMLP, "MLP03", // "H:!V:NeuronType=sigmoid:NCycles=30:VarTransform=Norm:HiddenLayers=20,20:TestRate=3:LearningRate=0.005"); //factory->BookMethod(TMVA::Types::kBDT, "BDT04", "NTrees=50:MaxDepth=2" ); //factory->BookMethod(TMVA::Types::kBDT, "BDT05", "NTrees=50:MaxDepth=3" ); // Train, test and evaluate MVA //---------------------------------------------------------------------------- factory->TrainAllMethods(); // Train using the set of training events factory->TestAllMethods(); // Evaluate using the set of test events factory->EvaluateAllMethods(); // Evaluate and compare performance // Save the output //---------------------------------------------------------------------------- outputfile->Close(); delete factory; }
void TMVAClassification_qgl( TString myMethodList = "" )
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the
   // corresponding lines from .rootrc
   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   // if you like to use a method via the plugin mechanism, we recommend using
   //
   // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
   // (an example is given for using the BDT as plugin (see below),
   // but of course the real application is when you write your own
   // method based)
   //---------------------------------------------------------------

   // This loads the library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;
   Use["Fisher"]        = 0;
   Use["MLPBNN"]        = 0; // Recommended ANN with BFGS training method and bayesian regulator
   // --- Boosted Decision Trees
   Use["BDTD"]          = 0; // decorrelation + Adaptive Boost
   Use["BDTG"]          = 0;
   Use["Likelihood"]    = 0;
   Use["LikelihoodD"]   = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"] = 1;
   Use["LikelihoodMIX"] = 0;
   //
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;
      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);
         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // --------------------------------------------------------------------------------------------------
   // --- Here the preparation phase begins

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   // TString outfileName( "TMVA_bjet_new_powheg.root" );
   int set_type=1;
   TString type[2] = {"_double","_single"};
   TString outfileName( "TMVA_qgl"+type[set_type]+"_2jet_1.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory is
   // the only TMVA object you have to interact with
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   // factory->AddVariable( "myvar1 := var1+var2", 'F' );
   // factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' );
   factory->AddVariable( "Jet_pt",    "Jet p_{T}",        "GeV", 'F' );
   factory->AddVariable( "Jet_eta",   "Jet #eta",         "",    'F' );
   factory->AddVariable( "Jet_ptd",   "Jet ptd",          "",    'F' );
   factory->AddVariable( "Jet_axis2", "Jet axis2",        "",    'F' );
   factory->AddVariable( "Jet_mult",  "Jet multiplicity", "",    'I' );

   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
   // factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' );
   // factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' );

   // Read training and test data
   // (it is also possible to use ASCII format as input -> see TMVA Users Guide)
   //
   TString fname_signal ="/afs/cern.ch/work/n/nchernya/Hbb/qgl_mva/qgl_tmva_tree_VBFHToBB_M-125_13TeV_powheg_v14"+type[set_type]+"_2jets.root";
   TString fname_bg ="/afs/cern.ch/work/n/nchernya/Hbb/qgl_mva/qgl_tmva_tree_BTagCSV_v14"+type[set_type]+"_2jets.root";
   if (gSystem->AccessPathName( fname_signal )) {   // file does not exist in local directory
      cout<<"input file "<< fname_signal<<" doesn't exist!"<<endl;
      return;
   }
   if (gSystem->AccessPathName( fname_bg )) {   // file does not exist in local directory
      cout<<"input file "<< fname_bg<<" doesn't exist!"<<endl;
      return;
   }
   // gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root");

   TFile *input_signal = TFile::Open( fname_signal );
   TFile *input_bg     = TFile::Open( fname_bg );

   std::cout << "--- TMVAClassification : Using input signal file: " << input_signal->GetName() << std::endl;
   std::cout << "--- TMVAClassification : Using input bg file: "     << input_bg->GetName() << std::endl;

   // --- Register the training and test trees
   TTree *signal = (TTree*)input_signal->Get("QGL_1");
   TTree *bg     = (TTree*)input_bg->Get("QGL_1");

   // global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight = 1.0;
   Double_t bgWeight     = 1.0;

   // You can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( signal, signalWeight );
   factory->AddBackgroundTree( bg,     bgWeight );

   TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";

   // Tell the factory how to use the training and testing events
   //
   // If no numbers of events are given, half of the events in the tree are used
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut,
   //         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Fisher discriminant (same as LD)
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher",
                           "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN",
                           "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );

   if (Use["BDTG"])
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=120:MinNodeSize=6%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=3:NegWeightTreatment=IgnoreNegWeightsInTraining" );

   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // Decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   // PCA-transformed likelihood
   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" );

   // Use a kernel density estimator to approximate the PDFs
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" );

   // Use a variable-dependent mix of splines and kernel density estimator
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" );

   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------
   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events
   // ---- STILL EXPERIMENTAL and only implemented for BDT's !
// factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","FitGA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
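// ----------------------------------------------------------------------------
// Per-jet application sketch for the quark/gluon likelihood trained above
// (LikelihoodKDE is the only method switched on by default). This is an
// illustration, not part of the original macro; it assumes the default weight
// file weights/TMVAClassification_LikelihoodKDE.weights.xml.
// ----------------------------------------------------------------------------
Float_t evaluateQGL(Float_t pt, Float_t eta, Float_t ptd, Float_t axis2, Float_t mult)
{
   static TMVA::Reader* reader = 0;
   static Float_t var[5];
   if (!reader) {
      reader = new TMVA::Reader("!Color:!Silent");
      reader->AddVariable( "Jet_pt",    &var[0] );
      reader->AddVariable( "Jet_eta",   &var[1] );
      reader->AddVariable( "Jet_ptd",   &var[2] );
      reader->AddVariable( "Jet_axis2", &var[3] );
      reader->AddVariable( "Jet_mult",  &var[4] ); // declared 'I' at training; a float buffer is the usual reader-side choice
      reader->BookMVA( "LikelihoodKDE", "weights/TMVAClassification_LikelihoodKDE.weights.xml" );
   }
   var[0] = pt; var[1] = eta; var[2] = ptd; var[3] = axis2; var[4] = mult;
   return reader->EvaluateMVA( "LikelihoodKDE" );
}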
void TMVARegression( int optimIndex, int Cat=0, TString myMethodList = "" )
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the
   // corresponding lines from .rootrc
   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVARegression.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   //---------------------------------------------------------------

   // This loads the library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;
   // --- Multidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]   = 0;
   Use["PDEFoam"] = 0;
   Use["KNN"]     = 0;
   //
   // --- Linear Discriminant Analysis
   Use["LD"] = 0;
   //
   // --- Function Discriminant analysis
   Use["FDA_GA"]   = 0;
   Use["FDA_MC"]   = 0;
   Use["FDA_MT"]   = 0;
   Use["FDA_GAMT"] = 0;
   //
   // --- Neural Network
   Use["MLP"] = 0;
   //
   // --- Support Vector Machine
   Use["SVM"] = 0;
   //
   // --- Boosted Decision Trees
   Use["BDT"]  = 0;
   Use["BDTG"] = 1;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVARegression" << std::endl;

   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;
      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);
         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // --------------------------------------------------------------------------------------------------
   // --- Here the preparation phase begins

   // Create a new root output file
   TString outfileName( Form("TMVAoutput/TMVAReg_%i.root",optimIndex) );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory will
   // then run the performance analysis for you.
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( Form("TMVARegression_%i_Cat%i",optimIndex,Cat), outputFile,
                                               "!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=Regression" );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   factory->AddVariable( "jet_eta",           "jet_eta",        "units", 'F' );
   factory->AddVariable( "jet_emfrac",        "jet_emfrac",     "units", 'F' );
   factory->AddVariable( "jet_hadfrac",       "jet_hadfrac",    "units", 'F' );
   factory->AddVariable( "jet_nconstituents", "jet_nconst",     "units", 'F' );
   factory->AddVariable( "jet_vtx3dL",        "jet_vtx3dL",     "units", 'F' );
   factory->AddVariable( "MET",               "MET",            "units", 'F' );
   factory->AddVariable( "jet_dPhiMETJet",    "jet_dPhiMETJet", "units", 'F' );
   //factory->AddVariable( "hJet_vtxPt", "hJet_vtxPt", "units", 'F' );
   //factory->AddVariable( "hJet_JECUnc", "hJet_JECUnc", "units", 'F' );
   //factory->AddVariable( "hJet_ptLeadTrack", "hJet_ptLeadTrack", "units", 'F' );
   //factory->AddVariable( "hJet_SoftLeptPtCut:=(hJet_SoftLeptIdlooseMu>0. || hJet_SoftLeptId95>0.) ? (hJet_SoftLeptPt) : (-99)", "hJet_SoftLeptPt", "units", 'F' );
   //factory->AddVariable( "hJet_En", "hJet_En", "units", 'F' );
   //factory->AddVariable( "hJet_Et", "hJet_Et", "units", 'F' );
   //factory->AddVariable( "hJet_Mt", "hJet_Mt", "units", 'F' );
   //factory->AddVariable( "hJet_nch", "hJet_nch", "units", 'F' );
   //factory->AddVariable( "hJet_vtx3deL", "hJet_vtx3deL", "units", 'F' );
   //factory->AddVariable( "hJet_vtxMass", "hJet_vtxMass", "units", 'F' );
   //factory->AddVariable( "hJet_ptRaw", "hJet_ptRaw", "units", 'F' );
   //factory->AddVariable( "hJet_EnRaw", "hJet_EnRaw", "units", 'F' );
   //factory->AddVariable( "hJet_SoftLeptptRelCut:=(hJet_SoftLeptIdlooseMu>0. || hJet_SoftLeptId95>0.) ? (hJet_SoftLeptptRel) : (-99)", "hJet_SoftLeptptRel", "units", 'F' );
   //factory->AddVariable( "hJet_SoftLeptdRCut:=(hJet_SoftLeptIdlooseMu>0. || hJet_SoftLeptId95>0.) ? (hJet_SoftLeptdR) : (-99)", "hJet_SoftLeptdR", "units", 'F' );
   //factory->AddVariable( "rho25", "rho25", "units", 'F' );
   //factory->AddVariable( "dPhiMETJet", "dPhiMETJet", "units", 'F' );

   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA.
   // This TestTree will contain the input variables, the response values
   // of all trained MVAs, and the spectator variables

   // Add the variable carrying the regression target
   //factory->AddTarget( "jet_genPt" );
   factory->AddTarget( "jet_genJetPt/jet_pt" );

   // It is also possible to declare additional targets for multi-dimensional regression, ie:
   //    -- factory->AddTarget( "fvalue2" );
   // BUT: this is currently ONLY implemented for MLP

   // Read training and test data (see TMVAClassification for reading ASCII files)
   // load the signal and background event samples from ROOT trees
   /*
   TFile *input(0);
   TString fname = "./tmva_reg_example.root";
   if (!gSystem->AccessPathName( fname )) input = TFile::Open( fname ); // check if file in local directory exists
   else input = TFile::Open( "http://root.cern.ch/files/tmva_reg_example.root" ); // if not: download from ROOT server
   if (!input) {
      std::cout << "ERROR: could not open data file" << std::endl;
      exit(1);
   }
   std::cout << "--- TMVARegression : Using input file: " << input->GetName() << std::endl;

   // --- Register the regression tree
   TTree *regTree = (TTree*)input->Get("TreeR");
   */

   TChain chainTraining("Events");
   chainTraining.Add("TrainingFiles/training.root");
   TTree *regTreeTraining = (TTree*)&chainTraining;

   TChain chainTesting("Events");
   chainTesting.Add("TrainingFiles/testing.root");
   TTree *regTreeTesting = (TTree*)&chainTesting;

   // global event weights per tree (see below for setting event-wise weights)
   Double_t regWeight = 1.0;

   // You can add an arbitrary number of regression trees
   factory->AddTree( regTreeTraining, "Regression", regWeight, "", "training" );
   factory->AddTree( regTreeTesting,  "Regression", regWeight, "", "test" );

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycut = "hJet_pt[0]>20. && hJet_pt[1]>20. && fabs(hJet_eta[0])<2.5 && fabs(hJet_eta[1])<2.5 && hJet_csv[0]>0. && hJet_csv[1]>0. && hJet_ptLeadTrack[0]<1500. && hJet_ptLeadTrack[1]<1500. && hJet_genJetPt[0]>0. && hJet_genJetPt[1]>0. && hJet_puJetIdL[0]>0.0 && hJet_puJetIdL[1]>0.0";
   TCut testingCut = "hJet_pt[0]>20. && hJet_pt[1]>20. && abs(hJet_eta[0])<2.5 && abs(hJet_eta[1])<2.5";
   TCut mjjCut = "sqrt(pow(hJet_pt[0]*cos(hJet_phi[0])+hJet_pt[1]*cos(hJet_phi[1]),2)+pow(hJet_pt[0]*sin(hJet_phi[0])+hJet_pt[1]*sin(hJet_phi[1]),2)) < 110";
   if(Cat==1) mjjCut = "sqrt(pow(hJet_pt[0]*cos(hJet_phi[0])+hJet_pt[1]*cos(hJet_phi[1]),2)+pow(hJet_pt[0]*sin(hJet_phi[0])+hJet_pt[1]*sin(hJet_phi[1]),2)) > 110";
   TCut jetPtCut = "jet_pt>90";
   if(Cat==1) jetPtCut = "jet_pt>90"; // note: currently the same threshold for both categories
   //TCut trainingCut = "hJet_pt[0]>20. && hJet_pt[1]>20. && abs(hJet_eta[0])<2.5 && abs(hJet_eta[1])<2.5 && hJet_genJetPt[0]>0. && hJet_genJetPt[1]>0. && hJet_csv[0]>0.0 && hJet_csv[1]>0.0 && hJet_ptLeadTrack[0]<1500. && hJet_ptLeadTrack[1]<1500.";
   TCut trainingCut = "jet_pt>20. && abs(jet_eta)<2.5 && jet_genJetPt>0. && jet_dRJetGenJet < 0.4 && (jet_partonID)==5";
   // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1";

   // tell the factory to use all remaining events in the trees after training for testing:
   // factory->PrepareTrainingAndTestTree(mycut, "nTrain_Regression=600000:nTest_Regression=600000:SplitMode=Random:NormMode=NumEvents:!V");
   //factory->PrepareTrainingAndTestTree(mycut, "nTrain_Regression=337500:nTest_Regression=337500:SplitMode=Random:NormMode=NumEvents:!V");
   // factory->PrepareTrainingAndTestTree(mycut, "nTrain_Regression=158393:nTest_Regression=158393:SplitMode=Random:NormMode=NumEvents:!V");
   //factory->PrepareTrainingAndTestTree(mycut, "nTrain_Regression=14197:nTest_Regression=14197:SplitMode=Random:NormMode=NumEvents:!V");
   factory->PrepareTrainingAndTestTree(trainingCut+jetPtCut, "!V");

   // If no numbers of events are given, half of the events in the tree are used
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );

   // ---- Book MVA methods
   //
   // please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // PDE - RS method
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" );
   // And the options strings for the MinMax and RMS methods, respectively:
   //    "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //    "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );

   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // Linear discriminant
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "!H:!V:VarTransform=None" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. the formula of this example is good for parabolas
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   // Neural network (MLP)
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP",
                           "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" );

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   //100, 5
   // Boosted Decision Tree
   //100,5 nCuts=-1
   if (Use["BDT"])
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=100:nEventsMin=4:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=-1:PruneMethod=CostComplexity:PruneStrength=30" );

   const bool doScan1=0;
   int ntreesArray[8] = {100,200,300,400,500,600,700,800};
   float shrinkageArray[3] = {0.1,0.2,0.3};
   float gradbaggingfracArray[3] = {0.7,0.8,0.9};
   int maxdepthArray[3] = {2,3,4};
   int nnodesmaxArray[3] = {5,10,15};
   if(!doScan1) shrinkageArray[2]=1.0;
   int nnodesmaxIndex=-1,maxdepthIndex=-1,gradbaggingfracIndex=-1,shrinkageIndex=-1,ntreesIndex=-1;
   if(doScan1){
      nnodesmaxIndex       = optimIndex/216;
      maxdepthIndex        = (optimIndex-nnodesmaxIndex*216)/72;
      gradbaggingfracIndex = (optimIndex-nnodesmaxIndex*216-maxdepthIndex*72)/24;
      shrinkageIndex       = (optimIndex-nnodesmaxIndex*216-maxdepthIndex*72-gradbaggingfracIndex*24)/8;
      ntreesIndex          = (optimIndex-nnodesmaxIndex*216-maxdepthIndex*72-gradbaggingfracIndex*24-shrinkageIndex*8);
   }
   else{
      nnodesmaxIndex = optimIndex/72;
      maxdepthIndex  = (optimIndex-nnodesmaxIndex*72)/24;
      shrinkageIndex = (optimIndex-nnodesmaxIndex*72-maxdepthIndex*24)/8;
      ntreesIndex    = (optimIndex-nnodesmaxIndex*72-maxdepthIndex*24-shrinkageIndex*8);
   }
   if (Use["BDTG"]){
      if(doScan1)
         factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                              Form("!H:!V:NTrees=%i::BoostType=Grad:Shrinkage=%.1f:UseBaggedGrad:GradBaggingFraction=%.1f:nCuts=200:MaxDepth=%i:NNodesMax=%i",ntreesArray[ntreesIndex],shrinkageArray[shrinkageIndex],gradbaggingfracArray[gradbaggingfracIndex],maxdepthArray[maxdepthIndex],nnodesmaxArray[nnodesmaxIndex]) );
      else
         factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                              Form("!H:!V:IgnoreNegWeights:NTrees=%i::BoostType=Grad:Shrinkage=%.1f:nCuts=200:MaxDepth=%i:NNodesMax=%i",ntreesArray[ntreesIndex],shrinkageArray[shrinkageIndex],maxdepthArray[maxdepthIndex],nnodesmaxArray[nnodesmaxIndex]) );
   }
   if(doScan1)
      cout << Form("!H:!V:NTrees=%i::BoostType=Grad:Shrinkage=%.2f:UseBaggedGrad:GradBaggingFraction=%.2f:nCuts=200:MaxDepth=%i:NNodesMax=%i",ntreesArray[ntreesIndex],shrinkageArray[shrinkageIndex],gradbaggingfracArray[gradbaggingfracIndex],maxdepthArray[maxdepthIndex],nnodesmaxArray[nnodesmaxIndex])<<endl;
   else
      cout << Form("!H:!V:IgnoreNegWeights:NTrees=%i::BoostType=Grad:Shrinkage=%.1f:nCuts=200:MaxDepth=%i:NNodesMax=%i",ntreesArray[ntreesIndex],shrinkageArray[shrinkageIndex],maxdepthArray[maxdepthIndex],nnodesmaxArray[nnodesmaxIndex]) << endl;

   // --------------------------------------------------------------------------------------------------
   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVARegression is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVARegGui( outfileName );
}
void TMVAClassification_cc1pcoh_bdt_verFF( TString myMethodList = "" ) { //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // to get access to the GUI and all tmva macros TString thisdir = gSystem->DirName(gInterpreter->GetCurrentMacroName()); gROOT->SetMacroPath(thisdir + ":" + gROOT->GetMacroPath()); gROOT->ProcessLine(".L TMVAGui.C"); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Cut optimisation Use["Cuts"] = 1; Use["CutsD"] = 1; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // // --- 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 1; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 1; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // --- Mutidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 1; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 1; Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting Use["KNN"] = 1; // k-nearest neighbour method // // --- Linear Discriminant Analysis Use["LD"] = 1; // Linear Discriminant identical to Fisher Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 1; // minimisation of user-defined function using Genetics Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 0; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 1; // Recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN // // --- Support Vector Machine Use["SVM"] = 1; // // --- Boosted Decision Trees Use["BDT"] = 1; // uses Adaptive Boost Use["BDTG"] = 0; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of fisher discriminant for node splitting // // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules") Use["RuleFit"] = 1; // --------------------------------------------------------------- // Choose method std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // --------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. 
TString outfileName( "TMVA_cc1pcoh_bdt_verFF.root" );//newchange TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification_verFF", outputFile,//newchange "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // Add variable //sprintf(select, "Ntrack==2&&mumucl>0.6&&pmucl>0.25&&pang<90&&muang_t<15 && veract*7.66339869e-2<34"); factory->AddVariable( "mumucl", 'F' ); factory->AddVariable( "pmucl", 'F' ); factory->AddVariable( "pang_t", 'F' );//this is fixed with backward factory->AddVariable( "muang_t", 'F' ); factory->AddVariable( "ppe", 'F'); factory->AddVariable( "mupe", 'F'); factory->AddVariable( "rangetot", 'F');//total range in PM and INGRID factory->AddVariable( "prangetot", 'F');//total range in PM and INGRID factory->AddVariable( "coplanarity", 'F'); factory->AddVariable( "opening", 'F');//newadd // Add spectator factory->AddSpectator( "fileIndex", 'I' ); factory->AddSpectator( "nuE", 'F' ); factory->AddSpectator( "inttype", 'I' ); factory->AddSpectator( "norm", 'F' ); factory->AddSpectator( "totcrsne", 'F' ); factory->AddSpectator( "veract", 'F' ); // --------------------------------------------------------------- // --- Get weight TString fratioStr="/home/kikawa/macros/nd34_tuned_11bv3.1_250ka.root"; // --------------------------------------------------------------- // --- Add sample TString fsignalStr="/home/cvson/cc1picoh/dataProcess/fix20150420/pm_merged_ccqe_tot.root"; TString fbarStr="/home/cvson/cc1picoh/dataProcess/fix20150420/pmbar_merged_ccqe.root"; TString fbkgStr="/home/cvson/cc1picoh/dataProcess/fix20150420/wall_merged_ccqe_tot.root"; TString fbkg2Str="/home/cvson/cc1picoh/dataProcess/fix20150420/ingrid_merged_nd3_ccqe_tot.root"; TFile *pfileSignal = new TFile(fsignalStr); TFile *pfileBar = new TFile(fbarStr); TFile *pfileBkg = new TFile(fbkgStr); TFile *pfileBkg2 = new TFile(fbkg2Str); TFile *pfileRatio = new TFile(fratioStr); TTree *ptree_sig = (TTree*)pfileSignal->Get("tree"); TTree *ptree_bar = (TTree*)pfileBar->Get("tree"); TTree *ptree_bkg = (TTree*)pfileBkg->Get("tree"); TTree *ptree_bkg2 = (TTree*)pfileBkg2->Get("tree"); // POT normalization const int nmcFile = 3950; const int nbarFile = 986; const int nbkgFile = 55546;//(31085+24461); const int nbkg2File = 7882;//(3941+3941); // global event weights per tree (see below for setting event-wise weights) // adding for signal sample // using this as standard and add other later Double_t signalWeight_sig = 1.0; Double_t backgroundWeight_sig = 1.0; factory->AddSignalTree ( ptree_sig, signalWeight_sig ); factory->AddBackgroundTree( ptree_sig, backgroundWeight_sig ); // Add Numubar sample //Double_t signalWeight_bar = nmcFile/float(nbarFile); Double_t backgroundWeight_bar = nmcFile/float(nbarFile); //factory->AddSignalTree ( ptree_bar, signalWeight_bar ); factory->AddBackgroundTree( ptree_bar, backgroundWeight_bar ); // Add wall background //Double_t signalWeight_bkg = nmcFile/float(nbkgFile); Double_t backgroundWeight_bkg = nmcFile/float(nbkgFile); //factory->AddSignalTree ( ptree_bkg, signalWeight_bkg ); factory->AddBackgroundTree( ptree_bkg, backgroundWeight_bkg ); // Add INGRID background //Double_t signalWeight_bkg2 = nmcFile/float(nbkg2File); Double_t backgroundWeight_bkg2 = nmcFile/float(nbkg2File); //factory->AddSignalTree ( ptree_bkg2, signalWeight_bkg2 ); factory->AddBackgroundTree( ptree_bkg2, backgroundWeight_bkg2 ); //factory->SetSignalWeightExpression 
("norm*totcrsne*2.8647e-13"); //factory->SetBackgroundWeightExpression( "norm*totcrsne*2.8647e-13" ); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = "Ntrack==2 && abs(inttype)==16 && fileIndex==1"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = "Ntrack==2 && (abs(inttype)!=16 || fileIndex>1)"; // for example: TCut mycutb = "abs(var1)<0.5"; // Tell the factory how to use the training and testing events // // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood ("naive Bayes estimator") if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density estimator to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", 
"!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", 
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... 
// Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" ); /*if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );*/ if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=850:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" ); if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier usage, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // ---- STILL EXPERIMENTAL and only implemented for BDT's ! // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","FitGA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros //if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
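// ----------------------------------------------------------------------------
// Application sketch for the BDT trained above (illustration only, not part of
// the original macro). It assumes the default weight file
// weights/TMVAClassification_verFF_BDT.weights.xml. Spectators must be
// declared to the reader as well, in the same order as at training time.
// ----------------------------------------------------------------------------
Double_t evaluateCC1pcohBDT()
{
   TMVA::Reader* reader = new TMVA::Reader("!Color:!Silent");

   Float_t mumucl, pmucl, pang_t, muang_t, ppe, mupe, rangetot, prangetot, coplanarity, opening;
   Int_t   fileIndex, inttype;                  // 'I' spectators
   Float_t nuE, norm, totcrsne, veract;         // 'F' spectators

   reader->AddVariable( "mumucl",      &mumucl );
   reader->AddVariable( "pmucl",       &pmucl );
   reader->AddVariable( "pang_t",      &pang_t );
   reader->AddVariable( "muang_t",     &muang_t );
   reader->AddVariable( "ppe",         &ppe );
   reader->AddVariable( "mupe",        &mupe );
   reader->AddVariable( "rangetot",    &rangetot );
   reader->AddVariable( "prangetot",   &prangetot );
   reader->AddVariable( "coplanarity", &coplanarity );
   reader->AddVariable( "opening",     &opening );

   reader->AddSpectator( "fileIndex", &fileIndex );
   reader->AddSpectator( "nuE",       &nuE );
   reader->AddSpectator( "inttype",   &inttype );
   reader->AddSpectator( "norm",      &norm );
   reader->AddSpectator( "totcrsne",  &totcrsne );
   reader->AddSpectator( "veract",    &veract );

   reader->BookMVA( "BDT", "weights/TMVAClassification_verFF_BDT.weights.xml" );

   // fill the ten input variables from the analysis tree in an event loop;
   // dummy values here just keep the sketch self-contained
   mumucl = 0.8; pmucl = 0.5; pang_t = 20.; muang_t = 10.; ppe = 50.; mupe = 100.;
   rangetot = 30.; prangetot = 15.; coplanarity = 170.; opening = 60.;

   Double_t response = reader->EvaluateMVA("BDT");
   delete reader;
   return response;
}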
void tmva_test_lite(const int sigreg = true){ const string outfileName("TMVA.root"); TFile* outputFile = TFile::Open(outfileName.c_str(),"RECREATE"); string name; if(!sigreg) name = string("MVAnalysisLite"); else name = string("MVAnalysis_sig_Lite"); TMVA::Factory *factory = new TMVA::Factory(name.c_str(),outputFile,"!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"); TFile *inputSig = TFile::Open("/home/vitaly/B0toDh0/Tuples/fil_b2dh_sigmc.root"); TFile *inputBack = TFile::Open("/home/vitaly/B0toDh0/Tuples/fil_b2dh_cont.root"); // TFile *tfile = new TFile("","RECREATE"); TTree* insigtree = (TTree*)inputSig->Get("TEventTr"); TTree* inbacktree = (TTree*)inputBack->Get("TEventTr"); factory->AddVariable("abs(cos_b0)"); // factory->AddVariable("p_ks"); factory->AddVariable("log(chi2_ndf_D0)"); // factory->AddVariable("log(chi2_ndf_B0)"); // factory->AddVariable("log(chi2_tag_vtx/ndf_tag_vtx)"); factory->AddVariable("abs(cos_thr)"); factory->AddVariable("thr_sig"); factory->AddVariable("thr_oth"); factory->AddVariable("log(tag_LH_err)"); // factory->AddVariable("log(dzerr)"); // factory->AddVariable("log(pi0_chi2)"); // factory->AddVariable("log(egamma)"); // factory->AddVariable("log(ptgamma)"); factory->AddVariable("k1mm2"); factory->AddVariable("k1et"); factory->AddVariable("k1hso00"); // factory->AddVariable("k1hso01"); factory->AddVariable("k1hso02"); // factory->AddVariable("k1hso03"); factory->AddVariable("k1hso04"); factory->AddVariable("k1hso10"); factory->AddVariable("k1hso12"); factory->AddVariable("k1hso14"); factory->AddVariable("k1hso20"); factory->AddVariable("k1hso22"); factory->AddVariable("k1hso24"); factory->AddVariable("k1hoo0"); factory->AddVariable("k1hoo1"); factory->AddVariable("k1hoo2"); factory->AddVariable("k1hoo3"); factory->AddVariable("k1hoo4"); // factory->AddVariable("k0mm2"); // factory->AddVariable("k0et"); // factory->AddVariable("k0hso00"); // factory->AddVariable("k0hso02"); // factory->AddVariable("k0hso04"); // factory->AddVariable("k0hso10"); // factory->AddVariable("k0hso12"); // factory->AddVariable("k0hso14"); // factory->AddVariable("k0hso20"); // factory->AddVariable("k0hso22"); // factory->AddVariable("k0hso24"); // factory->AddVariable("k0hoo0"); // factory->AddVariable("k0hoo1"); // factory->AddVariable("k0hoo2"); // factory->AddVariable("k0hoo4"); factory->AddSignalTree(insigtree,1.0); factory->AddBackgroundTree(inbacktree,1.0); // string Common_precuts("mbc>5.271 && mbc<5.289 && de<0.08 && de>-0.1 && chi2_ndf_B0<1000 && abs(mks_raw-0.4975)<0.009 && abs(md0_raw-1.865)<0.015 && abs(mpi0_raw-0.135)<0.012"); string Common_precuts("chi2_ndf_B0<1000 && de<0.1 && de>-0.1 && mbc>5.25 && mbc<5.289"); if(sigreg) Common_precuts += string(" && mbc>5.271 && mbc<5.289 && de<0.08 && de>-0.1"); string sig_cuts = Common_precuts;// + string(""); string back_cuts = Common_precuts;// + string(" && abs(md0_raw-1.865)<0.030 && abs(mpi0_raw-0.135)<0.030"); TCut cutsig(sig_cuts.c_str()); TCut cutback(back_cuts.c_str()); factory->PrepareTrainingAndTestTree(cutsig,cutback,"nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // factory->BookMethod( TMVA::Types::kCuts, "Cuts","!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); // factory->BookMethod(TMVA::Types::kBDT,"BDTG","!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggedFraction=0.6:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrenght=50:NNodesMax=5"); 
factory->BookMethod(TMVA::Types::kBDT,"BDTG","!H:!V:NTrees=800:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:NNodesMax=5"); factory->BookMethod(TMVA::Types::kBDT,"BDT","!H:!V:NTrees=850:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20"); factory->TrainAllMethods(); factory->TestAllMethods(); factory->EvaluateAllMethods(); outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName.c_str() ); }
void TMVAClassification( TString myMethodList = "" ) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) // this loads the library TMVA::Tools::Instance(); //--------------------------------------------------------------- // default MVA methods to be trained + tested std::map<std::string,int> Use; Use["Cuts"] = 1; Use["CutsD"] = 1; Use["CutsPCA"] = 1; Use["CutsGA"] = 1; Use["CutsSA"] = 1; // --- Use["Likelihood"] = 1; Use["LikelihoodD"] = 1; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 1; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 1; Use["LikelihoodMIX"] = 1; // --- Use["PDERS"] = 1; Use["PDERSD"] = 1; Use["PDERSPCA"] = 1; Use["PDERSkNN"] = 1; // depreciated until further notice Use["PDEFoam"] = 1; // -- Use["KNN"] = 1; // --- Use["HMatrix"] = 1; Use["Fisher"] = 1; Use["FisherG"] = 1; Use["BoostedFisher"] = 1; Use["LD"] = 1; // --- Use["FDA_GA"] = 1; Use["FDA_SA"] = 1; Use["FDA_MC"] = 1; Use["FDA_MT"] = 1; Use["FDA_GAMT"] = 1; Use["FDA_MCMT"] = 1; // --- Use["MLP"] = 1; // this is the recommended ANN Use["MLPBFGS"] = 1; // recommended ANN with optional training method Use["CFMlpANN"] = 1; // *** missing Use["TMlpANN"] = 1; // --- Use["SVM"] = 1; // --- Use["BDT"] = 1; Use["BDTD"] = 0; Use["BDTG"] = 1; Use["BDTB"] = 0; // --- Use["RuleFit"] = 1; // --- Use["Plugin"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // Create a new root output file. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory will // then run the performance analysis for you. // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" 
(not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] factory->AddVariable( "myvar1 := var1+var2", 'F' ); factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' ); factory->AddVariable( "var3", "Variable 3", "units", 'F' ); factory->AddVariable( "var4", "Variable 4", "units", 'F' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables factory->AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' ); factory->AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' ); // read training and test data Bool_t ReadDataFromAsciiIFormat = kFALSE; // default: read the ROOT trees below if (ReadDataFromAsciiIFormat) { // load the signal and background event samples from ascii files // format in file must be: // var1/F:var2/F:var3/F:var4/F // 0.04551 0.59923 0.32400 -0.19170 // ... TString datFileS = "tmva_example_sig.dat"; TString datFileB = "tmva_example_bkg.dat"; factory->SetInputTrees( datFileS, datFileB ); } else { // load the signal and background event samples from ROOT trees TFile *input(0); TString fname = "../macros/tmva_example.root"; if (!gSystem->AccessPathName( fname )) { input = TFile::Open( fname ); // check if file in local directory exists } else { input = TFile::Open( "http://root.cern.ch/files/tmva_class_example.root" ); // if not: download from ROOT server } if (!input) { std::cout << "ERROR: could not open data file" << std::endl; exit(1); } std::cout << "--- TMVAClassification : Using input file: " << input->GetName() << std::endl; TTree *signal = (TTree*)input->Get("TreeS"); TTree *background = (TTree*)input->Get("TreeB"); // global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; // ====== register trees ==================================================== // // the following method is the preferred one: // you can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); // To give different trees for training and testing, do as follows: // factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ); // factory->AddSignalTree( signalTestTree, signalTestWeight, "Test" ); // Use the following code instead of the above two or four lines to add signal and background // training and test events "by hand" // NOTE that in this case one should not give expressions (such as "var1+var2") in the input // variable definition, but simply compute the expression before adding the event // // // --- begin ---------------------------------------------------------- // std::vector<Double_t> vars( 4 ); // vector has size of number of input variables // Float_t treevars[4]; // for (Int_t ivar=0; ivar<4; ivar++)
signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (Int_t i=0; i<signal->GetEntries(); i++) { // signal->GetEntry(i); // for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < signal->GetEntries()/2) factory->AddSignalTrainingEvent( vars, signalWeight ); // else factory->AddSignalTestEvent ( vars, signalWeight ); // } // // for (Int_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (Int_t i=0; i<background->GetEntries(); i++) { // background->GetEntry(i); // for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight ); // else factory->AddBackgroundTestEvent ( vars, backgroundWeight ); // } // // --- end ------------------------------------------------------------ // // ====== end of register trees ============================================== } // This would set individual event weights (the variables defined in the // expression need to exist in the original TTree) // for signal : factory->SetSignalWeightExpression("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("weight1*weight2"); factory->SetBackgroundWeightExpression("weight"); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; // tell the factory to use all remaining events in the trees after training for testing: factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // If no numbers of events are given, half of the events in the tree are used for training, and // the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); // ---- Book MVA methods // // please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", 
"!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // test the decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // test the new kernel density estimator if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // test the mixed splines and kernel density estimator (depending on which variable) if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSkNN"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSkNN", "!H:!V:VolumeRangeMode=kNN:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:CutNmin=T:Nmin=100:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" ); // Fisher discriminant if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=60:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: 
ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2"); // Linear discriminant (same as Fisher) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS" ); // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... 
// Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // -------------------------------------------------------------------------------------------------- // As an example of how to use the ROOT plugin mechanism, book BDT via // plugin mechanism if (Use["Plugin"]) { // // first the plugin has to be defined, which can happen either through the following line in the local or global .rootrc: // // # plugin handler plugin name(regexp) class to be instantiated library constructor format // Plugin.TMVA@@MethodBase: ^BDT TMVA::MethodBDT TMVA.1 "MethodBDT(TString,TString,DataSet&,TString)" // // or by telling the global plugin manager directly gPluginMgr->AddHandler("TMVA@@MethodBase", "BDT", "TMVA::MethodBDT", "TMVA.1", "MethodBDT(TString,TString,DataSet&,TString)"); factory->BookMethod( TMVA::Types::kPlugins, "BDT", "!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=50" ); } // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethodsForClassification(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
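When applying weights trained with derived variables, the same expressions (and any trained spectators) must be re-declared to the TMVA::Reader with exactly the strings used above. A minimal sketch follows; the weight-file path assumes TMVA's default naming for this factory name.

// Application sketch for the macro above (weight-file path assumed).
TMVA::Reader* reader = new TMVA::Reader("!Color:!Silent");
Float_t myvar1, myvar2, var3, var4, spec1, spec2;
reader->AddVariable("myvar1 := var1+var2", &myvar1);
reader->AddVariable("myvar2 := var1-var2", &myvar2);
reader->AddVariable("var3", &var3);
reader->AddVariable("var4", &var4);
reader->AddSpectator("spec1:=var1*2", &spec1);
reader->AddSpectator("spec2:=var1*3", &spec2);
reader->BookMVA("BDT", "weights/TMVAClassification_BDT.weights.xml");
// per event: compute myvar1 = var1+var2 etc. by hand, then
Double_t mva = reader->EvaluateMVA("BDT");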
void Boost2(){ // This loads the library TMVA::Tools::Instance(); // to get access to the GUI and all tmva macros TString tmva_dir(TString(gRootDir) + "/tmva"); if(gSystem->Getenv("TMVASYS")) tmva_dir = TString(gSystem->Getenv("TMVASYS")); gROOT->SetMacroPath(tmva_dir + "/test/:" + gROOT->GetMacroPath() ); gROOT->ProcessLine(".L TMVAGui.C"); TString outfileName = "boost.root"; TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" ); factory->AddVariable( "var0", 'F' ); factory->AddVariable( "var1", 'F' ); TFile *input(0); TString fname = "./circledata.root"; if (!gSystem->AccessPathName( fname )) { // first we try to find data.root in the local directory std::cout << "--- BOOST : Accessing " << fname << std::endl; input = TFile::Open( fname ); } else { gROOT->LoadMacro( "./createData.C"); create_fullcirc(20000); cout << " created circledata.root with data and circle arranged in circles"<<endl; input = TFile::Open( fname ); } if (!input) { std::cout << "ERROR: could not open data file" << std::endl; exit(1); } TTree *signal = (TTree*)input->Get("TreeS"); TTree *background = (TTree*)input->Get("TreeB"); Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; gROOT->cd( outfileName+TString(":/") ); factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); factory->PrepareTrainingAndTestTree( "", "", "nTrain_Signal=10000:nTrain_Background=10000:SplitMode=Random:NormMode=NumEvents:!V" ); TString fisher="!H:!V"; factory->BookMethod( TMVA::Types::kFisher, "Fisher", fisher ); factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", "!H:V:NTrees=150:NCuts=101:MaxDepth=1:UseFisherCuts:UseExclusiveVars:MinLinCorrForFisher=0." ); // factory->BookMethod( TMVA::Types::kFisher, "FisherBS", fisher+":Boost_Num=100:Boost_Type=Bagging:Boost_Transform=step" ); factory->BookMethod( TMVA::Types::kFisher, "FisherS", fisher+":Boost_Num=150:Boost_Type=AdaBoost:Boost_Transform=step" ); // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
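If createData.C from the TMVA test directory is unavailable, a stand-in generator along the following lines produces the TreeS/TreeB layout this macro expects. Only the file, tree, and branch names are taken from the macro; the ring radii and smearing are assumptions.

// Hypothetical replacement for create_fullcirc(): two classes on concentric rings.
#include "TFile.h"
#include "TTree.h"
#include "TRandom3.h"
#include "TMath.h"

void make_circledata(int n = 20000) {
   TFile f("circledata.root", "RECREATE");
   TTree ts("TreeS", "signal");
   TTree tb("TreeB", "background");
   Float_t var0, var1;
   ts.Branch("var0", &var0, "var0/F"); ts.Branch("var1", &var1, "var1/F");
   tb.Branch("var0", &var0, "var0/F"); tb.Branch("var1", &var1, "var1/F");
   TRandom3 rng(0);
   for (int i = 0; i < n; ++i) {
      Double_t phi = rng.Uniform(0., TMath::TwoPi());
      Double_t rs  = rng.Gaus(1.0, 0.1);  // signal ring radius (assumed)
      Double_t rb  = rng.Gaus(1.5, 0.1);  // background ring radius (assumed)
      var0 = rs*TMath::Cos(phi); var1 = rs*TMath::Sin(phi); ts.Fill();
      var0 = rb*TMath::Cos(phi); var1 = rb*TMath::Sin(phi); tb.Fill();
   }
   f.Write();
   f.Close();
}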
void trainMVACat() { char name[1000]; float XSEC[6] = {3.67e+5,2.94e+4,6.524e+03,1.064e+03,121.5,2.542e+01}; float NORM[6]; TCut preselectionCut = "ht>400 && jetPt[5]>40 && (triggerBit[0] || triggerBit[2]) && nBJets>1 && nLeptons==0"; TFile *bkgSrc[6]; bkgSrc[0] = TFile::Open("flatTree_QCD_HT300to500.root"); bkgSrc[1] = TFile::Open("flatTree_QCD_HT500to700.root"); bkgSrc[2] = TFile::Open("flatTree_QCD_HT700to1000.root"); bkgSrc[3] = TFile::Open("flatTree_QCD_HT1000to1500.root"); bkgSrc[4] = TFile::Open("flatTree_QCD_HT1500to2000.root"); bkgSrc[5] = TFile::Open("flatTree_QCD_HT2000toInf.root"); TFile *sigSrc = TFile::Open("flatTree_ttHJetTobb_M125.root"); //TFile *sigSrc = TFile::Open("flatTree_TT.root"); TTree *sigTree = (TTree*)sigSrc->Get("hadtop/events"); TTree *bkgTree[6]; TFile *outf = new TFile("mva_Cat_QCD.root","RECREATE"); TMVA::Factory* factory = new TMVA::Factory("factory_mva_Cat_QCD_",outf,"!V:!Silent:Color:DrawProgressBar:Transformations=I;G:AnalysisType=Classification"); factory->AddSignalTree(sigTree); for(int k=0;k<6;k++) { NORM[k] = ((TH1F*)bkgSrc[k]->Get("hadtop/pileup"))->GetEntries(); bkgTree[k] = (TTree*)bkgSrc[k]->Get("hadtop/events"); factory->AddBackgroundTree(bkgTree[k],XSEC[k]/NORM[k]); } //int N_SIG(sigTree->GetEntries(preselectionCut)); //int N_BKG0(bkgTree[0]->GetEntries(preselectionCut)); //int N_BKG1(bkgTree[1]->GetEntries(preselectionCut)); //int N_BKG2(bkgTree[2]->GetEntries(preselectionCut)); //int N_BKG3(bkgTree[3]->GetEntries(preselectionCut)); //float N_BKG_EFF = N_BKG0*XSEC[0]/NORM[0]+N_BKG1*XSEC[1]/NORM[1]+N_BKG2*XSEC[2]/NORM[2]+N_BKG3*XSEC[3]/NORM[3]; //int N = TMath::Min((float)N_SIG,N_BKG_EFF); //cout<<N_SIG<<" "<<N_BKG_EFF<<endl; const int NVAR = 21; TString VAR[NVAR] = { "nJets", //"nBJets", "ht", "jetPt[0]","jetPt[1]","jetPt[2]","jetPt[3]","jetPt[4]","jetPt[5]", "mbbMin","dRbbMin", //"dRbbAve","mbbAve", //"btagAve","btagMax","btagMin", //"qglAve","qglMin","qglMedian", "sphericity","aplanarity","foxWolfram[0]","foxWolfram[1]","foxWolfram[2]","foxWolfram[3]", "mTop[0]","ptTTbar","mTTbar","dRbbTop","chi2" }; char TYPE[NVAR] = { 'I', //'I', 'F', 'F','F','F','F','F','F', 'F','F', //'F','F', //'F','F','F', //'F','F','F', 'F','F','F','F','F','F', 'F','F','F','F','F' }; for(int i=0;i<NVAR;i++) { factory->AddVariable(VAR[i],TYPE[i]); } factory->AddSpectator("status",'I'); factory->AddSpectator("nBJets",'I'); sprintf(name,"nTrain_Signal=%d:nTrain_Background=%d:nTest_Signal=%d:nTest_Background=%d",-1,-1,-1,-1); factory->PrepareTrainingAndTestTree(preselectionCut,name); TMVA::IMethod* BDT_Category = factory->BookMethod( TMVA::Types::kCategory,"BDT_Category"); TMVA::MethodCategory* mcategory_BDT = dynamic_cast<TMVA::MethodCategory*>(BDT_Category); mcategory_BDT->AddMethod("status == 0 && nBJets == 2", "nJets:ht:jetPt[0]:jetPt[1]:jetPt[2]:jetPt[3]:jetPt[4]:jetPt[5]:mbbMin:dRbbMin:sphericity:aplanarity:foxWolfram[0]:foxWolfram[1]:foxWolfram[2]:foxWolfram[3]:mTop[0]:ptTTbar:mTTbar:dRbbTop:chi2:", TMVA::Types::kBDT, "BDT_Cat1", "NTrees=2000:BoostType=Grad:Shrinkage=0.1"); mcategory_BDT->AddMethod("status == 0 && nBJets > 2", "nJets:ht:jetPt[0]:jetPt[1]:jetPt[2]:jetPt[3]:jetPt[4]:jetPt[5]:mbbMin:dRbbMin:sphericity:aplanarity:foxWolfram[0]:foxWolfram[1]:foxWolfram[2]:foxWolfram[3]:mTop[0]:ptTTbar:mTTbar:dRbbTop:chi2:", TMVA::Types::kBDT, "BDT_Cat2", "NTrees=2000:BoostType=Grad:Shrinkage=0.1"); mcategory_BDT->AddMethod("status < 0 && nBJets == 2", 
"nJets:ht:jetPt[0]:jetPt[1]:jetPt[2]:jetPt[3]:jetPt[4]:jetPt[5]:mbbMin:dRbbMin:sphericity:aplanarity:foxWolfram[0]:foxWolfram[1]:foxWolfram[2]:foxWolfram[3]:", TMVA::Types::kBDT, "BDT_Cat3", "NTrees=2000:BoostType=Grad:Shrinkage=0.1"); mcategory_BDT->AddMethod("status < 0 && nBJets > 2", "nJets:ht:jetPt[0]:jetPt[1]:jetPt[2]:jetPt[3]:jetPt[4]:jetPt[5]:mbbMin:dRbbMin:sphericity:aplanarity:foxWolfram[0]:foxWolfram[1]:foxWolfram[2]:foxWolfram[3]:", TMVA::Types::kBDT, "BDT_Cat4", "NTrees=2000:BoostType=Grad:Shrinkage=0.1"); TMVA::IMethod* Fisher_Category = factory->BookMethod( TMVA::Types::kCategory,"Fisher_Category"); TMVA::MethodCategory* mcategory_Fisher = dynamic_cast<TMVA::MethodCategory*>(Fisher_Category); mcategory_Fisher->AddMethod("status == 0 && nBJets == 2", "nJets:ht:jetPt[0]:jetPt[1]:jetPt[2]:jetPt[3]:jetPt[4]:jetPt[5]:mbbMin:dRbbMin:sphericity:aplanarity:foxWolfram[0]:foxWolfram[1]:foxWolfram[2]:foxWolfram[3]:mTop[0]:ptTTbar:mTTbar:dRbbTop:chi2:", TMVA::Types::kFisher, "Fisher_Cat1","H:!V:Fisher"); mcategory_Fisher->AddMethod("status == 0 && nBJets > 2", "nJets:ht:jetPt[0]:jetPt[1]:jetPt[2]:jetPt[3]:jetPt[4]:jetPt[5]:mbbMin:dRbbMin:sphericity:aplanarity:foxWolfram[0]:foxWolfram[1]:foxWolfram[2]:foxWolfram[3]:mTop[0]:ptTTbar:mTTbar:dRbbTop:chi2:", TMVA::Types::kFisher, "Fisher_Cat2","H:!V:Fisher"); mcategory_Fisher->AddMethod("status < 0 && nBJets == 2", "nJets:ht:jetPt[0]:jetPt[1]:jetPt[2]:jetPt[3]:jetPt[4]:jetPt[5]:mbbMin:dRbbMin:sphericity:aplanarity:foxWolfram[0]:foxWolfram[1]:foxWolfram[2]:foxWolfram[3]:", TMVA::Types::kFisher, "Fisher_Cat3","H:!V:Fisher"); mcategory_Fisher->AddMethod("status < 0 && nBJets > 2", "nJets:ht:jetPt[0]:jetPt[1]:jetPt[2]:jetPt[3]:jetPt[4]:jetPt[5]:mbbMin:dRbbMin:sphericity:aplanarity:foxWolfram[0]:foxWolfram[1]:foxWolfram[2]:foxWolfram[3]:", TMVA::Types::kFisher, "Fisher_Cat4","H:!V:Fisher"); // specify the training methods //factory->BookMethod(TMVA::Types::kFisher,"Fisher"); //factory->BookMethod(TMVA::Types::kBDT,"BDT_GRAD_2000","NTrees=2000:BoostType=Grad:Shrinkage=0.1"); factory->TrainAllMethods(); factory->TestAllMethods(); factory->EvaluateAllMethods(); outf->Close(); }
void Training_BDT(TString channel, bool isZHvsOther, TString cat){ TString name; if (isZHvsOther) name="ZHvsOther"; else name="TTvsDY"; if (channel=="Mu") name=name+"_Mu"; else name=name+"_El"; if (cat=="2j") name=name+"_2j"; else if (cat=="3j") name=name+"_3j"; else name=name+"_noCat"; TMVA::Tools::Instance(); TFile* outputFile = TFile::Open( "outputtraining"+name+".root", "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "BDT_NN_training"+name, outputFile,"!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); //TFile *input = TFile::Open( "study_histo.root" ); TFile *inputTT = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/TTFullLept_Summer12_final_skimed_llbbX_withWeights_V3.root" ); TFile *inputDYM10 = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/DYjets_M10to50_Summer12_final_skimed_llbbX_withWeights_V3.root" ); TFile *inputDY = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/DYjets_Summer12_final_skimed_llbbX_withWeights_V3.root" ); TFile *inputZZ = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/ZZ_Summer12_final_skimed_llbbX_withWeights_V3.root" ); TFile *inputZH = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/ZH125_Summer12_final_skimed_llbbX_withWeights_V3.root" ); TFile *inputWW = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/WW_Summer12_final_skimed_llbbX_withWeights_V3.root" ); TFile *inputWZ = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/WZ_Summer12_final_skimed_llbbX_withWeights_V3.root" ); TFile *inputWt = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/Wt_Summer12_final_skimed_llbbX_withWeights_V3.root" ); TFile *inputWtbar = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/Wtbar_Summer12_final_skimed_llbbX_withWeights_V3.root" ); TFile *inputTTSemi = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/TTSemiLept_Summer12_final_skimed_llbbX_withWeights_V3.root" ); TTree *signal = (TTree*)inputZH->Get("rds_zbb"); TTree *backgroundDY = (TTree*)inputDY->Get("rds_zbb"); TTree *backgroundZZ = (TTree*)inputZZ->Get("rds_zbb"); TTree *backgroundTT = (TTree*)inputTT->Get("rds_zbb"); TTree *backgroundDYM10 = (TTree*)inputDYM10->Get("rds_zbb"); TTree *backgroundWW = (TTree*)inputWW->Get("rds_zbb"); TTree *backgroundWZ = (TTree*)inputWZ->Get("rds_zbb"); TTree *backgroundWt = (TTree*)inputWt->Get("rds_zbb"); TTree *backgroundWtbar = (TTree*)inputWtbar->Get("rds_zbb"); TTree *backgroundTTSemi = (TTree*)inputTTSemi->Get("rds_zbb"); if (isZHvsOther){ factory->AddSignalTree( signal, 1.2); factory->AddBackgroundTree( backgroundDY, 0.54); factory->AddBackgroundTree( backgroundZZ, 0.1); factory->AddBackgroundTree( backgroundDYM10, 0.02); factory->AddBackgroundTree( backgroundTT, 0.3); factory->AddBackgroundTree( backgroundWW, 0.005); factory->AddBackgroundTree( backgroundWZ, 0.005); factory->AddBackgroundTree( backgroundWt, 0.01); factory->AddBackgroundTree( backgroundWtbar, 0.01 ); factory->AddBackgroundTree( backgroundTTSemi, 0.01 ); } else{ factory->AddSignalTree( backgroundDY, 1.0); factory->AddBackgroundTree( backgroundTT, 1.0); } if (isZHvsOther){ factory->AddVariable("MinusLogW_ZH_cor3", 'F'); factory->AddVariable("MinusLogW_ZH_cor0", 'F'); factory->AddVariable("MinusLogW_ZZ_cor3", 'F'); factory->AddVariable("MinusLogW_ZZ_cor0", 'F'); factory->AddVariable("MinusLogW_TT", 'F');
factory->AddVariable("MinusLogW_gg_Zbb", 'F'); factory->AddVariable("MinusLogW_qq_Zbb", 'F'); } else{ factory->AddVariable("MinusLogW_TT", 'F'); factory->AddVariable("MinusLogW_gg_Zbb", 'F'); factory->AddVariable("MinusLogW_qq_Zbb", 'F'); } TCut mycuts, mycutb; if(isZHvsOther){ if(channel=="Mu"){ if(cat=="2j"){ mycuts = "(MinusLogW_ZH_cor3 > 2 && MinusLogW_ZH_cor0 > 2 &&MinusLogW_ZZ_cor0> 2 &&MinusLogW_ZZ_cor3> 2 &&MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_ZH_cor3<50 && MinusLogW_ZH_cor0<50 &&MinusLogW_ZZ_cor0<50 &&MinusLogW_ZZ_cor3<50 &&MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_6_idx && eventSelectionmu1pt_inc>20 && eventSelectionmu2pt_inc>20 && eventSelectiondilepM_inc > 76 && eventSelectiondilepM_inc < 106 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30&& jetmetnj==2 && (eventSelectiondijetM_inc > 80 && eventSelectiondijetM_inc < 150))"; mycutb = "(MinusLogW_ZH_cor3 > 2 && MinusLogW_ZH_cor0 > 2 &&MinusLogW_ZZ_cor0> 2 &&MinusLogW_ZZ_cor3> 2 &&MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_ZH_cor3<50 && MinusLogW_ZH_cor0<50 &&MinusLogW_ZZ_cor0<50 &&MinusLogW_ZZ_cor3<50 &&MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_6_idx && eventSelectionmu1pt_inc>20 && eventSelectionmu2pt_inc>20 && eventSelectiondilepM_inc > 76 && eventSelectiondilepM_inc < 106 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30&& jetmetnj==2 && (eventSelectiondijetM_inc > 80 && eventSelectiondijetM_inc < 150))"; } else{ mycuts = "(MinusLogW_ZH_cor3 > 2 && MinusLogW_ZH_cor0 > 2 &&MinusLogW_ZZ_cor0> 2 &&MinusLogW_ZZ_cor3> 2 &&MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_ZH_cor3<50 && MinusLogW_ZH_cor0<50 &&MinusLogW_ZZ_cor0<50 &&MinusLogW_ZZ_cor3<50 &&MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_6_idx && eventSelectionmu1pt_inc>20 && eventSelectionmu2pt_inc>20 && eventSelectiondilepM_inc > 76 && eventSelectiondilepM_inc < 106 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30&& jetmetnj>2 && (eventSelectiondijetM_inc > 50 && eventSelectiondijetM_inc < 150))"; mycutb = "(MinusLogW_ZH_cor3 > 2 && MinusLogW_ZH_cor0 > 2 &&MinusLogW_ZZ_cor0> 2 &&MinusLogW_ZZ_cor3> 2 &&MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_ZH_cor3<50 && MinusLogW_ZH_cor0<50 &&MinusLogW_ZZ_cor0<50 &&MinusLogW_ZZ_cor3<50 &&MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_6_idx && eventSelectionmu1pt_inc>20 && eventSelectionmu2pt_inc>20 && eventSelectiondilepM_inc > 76 && eventSelectiondilepM_inc < 106 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30&& jetmetnj>2 && (eventSelectiondijetM_inc > 50 && eventSelectiondijetM_inc < 150))"; } } else { if(cat=="2j"){ mycuts = "(MinusLogW_ZH_cor3 > 2 && MinusLogW_ZH_cor0 > 2 &&MinusLogW_ZZ_cor0> 2 &&MinusLogW_ZZ_cor3> 2 &&MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_ZH_cor3<50 && MinusLogW_ZH_cor0<50 &&MinusLogW_ZZ_cor0<50 &&MinusLogW_ZZ_cor3<50 &&MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_16_idx && eventSelectionel1pt_inc>20 && eventSelectionel2pt_inc>20 && eventSelectiondilepM_inc > 76 && eventSelectiondilepM_inc < 106 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30&& jetmetnj==2 && (eventSelectiondijetM_inc > 80 && eventSelectiondijetM_inc < 150))"; mycutb = "(MinusLogW_ZH_cor3 > 2 && MinusLogW_ZH_cor0 > 2 &&MinusLogW_ZZ_cor0> 2 
&&MinusLogW_ZZ_cor3> 2 &&MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_ZH_cor3<50 && MinusLogW_ZH_cor0<50 &&MinusLogW_ZZ_cor0<50 &&MinusLogW_ZZ_cor3<50 &&MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_16_idx && eventSelectionel1pt_inc>20 && eventSelectionel2pt_inc>20 && eventSelectiondilepM_inc > 76 && eventSelectiondilepM_inc < 106 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30&& jetmetnj==2 && (eventSelectiondijetM_inc > 80 && eventSelectiondijetM_inc < 150))"; } else{ mycuts = "(MinusLogW_ZH_cor3 > 2 && MinusLogW_ZH_cor0 > 2 &&MinusLogW_ZZ_cor0> 2 &&MinusLogW_ZZ_cor3> 2 &&MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_ZH_cor3<50 && MinusLogW_ZH_cor0<50 &&MinusLogW_ZZ_cor0<50 &&MinusLogW_ZZ_cor3<50 &&MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_16_idx && eventSelectionel1pt_inc>20 && eventSelectionel2pt_inc>20 && eventSelectiondilepM_inc > 76 && eventSelectiondilepM_inc < 106 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30&& jetmetnj>2 && (eventSelectiondijetM_inc > 50 && eventSelectiondijetM_inc < 150))"; mycutb = "(MinusLogW_ZH_cor3 > 2 && MinusLogW_ZH_cor0 > 2 &&MinusLogW_ZZ_cor0> 2 &&MinusLogW_ZZ_cor3> 2 &&MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_ZH_cor3<50 && MinusLogW_ZH_cor0<50 &&MinusLogW_ZZ_cor0<50 &&MinusLogW_ZZ_cor3<50 &&MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_16_idx && eventSelectionel1pt_inc>20 && eventSelectionel2pt_inc>20 && eventSelectiondilepM_inc > 76 && eventSelectiondilepM_inc < 106 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30&& jetmetnj>2 && (eventSelectiondijetM_inc > 50 && eventSelectiondijetM_inc < 150))"; } } } else{ if(channel=="Mu"){ mycuts = "(MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_6_idx && eventSelectionmu1pt_inc>20 && eventSelectionmu2pt_inc>20 && eventSelectiondilepM_inc > 60 && eventSelectiondilepM_inc < 120 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30)"; mycutb = "(MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_6_idx && eventSelectionmu1pt_inc>20 && eventSelectionmu2pt_inc>20 && eventSelectiondilepM_inc > 60 && eventSelectiondilepM_inc < 120 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30)"; } else { mycuts = "(MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_16_idx && eventSelectionel1pt_inc>20 && eventSelectionel2pt_inc>20 && eventSelectiondilepM_inc > 60 && eventSelectiondilepM_inc < 120 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30)"; mycutb = "(MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_16_idx && eventSelectionel1pt_inc>20 && eventSelectionel2pt_inc>20 && eventSelectiondilepM_inc > 60 && eventSelectiondilepM_inc < 120 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30)"; } } factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); if (isZHvsOther)factory->BookMethod( TMVA::Types::kBDT, "BDT", 
"!H:!V:NTrees=40:nEventsMin=600:MaxDepth=10:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); else factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=50:nEventsMin=1000:MaxDepth=20:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); if (isZHvsOther)factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=1000:HiddenLayers=N+6:TestRate=5:!UseRegulator" ); else factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=N+2:TestRate=5:!UseRegulator" ); // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch() ) TMVAGui( "outputtrainning"+name+".root" ); }
void TMVARegression( TString myMethodList = "" ) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVARegression.C\(\"myMethod1,myMethod2,myMethod3\"\) // //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Multidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDEFoam"] = 1; Use["KNN"] = 1; // // --- Linear Discriminant Analysis Use["LD"] = 1; // // --- Function Discriminant analysis Use["FDA_GA"] = 1; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; // // --- Neural Network Use["MLP"] = 1; // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 0; Use["BDTG"] = 1; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVARegression" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a new root output file TString outfileName( "TMVAReg.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory will // then run the performance analysis for you. // // The first argument is the base of the name of all the // weightfiles in the directory weights/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVARegression", outputFile, "!V:!Silent:Color:DrawProgressBar" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] factory->AddVariable( "var1", "Variable 1", "units", 'F' ); factory->AddVariable( "var2", "Variable 2", "units", 'F' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA.
This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables factory->AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' ); factory->AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' ); // Add the variable carrying the regression target factory->AddTarget( "fvalue" ); // It is also possible to declare additional targets for multi-dimensional regression, ie: // -- factory->AddTarget( "fvalue2" ); // BUT: this is currently ONLY implemented for MLP // Read training and test data (see TMVAClassification for reading ASCII files) // load the signal and background event samples from ROOT trees TFile *input(0); TString fname = "./tmva_reg_example.root"; if (!gSystem->AccessPathName( fname )) input = TFile::Open( fname ); // check if file in local directory exists else input = TFile::Open( "http://root.cern.ch/files/tmva_reg_example.root" ); // if not: download from ROOT server if (!input) { std::cout << "ERROR: could not open data file" << std::endl; exit(1); } std::cout << "--- TMVARegression : Using input file: " << input->GetName() << std::endl; // --- Register the regression tree TTree *regTree = (TTree*)input->Get("TreeR"); // global event weights per tree (see below for setting event-wise weights) Double_t regWeight = 1.0; // You can add an arbitrary number of regression trees factory->AddRegressionTree( regTree, regWeight ); // This would set individual event weights (the variables defined in the // expression need to exist in the original TTree) factory->SetWeightExpression( "var1", "Regression" ); // Apply additional cuts on the signal and background samples (can be different) TCut mycut = ""; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1"; // tell the factory to use all remaining events in the trees after training for testing: factory->PrepareTrainingAndTestTree( mycut, "nTrain_Regression=0:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents:!V" ); // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // ---- Book MVA methods // // please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // PDE - RS method if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" ); // And the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // Linear discriminant if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", 
"!H:!V:VarTransform=None" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. the formula of this example is good for parabolas factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); // Neural network (MLP) if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" ); // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDT"]) factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=100:MinNodeSize=1.0%:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" ); if (Use["BDTG"]) factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000::BoostType=Grad:Shrinkage=0.1:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=3:MaxDepth=4" ); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVARegression is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVARegGui( outfileName ); }
void ZTMVAClassification_norm( TString myMethodList = "" ) { //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Cut optimisation Use["Cuts"] = 0; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // // --- 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 0; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // --- Multidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 0; Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting Use["KNN"] = 0; // k-nearest neighbour method // // --- Linear Discriminant Analysis Use["LD"] = 0; // Linear Discriminant identical to Fisher Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 0; // minimisation of user-defined function using Genetic Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 0; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 0; // Recommended ANN with BFGS training method and Bayesian regulator Use["CFMlpANN"] = 0; // Deprecated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 1; // uses Adaptive Boost Use["BDTG"] = 1; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of Fisher discriminant for node splitting // // --- Friedman's RuleFit method, i.e., an optimised series of cuts ("rules") Use["RuleFit"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // ADD IN OUR VARIABLES HERE factory->AddVariable("log(kaon_IPCHI2_OWNPV)", "log(kaon_IPCHI2_OWNPV)", "", 'D'); factory->AddVariable("kaon_TRACK_GhostProb", "kaon_TRACK_GhostProb", "", 'D'); factory->AddVariable("kaon_PT", "kaon_PT", "", 'D'); factory->AddVariable("log(proton_IPCHI2_OWNPV)", "log(proton_IPCHI2_OWNPV)", "", 'D'); factory->AddVariable("proton_TRACK_GhostProb", "proton_TRACK_GhostProb", "", 'D'); factory->AddVariable("proton_PT", "proton_PT", "", 'D'); factory->AddVariable("muminus_ProbNNmu", "muminus_ProbNNmu", "", 'D'); factory->AddVariable("muminus_TRACK_GhostProb", "muminus_TRACK_GhostProb", "", 'D'); factory->AddVariable("muplus_ProbNNmu", "muplus_ProbNNmu", "", 'D'); factory->AddVariable("muplus_TRACK_GhostProb", "muplus_TRACK_GhostProb", "", 'D'); factory->AddVariable("Lambda_b0_DTF_CHI2NDOF", "Lambda_b0_DTF_CHI2NDOF", "", 'D'); factory->AddVariable("log(Lambda_b0_IPCHI2_OWNPV)", "log(Lambda_b0_IPCHI2_OWNPV)", "", 'D'); factory->AddVariable("Lambda_b0_PT", "Lambda_b0_PT", "", 'D'); factory->AddVariable("Lambda_b0_FDS", "Lambda_b0_FDS", "", 'D'); // TFile * input_Background = new TFile("../back.root"); TFile * input_Signal = new TFile("/afs/cern.ch/work/a/apmorris/private/cern/ntuples/new_tuples/normalisation_samples/reduced_Lb2JpsipK_MC_2011_2012_norm.root"); TFile * input_Background = new TFile("/afs/cern.ch/work/a/apmorris/private/cern/ntuples/new_tuples/normalisation_samples/background.root"); std::cout << "--- TMVAClassification : Using input file for signal : " << input_Signal->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input file for background : " << input_Background->GetName() << std::endl; // --- Register the training and test trees TTree *signal = (TTree*)input_Signal->Get("DecayTree"); TTree *background = (TTree*)input_Background->Get("DecayTree"); // global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; // You can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", 
"!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood ("naive Bayes estimator") if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density estimator to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", 
"H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]){ // factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); //factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=Norm:NCycles=600:HiddenLayers=N+5:TestRate=5" ); // factory->BookMethod( TMVA::Types::kMLP, "MLPCE", "H:!V:NeuronType=sigmoid:VarTransform=Norm:NCycles=600:HiddenLayers=N+5:TestRate=5:EstimatorType=CE" ); factory->BookMethod( TMVA::Types::kMLP, "MLP", 
"H:!V:NeuronType=sigmoid:VarTransform=Norm:NCycles=600:HiddenLayers=9:TestRate=5:EstimatorType=CE" ); // factory->BookMethod( TMVA::Types::kMLP, "MLPCE83", "H:!V:NeuronType=tanh:VarTransform=Norm:NCycles=600:HiddenLayers=8,3:TestRate=5:EstimatorType=CE" ); } if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) { // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5" ); // factory->BookMethod( TMVA::Types::kBDT, "BDTGI", // "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5:SeparationType=GiniIndexWithLaplace" ); // factory->BookMethod( TMVA::Types::kBDT, "BDTG6", // "!H:!V:NTrees=600:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.4:nCuts=20:NNodesMax=6" ); //factory->BookMethod( TMVA::Types::kBDT, "BDTG2", // "!H:!V:NTrees=800:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.4:nCuts=20:NNodesMax=6" ); factory->BookMethod( TMVA::Types::kBDT, "BDTG3", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.4:nCuts=20:NNodesMax=6" ); // factory->BookMethod( TMVA::Types::kBDT, "BDTG4", // "!H:!V:NTrees=1200:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.4:nCuts=20:NNodesMax=6" ); // factory->BookMethod( TMVA::Types::kBDT, "BDTG5", // "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.4:nCuts=20:NNodesMax=5" ); } if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", "!H:!V:NTrees=50:nEventsMin=150:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", 
"H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier usage, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","GA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros // if (!gROOT->IsBatch()) TMVAGui( outfileName ); }