void TMVAtest(){ //gSystem->Load("../lib/slc5_amd64_gcc462/libTAMUWWMEPATNtuple.so"); gSystem->Load("libPhysics"); //gSystem->Load("EvtTreeForAlexx_h.so"); gSystem->Load("libTMVA.1"); gSystem->Load("AutoDict_vector_TLorentzVector__cxx.so"); TMVA::Tools::Instance(); TFile* outputFile = TFile::Open("TMVA1.root", "RECREATE"); TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification",outputFile,"V=true:Color:DrawProgressBar");// ":Transformations=I;D;P;G,D" ); TFile* signal = TFile::Open("/uscms_data/d2/aperloff/Spring12ME7TeV/MEResults/microNtuples_oldStructure/microWW_EPDv01.root"); TFile* bkg = TFile::Open("/uscms_data/d2/aperloff/Spring12ME7TeV/MEResults/microNtuples_oldStructure/microWJets_EPDv01.root"); TTree* stree = (TTree*)signal->Get("METree"); TTree* btree = (TTree*)bkg->Get("METree"); factory->AddSignalTree(stree,1.0); factory->AddBackgroundTree(btree,1.0); factory->SetSignalWeightExpression("1.0"); factory->SetBackgroundWeightExpression("1.0"); factory->AddVariable("tEventProb[0]"); factory->AddVariable("tEventProb[1]"); factory->AddVariable("tEventProb[2]"); //factory->AddVariable("tEventProb0 := tEventProb[0]",'F'); //factory->AddVariable("tEventProb1 := tEventProb[1]",'F'); //factory->AddVariable("tEventProb2 := tEventProb[2]",'F'); TCut test("Entry$>-2 && jLV[1].Pt()>30"); TCut mycuts (test); factory->PrepareTrainingAndTestTree(mycuts,mycuts,"nTrain_Signal=0:nTrain_Background=0:nTest_Signal=0:nTest_Background=0:SplitMode=Random:NormMode=None:V=true:VerboseLevel=DEBUG"); factory->BookMethod( TMVA::Types::kBDT, "BDT","!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); factory->TrainAllMethods(); factory->TestAllMethods(); factory->EvaluateAllMethods(); outputFile->Close(); }
// Trains, tests and evaluates TMVA classifiers separating a stop signal point
// from the summed Standard Model backgrounds.
//
// Parameters:
//   signal_name  - signal model; selects the input directory and file name
//                  (default "T2tt"). For "T2bw" the x parameter is appended
//                  to every derived name.
//   train_region - integer label of the signal point / training region; it is
//                  part of the input file name, the output file name and the
//                  classification (weight file) name.
//   x_parameter  - only used when signal_name == "T2bw"; formatted with two
//                  decimals.
//
// Example:
//   root -l 'TMVA_stop.C("T2bw",3,0.75)'
//
// Output: TMVA_<signal>_<region>[_<x>].root in the current directory, plus the
// weight files TMVA writes under the name classification_<signal>_<region>[_<x>].
// The TMVA GUI is launched at the end unless ROOT runs in batch mode.
//
// The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc.
// If you use your private .rootrc, or run from a different directory, please copy the
// corresponding lines from .rootrc.
void TMVA_stop( TString signal_name = "T2tt", int train_region = 1, float x_parameter = 0.25) {

  //-----------------------------------------------------
  // define event selection (stored in TCut sel0)
  //-----------------------------------------------------
  TCut njets4("mini_njets>=4");
  TCut met100("mini_met>=100");
  TCut mt120("mini_mt>=120");
  TCut nb1("mini_nb>=1");
  TCut isotrk("mini_passisotrk==1");
  TCut lep_pt30("mini_nlep>=1 && mini_lep1pt>30.0");
  TCut sig("mini_sig==1");

  TCut sel0 = njets4 + met100 + mt120 + nb1 + isotrk + lep_pt30 + sig;

  std::cout << "Using selection : " << sel0.GetTitle() << std::endl;
  std::cout << "Doing signal point : " << train_region << std::endl;

  //-----------------------------------------------------
  // choose which variables to include in MVA training
  // (1 = use, 0 = skip)
  //-----------------------------------------------------
  std::map<std::string,int> mvaVar;
  mvaVar[ "met" ] = 1;
  mvaVar[ "lep1pt" ] = 0;
  mvaVar[ "mt2w" ] = 1;
  mvaVar[ "htratiom" ] = 1;
  mvaVar[ "chi2" ] = 1;
  mvaVar[ "dphimjmin" ] = 1;
  mvaVar[ "pt_b" ] = 0;
  mvaVar[ "nb" ] = 0;
  mvaVar[ "pt_J1" ] = 0;
  mvaVar[ "pt_J2" ] = 0;
  mvaVar[ "rand" ] = 0;
  mvaVar[ "mt" ] = 0;
  mvaVar[ "mt2bl" ] = 0;
  mvaVar[ "mt2b" ] = 0;
  mvaVar[ "lep1eta" ] = 0;
  mvaVar[ "thrjetlm" ] = 0;
  mvaVar[ "apljetlm" ] = 0;
  mvaVar[ "sphjetlm" ] = 0;
  mvaVar[ "cirjetlm" ] = 0;
  mvaVar[ "chi2min" ] = 0;
  mvaVar[ "chi2minprob" ] = 0;   // tested below; previously only default-inserted by operator[]
  mvaVar[ "chi2min_mt2b" ] = 0;
  mvaVar[ "chi2min_mt2bl" ] = 0;
  mvaVar[ "chi2min_mt2w" ] = 0;
  mvaVar[ "mt2bmin" ] = 0;
  mvaVar[ "mt2blmin" ] = 0;
  mvaVar[ "mt2wmin" ] = 0;       // tested below; previously only default-inserted by operator[]
  mvaVar[ "mt2wmin_chi2" ] = 0;
  mvaVar[ "mt2bmin_chi2" ] = 0;
  mvaVar[ "mt2blmin_chi2" ] = 0;
  mvaVar[ "mt2wmin_chi2prob" ] = 0;
  mvaVar[ "mt2bmin_chi2prob" ] = 0;
  mvaVar[ "mt2blmin_chi2prob" ] = 0;
  mvaVar[ "htratiol" ] = 0;
  mvaVar[ "dphimj1" ] = 0;
  mvaVar[ "dphimj2" ] = 0;
  mvaVar[ "metsig" ] = 0;

  //---------------------------------
  // choose bkg samples to include
  //---------------------------------
  std::cout << "Background trees: " << std::endl;

  TString backgrounds[] = {"ttdl_powheg", "ttsl_powheg", "w1to4jets", "tW_lep",
                           "triboson", "diboson", "ttV", "DY1to4Jtot" };
  // Derived from the array so that adding a sample cannot desynchronise the loop.
  const int n_backgrounds = sizeof(backgrounds) / sizeof(backgrounds[0]);

  TString bkgPath = "/nfs-3/userdata/stop/Train/V00-02-18__V00-03-00_4jetsMET100_bkg/";

  TChain* chBackground = new TChain("t");
  for (int i = 0; i < n_backgrounds; i++) {
    TString backgroundChain = bkgPath + "/" + backgrounds[i] + ".root";
    std::cout << " " << backgroundChain << std::endl;
    chBackground->Add(backgroundChain);
  }

  //---------------------------------
  // choose signal sample to include
  //---------------------------------
  std::cout << "Signal trees: " << std::endl;

  TString s_train_region = "";
  s_train_region += train_region;
  TString s_x_parameter = Form("%.2f",x_parameter);

  TString signalPath = "/nfs-3/userdata/stop/Train/";
  TString signalVersion = "V00-02-18__V00-03-00_4jetsMET100_";

  TChain *chSignal = new TChain("t");

  TString base_name = signalPath + "/" + signalVersion + signal_name + "/" + signal_name + "_" + s_train_region;
  if (signal_name == "T2bw") base_name = base_name + "_" + s_x_parameter;
  TString signalChain = base_name + ".root" ;

  std::cout << " " << signalChain << std::endl;
  chSignal->Add(signalChain);

  // A wrong signal_name/train_region (or unreachable storage) yields an empty
  // chain; stop here with a clear message instead of failing inside TMVA.
  if (chSignal->GetEntries() == 0 || chBackground->GetEntries() == 0) {
    std::cerr << "TMVA_stop: signal and/or background chain has no entries, aborting" << std::endl;
    delete chSignal;
    delete chBackground;
    return;
  }

  //---------------------------------------------------------------
  // This loads the library
  TMVA::Tools::Instance();

  // Default MVA methods to be trained + tested (1 = book, 0 = skip)
  std::map<std::string,int> Use;

  // --- Cut optimisation
  Use["Cuts"] = 0;
  Use["CutsD"] = 0;
  Use["CutsPCA"] = 0;
  Use["CutsGA"] = 0;
  Use["CutsSA"] = 0;
  //
  // --- 1-dimensional likelihood ("naive Bayes estimator")
  Use["Likelihood"] = 0;
  Use["LikelihoodD"] = 0;   // the "D" extension indicates decorrelated input variables (see option strings)
  Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
  Use["LikelihoodKDE"] = 0;
  Use["LikelihoodMIX"] = 0;
  //
  // --- Multidimensional likelihood and Nearest-Neighbour methods
  Use["PDERS"] = 0;
  Use["PDERSD"] = 0;
  Use["PDERSPCA"] = 0;
  Use["PDEFoam"] = 0;
  Use["PDEFoamBoost"] = 0;  // uses generalised MVA method boosting
  Use["KNN"] = 0;           // k-nearest neighbour method
  //
  // --- Linear Discriminant Analysis
  Use["LD"] = 0;            // Linear Discriminant identical to Fisher
  Use["Fisher"] = 0;
  Use["FisherG"] = 0;
  Use["BoostedFisher"] = 0; // uses generalised MVA method boosting
  Use["HMatrix"] = 0;
  //
  // --- Function Discriminant analysis
  Use["FDA_GA"] = 0;        // minimisation of user-defined function using Genetics Algorithm
  Use["FDA_SA"] = 0;
  Use["FDA_MC"] = 0;
  Use["FDA_MT"] = 0;
  Use["FDA_GAMT"] = 0;
  Use["FDA_MCMT"] = 0;
  //
  // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
  Use["MLP"] = 0;           // Recommended ANN
  Use["MLPBFGS"] = 0;       // Recommended ANN with optional training method
  Use["MLPBNN"] = 0;        // Recommended ANN with BFGS training method and bayesian regulator
  Use["CFMlpANN"] = 0;      // Deprecated ANN from ALEPH
  Use["TMlpANN"] = 0;       // ROOT's own ANN
  //
  // --- Support Vector Machine
  Use["SVM"] = 0;
  //
  // --- Boosted Decision Trees
  Use["BDT"] = 1;           // uses Adaptive Boost
  Use["BDT1"] = 0;          // uses Adaptive Boost
  Use["BDTG"] = 0;          // uses Gradient Boost
  Use["BDTB"] = 0;          // uses Bagging
  Use["BDTD"] = 0;          // decorrelation + Adaptive Boost
  //
  // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
  Use["RuleFit"] = 0;
  //
  // --- multi-output MVA's (no method is booked for these keys in this macro)
  Use["multi_BDTG"] = 0;
  Use["multi_MLP"] = 0;
  Use["multi_FDA_GA"] = 0;
  //
  // ---------------------------------------------------------------

  std::cout << std::endl;
  std::cout << "==> Start TMVAClassification" << std::endl;

  // --- Here the preparation phase begins

  // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
  TString outfileName = "TMVA_" + signal_name + "_" + s_train_region;
  if (signal_name == "T2bw") outfileName = outfileName +"_" + s_x_parameter;
  outfileName += ".root";

  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
  if (!outputFile || outputFile->IsZombie()) {
    std::cerr << "TMVA_stop: cannot create output file " << outfileName << std::endl;
    delete chSignal;
    delete chBackground;
    return;
  }

  TString classification_name = "classification_" + signal_name + "_" + s_train_region;
  if (signal_name == "T2bw") classification_name = classification_name +"_" + s_x_parameter;

  // Create the factory object. The factory is the only TMVA object you have
  // to interact with.
  //
  // The first argument is the base of the name of all the weight files.
  // The second argument is the output file for the training results.
  // All TMVA output can be suppressed by removing the "!" (not) in
  // front of the "Silent" argument in the option string.
  TMVA::Factory *factory = new TMVA::Factory( classification_name, outputFile,
                                              "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

  // If you wish to modify default settings
  // (please check "src/Config.h" to see all available global options)
  // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
  // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

  //--------------------------------------------------------
  // choose which variables to include in training
  // (expressions may be anything TTree::Draw can parse;
  //  the order of the calls fixes the variable indices)
  //--------------------------------------------------------
  // NOTE(review): several disabled expressions below reference branches
  // without the "mini_" prefix used elsewhere (mt2*min_chi2*, met, htosl,
  // htssl, pt_J1, pt_J2) - confirm the branch names before enabling them.
  if( mvaVar[ "met" ] == 1 ) factory->AddVariable( "mini_met" , "E_{T}^{miss}" , "GeV", 'F' );
  if( mvaVar[ "mt" ] == 1 ) factory->AddVariable( "mini_mt" , "M_{T}" , "GeV", 'F' );
  if( mvaVar[ "mt2w" ] == 1 ) factory->AddVariable( "mini_mt2w" , "MT2W" , "GeV", 'F' );
  if( mvaVar[ "mt2bl" ] == 1 ) factory->AddVariable( "mini_mt2bl" , "MT2bl" , "GeV", 'F' );
  if( mvaVar[ "mt2b" ] == 1 ) factory->AddVariable( "mini_mt2b" , "MT2b" , "GeV", 'F' );
  if( mvaVar[ "chi2" ] == 1 ) factory->AddVariable( "mini_chi2" , "chi2" , "" , 'F' );
  if( mvaVar[ "lep1pt" ] == 1 ) factory->AddVariable( "mini_lep1pt" , "lepton pt" , "" , 'F' );
  if( mvaVar[ "lep1eta" ] == 1 ) factory->AddVariable( "mini_lep1eta" , "lepton eta" , "" , 'F' );
  if( mvaVar[ "thrjetlm" ] == 1 ) factory->AddVariable( "mini_thrjetlm" , "thrust" , "" , 'F' );
  if( mvaVar[ "apljetlm" ] == 1 ) factory->AddVariable( "mini_apljetlm" , "aplanarity" , "" , 'F' );
  if( mvaVar[ "sphjetlm" ] == 1 ) factory->AddVariable( "mini_sphjetlm" , "sphericity" , "" , 'F' );
  if( mvaVar[ "cirjetlm" ] == 1 ) factory->AddVariable( "mini_cirjetlm" , "circularity" , "" , 'F' );
  // Was "mini_min(chi2min,100)": the branch prefix had been applied to the
  // function name instead of the branch (assumes the branch is mini_chi2min,
  // following the naming of the other variables - confirm before enabling).
  if( mvaVar[ "chi2min" ] == 1 ) factory->AddVariable( "min(mini_chi2min,100)" , "#chi^{2}_{min}" , "" , 'F' );
  if( mvaVar[ "chi2minprob" ] == 1 ) factory->AddVariable( "mini_chi2minprob" , "Prob(#chi^{2}_{min})" , "" , 'F' );
  if( mvaVar[ "chi2min_mt2b" ] == 1 ) factory->AddVariable( "mini_chi2min_mt2b" , "MT2b(#chi^{2}_{min})" , "" , 'F' );
  if( mvaVar[ "chi2min_mt2bl" ] == 1 ) factory->AddVariable( "mini_chi2min_mt2bl" , "MT2bl(#chi^{2}_{min})" , "" , 'F' );
  if( mvaVar[ "chi2min_mt2w" ] == 1 ) factory->AddVariable( "mini_chi2min_mt2w" , "MT2W(#chi^{2}_{min})" , "" , 'F' );
  if( mvaVar[ "mt2bmin" ] == 1 ) factory->AddVariable( "mini_mt2bmin" , "MT2b_{min}" , "" , 'F' );
  if( mvaVar[ "mt2blmin" ] == 1 ) factory->AddVariable( "mini_mt2blmin" , "MT2bl_{min}" , "" , 'F' );
  if( mvaVar[ "mt2wmin" ] == 1 ) factory->AddVariable( "mini_mt2wmin" , "MT2W_{min}" , "" , 'F' );
  if( mvaVar[ "mt2bmin_chi2" ] == 1 ) factory->AddVariable( "min(mt2bmin_chi2,100)" , "#chi^{2}(MT2b_{min})" , "" , 'F' );
  if( mvaVar[ "mt2blmin_chi2" ] == 1 ) factory->AddVariable( "min(mt2blmin_chi2,100)" , "#chi^{2}(MT2bl_{min})" , "" , 'F' );
  if( mvaVar[ "mt2wmin_chi2" ] == 1 ) factory->AddVariable( "min(mt2wmin_chi2,100)" , "#chi^{2}(MT2W_{min})" , "" , 'F' );
  if( mvaVar[ "mt2bmin_chi2prob" ] == 1 ) factory->AddVariable( "mt2bmin_chi2prob" , "Prob(#chi^{2}(MT2b_{min}))" , "" , 'F' );
  if( mvaVar[ "mt2blmin_chi2prob" ] == 1 ) factory->AddVariable( "mt2blmin_chi2prob" , "Prob(#chi^{2}(MT2bl_{min}))" , "" , 'F' );
  if( mvaVar[ "mt2wmin_chi2prob" ] == 1 ) factory->AddVariable( "mt2wmin_chi2prob" , "Prob(#chi^{2}(MT2W_{min}))" , "" , 'F' );
  if( mvaVar[ "htratiol" ] == 1 ) factory->AddVariable( "mini_htssl/(mini_htosl+mini_htssl)" , "H_{T}^{SSL}/H_{T}" , "" , 'F' );
  if( mvaVar[ "htratiom" ] == 1 ) factory->AddVariable( "mini_htssm/(mini_htosm+mini_htssm)" , "H_{T}^{SSM}/H_{T}" , "" , 'F' );
  if( mvaVar[ "dphimj1" ] == 1 ) factory->AddVariable( "mini_dphimj1" , "#Delta#phi(j1,E_{T}^{miss})", "" , 'F' );
  if( mvaVar[ "dphimj2" ] == 1 ) factory->AddVariable( "mini_dphimj2" , "#Delta#phi(j2,E_{T}^{miss})", "" , 'F' );
  if( mvaVar[ "dphimjmin" ] == 1 ) factory->AddVariable( "mini_dphimjmin" , "min(#Delta#phi(j_{1,2},E_{T}^{miss}))", "" , 'F' );
  if( mvaVar[ "rand" ] == 1 ) factory->AddVariable( "mini_rand" , "random(0,1)" , "" , 'F' );
  if( mvaVar[ "metsig" ] == 1 ) factory->AddVariable( "met/sqrt(htosl+htssl)" , "E_{T}^{miss}/#sqrt{H_{T}}" , "#sqrt{GeV}" , 'F' );
  // The next four calls used to pass only (expression, "title GeV", 'F'),
  // which selects the (expression, title, unit) overload and turned 'F' into
  // the unit string. Title, unit and type are now given explicitly, and the
  // copy-pasted P_T(b) title of the b-jet multiplicity is corrected.
  if( mvaVar[ "pt_b" ] == 1 ) factory->AddVariable( "mini_pt_b" , "P_T(b)" , "GeV", 'F' );
  if( mvaVar[ "nb" ] == 1 ) factory->AddVariable( "mini_nb" , "N_{b}" , "" , 'F' );
  if( mvaVar[ "pt_J1" ] == 1 ) factory->AddVariable( "pt_J1" , "P_T(J1)" , "GeV", 'F' );
  if( mvaVar[ "pt_J2" ] == 1 ) factory->AddVariable( "pt_J2" , "P_T(J2)" , "GeV", 'F' );

  // You can add so-called "Spectator variables", which are not used in the MVA training,
  // but will appear in the final "TestTree" produced by TMVA.
  //factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' );

  // global event weights per tree (see below for setting event-wise weights)
  Double_t signalWeight = 1.0;
  Double_t backgroundWeight = 1.0;

  // Training and test samples are disjoint halves of each chain, split on
  // mini_rand on top of the event selection. (The simpler alternative is
  // factory->AddSignalTree( chSignal, signalWeight ) and
  // factory->AddBackgroundTree( chBackground, backgroundWeight ), which lets
  // TMVA do the splitting.)
  factory->AddTree(chSignal, "Signal", signalWeight, sel0+"mini_rand < 0.5", "train");
  factory->AddTree(chSignal, "Signal", signalWeight, sel0+"mini_rand >= 0.5", "test");
  factory->AddTree(chBackground, "Background", backgroundWeight, sel0+"mini_rand < 0.5", "train");
  factory->AddTree(chBackground, "Background", backgroundWeight, sel0+"mini_rand >= 0.5", "test");

  // Set individual event weights (the variables must exist in the original TTree)
  factory->SetSignalWeightExpression ("mini_weight");
  factory->SetBackgroundWeightExpression("mini_weight");

  // Tell the factory how to use the training and testing events.
  // The selection was already applied in AddTree above, so no additional cut
  // is passed here and all registered events are used.
  //
  // Alternatives that were tried:
  //   "nTrain_Signal=100000:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
  //   "nTrain_Signal=0:nTrain_Background=0:SplitMode=Alternate:NormMode=NumEvents:!V"
  //   (alternate splitting is preferable since it is easier to track which
  //    events were used for training, but the job crashes! need to fix this...)
  factory->PrepareTrainingAndTestTree( "", "",
                                       "nTrain_Signal=0:nTrain_Background=0:NormMode=None:!V" );

  // ---- Book MVA methods
  //
  // Please lookup the various method configuration options in the corresponding cxx files, eg:
  // src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
  // it is possible to preset ranges in the option string in which the cut optimisation should be done:
  // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

  // Cut optimisation
  if (Use["Cuts"])
    factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                         "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

  if (Use["CutsD"])
    factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                         "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

  if (Use["CutsPCA"])
    factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                         "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

  if (Use["CutsGA"])
    factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                         "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

  if (Use["CutsSA"])
    factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                         "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

  // Likelihood ("naive Bayes estimator")
  if (Use["Likelihood"])
    factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                         "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

  // Decorrelated likelihood
  if (Use["LikelihoodD"])
    factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                         "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

  // PCA-transformed likelihood
  if (Use["LikelihoodPCA"])
    factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                         "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" );

  // Use a kernel density estimator to approximate the PDFs
  if (Use["LikelihoodKDE"])
    factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                         "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" );

  // Use a variable-dependent mix of splines and kernel density estimator
  if (Use["LikelihoodMIX"])
    factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                         "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" );

  // Test the multi-dimensional probability density estimator
  // here are the options strings for the MinMax and RMS methods, respectively:
  // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
  // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
  if (Use["PDERS"])
    factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                         "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

  if (Use["PDERSD"])
    factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                         "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

  if (Use["PDERSPCA"])
    factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                         "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

  // Multi-dimensional likelihood estimator using self-adapting phase-space binning
  if (Use["PDEFoam"])
    factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                         "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

  if (Use["PDEFoamBoost"])
    factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
                         "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

  // K-Nearest Neighbour classifier (KNN)
  if (Use["KNN"])
    factory->BookMethod( TMVA::Types::kKNN, "KNN",
                         "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

  // H-Matrix (chi2-squared) method
  if (Use["HMatrix"])
    factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" );

  // Linear discriminant (same as Fisher discriminant)
  if (Use["LD"])
    factory->BookMethod( TMVA::Types::kLD, "LD",
                         "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

  // Fisher discriminant (same as LD)
  if (Use["Fisher"])
    factory->BookMethod( TMVA::Types::kFisher, "Fisher",
                         "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

  // Fisher with Gauss-transformed input variables
  if (Use["FisherG"])
    factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

  // Composite classifier: ensemble (tree) of boosted Fisher classifiers
  if (Use["BoostedFisher"])
    factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher",
                         "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" );

  // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
  // All FDA variants share the same formula and parameter ranges; only the
  // fitter part of the option string differs.
  const TString fdaBase = "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10)";

  if (Use["FDA_MC"])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                         fdaBase + ":FitMethod=MC:SampleSize=100000:Sigma=0.1" );

  if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                         fdaBase + ":FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

  if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                         fdaBase + ":FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

  if (Use["FDA_MT"])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                         fdaBase + ":FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

  if (Use["FDA_GAMT"])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                         fdaBase + ":FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

  if (Use["FDA_MCMT"])
    factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                         fdaBase + ":FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

  // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
  if (Use["MLP"])
    factory->BookMethod( TMVA::Types::kMLP, "MLP",
                         "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );
  // alternative MLP configuration:
  // "!H:!V:NeuronType=tanh:VarTransform=N:NCycles=1000:HiddenLayers=N+N:TestRate=5:!UseRegulator:LearningRate=0.2:DecayRate=0.001:BPMode=batch:BatchSize=500"

  if (Use["MLPBFGS"])
    factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS",
                         "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

  if (Use["MLPBNN"]) // BFGS training with bayesian regulators
    factory->BookMethod( TMVA::Types::kMLP, "MLPBNN",
                         "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" );

  // CF(Clermont-Ferrand)ANN
  if (Use["CFMlpANN"])
    factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN",
                         "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:...

  // Tmlp(Root)ANN
  if (Use["TMlpANN"])
    factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN",
                         "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:...

  // Support Vector Machine
  if (Use["SVM"])
    factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

  // Boosted Decision Trees
  if (Use["BDTG"]) // Gradient Boost
    factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                         "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5" );

  if (Use["BDT"]) // Adaptive Boost
    factory->BookMethod( TMVA::Types::kBDT, "BDT",
                         "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

  if (Use["BDT1"]) // Adaptive Boost
    factory->BookMethod( TMVA::Types::kBDT, "BDT1",
                         "!H:!V:NTrees=200:nEventsMin=300:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=4:PruneMethod=NoPruning" );

  if (Use["BDTB"]) // Bagging
    factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                         "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

  if (Use["BDTD"]) // Decorrelation + Adaptive Boost
    factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                         "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );

  // RuleFit -- TMVA implementation of Friedman's method
  if (Use["RuleFit"])
    factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                         "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

  // For an example of the category classifier usage, see: TMVAClassificationCategory

  // --------------------------------------------------------------------------------------------------
  // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events
  // factory->OptimizeAllMethods("SigEffAt001","Scan");
  // factory->OptimizeAllMethods("ROCIntegral","GA");
  // --------------------------------------------------------------------------------------------------

  // ---- Now you can tell the factory to train, test, and evaluate the MVAs

  // Train MVAs using the set of training events
  factory->TrainAllMethods();

  // ---- Evaluate all MVAs using the set of test events
  factory->TestAllMethods();

  // ----- Evaluate and compare performance of all configured MVAs
  factory->EvaluateAllMethods();

  // --------------------------------------------------------------
  // Save the output
  outputFile->Close();

  std::cout << "==> Wrote root file: " << outfileName << std::endl;
  std::cout << "==> TMVAClassification is done!" << std::endl;

  // The factory goes first; the chains it was reading from are released afterwards.
  delete factory;
  delete chSignal;
  delete chBackground;

  // Launch the GUI for the root macros
  if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
int main( int argc, char** argv ) {//main std::string folder; if (argc > 1) { folder = argv[1]; } else { folder = "output_tmva/nunu/MET130/"; } bool useQCD = true; bool useOthers = false; bool useOthersAsSignal = true; //List of input signal files std::vector<std::string> sigfiles; //sigfiles.push_back("MC_VBF_HToZZTo4Nu_M-120"); sigfiles.push_back("MC_Powheg-Htoinv-mH125"); if (useOthersAsSignal) { sigfiles.push_back("MC_TTJets"); //powheg samples //sigfiles.push_back("MC_TT-v1"); //sigfiles.push_back("MC_TT-v2"); // sigfiles.push_back("MC_T-tW"); sigfiles.push_back("MC_Tbar-tW"); sigfiles.push_back("MC_SingleT-s-powheg-tauola"); sigfiles.push_back("MC_SingleTBar-s-powheg-tauola"); sigfiles.push_back("MC_SingleT-t-powheg-tauola"); sigfiles.push_back("MC_SingleTBar-t-powheg-tauola"); sigfiles.push_back("MC_WW-pythia6-tauola"); sigfiles.push_back("MC_WZ-pythia6-tauola"); sigfiles.push_back("MC_ZZ-pythia6-tauola"); sigfiles.push_back("MC_W1JetsToLNu_enu"); sigfiles.push_back("MC_W2JetsToLNu_enu"); sigfiles.push_back("MC_W3JetsToLNu_enu"); sigfiles.push_back("MC_W4JetsToLNu_enu"); sigfiles.push_back("MC_WJetsToLNu-v1_enu"); sigfiles.push_back("MC_WJetsToLNu-v2_enu"); sigfiles.push_back("MC_W1JetsToLNu_munu"); sigfiles.push_back("MC_W2JetsToLNu_munu"); sigfiles.push_back("MC_W3JetsToLNu_munu"); sigfiles.push_back("MC_W4JetsToLNu_munu"); sigfiles.push_back("MC_WJetsToLNu-v1_munu"); sigfiles.push_back("MC_WJetsToLNu-v2_munu"); sigfiles.push_back("MC_W1JetsToLNu_taunu"); sigfiles.push_back("MC_W2JetsToLNu_taunu"); sigfiles.push_back("MC_W3JetsToLNu_taunu"); sigfiles.push_back("MC_W4JetsToLNu_taunu"); sigfiles.push_back("MC_WJetsToLNu-v1_taunu"); sigfiles.push_back("MC_WJetsToLNu-v2_taunu"); sigfiles.push_back("MC_DYJetsToLL"); sigfiles.push_back("MC_DY1JetsToLL"); sigfiles.push_back("MC_DY2JetsToLL"); sigfiles.push_back("MC_DY3JetsToLL"); sigfiles.push_back("MC_DY4JetsToLL"); sigfiles.push_back("MC_ZJetsToNuNu_100_HT_200"); sigfiles.push_back("MC_ZJetsToNuNu_200_HT_400"); 
sigfiles.push_back("MC_ZJetsToNuNu_400_HT_inf"); sigfiles.push_back("MC_ZJetsToNuNu_50_HT_100"); sigfiles.push_back("MC_GJets-HT-200To400-madgraph"); sigfiles.push_back("MC_GJets-HT-400ToInf-madgraph"); sigfiles.push_back("MC_WGamma"); sigfiles.push_back("MC_EWK-Z2j"); sigfiles.push_back("MC_EWK-Z2jiglep"); sigfiles.push_back("MC_EWK-W2jminus_enu"); sigfiles.push_back("MC_EWK-W2jplus_enu"); sigfiles.push_back("MC_EWK-W2jminus_munu"); sigfiles.push_back("MC_EWK-W2jplus_munu"); sigfiles.push_back("MC_EWK-W2jminus_taunu"); sigfiles.push_back("MC_EWK-W2jplus_taunu"); } //List of input files std::vector<std::string> bkgfiles; if (useQCD){ bkgfiles.push_back("MC_QCD-Pt-30to50-pythia6"); bkgfiles.push_back("MC_QCD-Pt-50to80-pythia6"); bkgfiles.push_back("MC_QCD-Pt-80to120-pythia6"); bkgfiles.push_back("MC_QCD-Pt-120to170-pythia6"); bkgfiles.push_back("MC_QCD-Pt-170to300-pythia6"); bkgfiles.push_back("MC_QCD-Pt-300to470-pythia6"); bkgfiles.push_back("MC_QCD-Pt-470to600-pythia6"); bkgfiles.push_back("MC_QCD-Pt-600to800-pythia6"); bkgfiles.push_back("MC_QCD-Pt-800to1000-pythia6"); bkgfiles.push_back("MC_QCD-Pt-1000to1400-pythia6"); bkgfiles.push_back("MC_QCD-Pt-1400to1800-pythia6"); bkgfiles.push_back("MC_QCD-Pt-1800-pythia6"); } if (useOthers) { bkgfiles.push_back("MC_TTJets"); //powheg samples //bkgfiles.push_back("MC_TT-v1"); //bkgfiles.push_back("MC_TT-v2"); // bkgfiles.push_back("MC_T-tW"); bkgfiles.push_back("MC_Tbar-tW"); bkgfiles.push_back("MC_SingleT-s-powheg-tauola"); bkgfiles.push_back("MC_SingleTBar-s-powheg-tauola"); bkgfiles.push_back("MC_SingleT-t-powheg-tauola"); bkgfiles.push_back("MC_SingleTBar-t-powheg-tauola"); bkgfiles.push_back("MC_WW-pythia6-tauola"); bkgfiles.push_back("MC_WZ-pythia6-tauola"); bkgfiles.push_back("MC_ZZ-pythia6-tauola"); bkgfiles.push_back("MC_W1JetsToLNu_enu"); bkgfiles.push_back("MC_W2JetsToLNu_enu"); bkgfiles.push_back("MC_W3JetsToLNu_enu"); bkgfiles.push_back("MC_W4JetsToLNu_enu"); bkgfiles.push_back("MC_WJetsToLNu-v1_enu"); 
bkgfiles.push_back("MC_WJetsToLNu-v2_enu"); bkgfiles.push_back("MC_W1JetsToLNu_munu"); bkgfiles.push_back("MC_W2JetsToLNu_munu"); bkgfiles.push_back("MC_W3JetsToLNu_munu"); bkgfiles.push_back("MC_W4JetsToLNu_munu"); bkgfiles.push_back("MC_WJetsToLNu-v1_munu"); bkgfiles.push_back("MC_WJetsToLNu-v2_munu"); bkgfiles.push_back("MC_W1JetsToLNu_taunu"); bkgfiles.push_back("MC_W2JetsToLNu_taunu"); bkgfiles.push_back("MC_W3JetsToLNu_taunu"); bkgfiles.push_back("MC_W4JetsToLNu_taunu"); bkgfiles.push_back("MC_WJetsToLNu-v1_taunu"); bkgfiles.push_back("MC_WJetsToLNu-v2_taunu"); bkgfiles.push_back("MC_DYJetsToLL"); bkgfiles.push_back("MC_DY1JetsToLL"); bkgfiles.push_back("MC_DY2JetsToLL"); bkgfiles.push_back("MC_DY3JetsToLL"); bkgfiles.push_back("MC_DY4JetsToLL"); bkgfiles.push_back("MC_ZJetsToNuNu_100_HT_200"); bkgfiles.push_back("MC_ZJetsToNuNu_200_HT_400"); bkgfiles.push_back("MC_ZJetsToNuNu_400_HT_inf"); bkgfiles.push_back("MC_ZJetsToNuNu_50_HT_100"); bkgfiles.push_back("MC_GJets-HT-200To400-madgraph"); bkgfiles.push_back("MC_GJets-HT-400ToInf-madgraph"); bkgfiles.push_back("MC_WGamma"); bkgfiles.push_back("MC_EWK-Z2j"); bkgfiles.push_back("MC_EWK-Z2jiglep"); bkgfiles.push_back("MC_EWK-W2jminus_enu"); bkgfiles.push_back("MC_EWK-W2jplus_enu"); bkgfiles.push_back("MC_EWK-W2jminus_munu"); bkgfiles.push_back("MC_EWK-W2jplus_munu"); bkgfiles.push_back("MC_EWK-W2jminus_taunu"); bkgfiles.push_back("MC_EWK-W2jplus_taunu"); } // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TFile *output_tmva = TFile::Open((folder+"/TMVA_QCDrej.root").c_str(),"RECREATE"); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" 
(not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", output_tmva, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); //fill the variables with event weight from the trees //const unsigned nVars = 4; factory->AddSpectator("jet1_pt","Jet 1 p_{T}", "GeV", 'F'); factory->AddSpectator("jet2_pt","Jet 2 p_{T}", "GeV", 'F'); factory->AddSpectator("jet1_eta","Jet 1 #eta", "", 'F'); factory->AddVariable("jet2_eta","Jet 2 #eta", "", 'F');// ** factory->AddSpectator("jet1_phi","Jet 1 #phi", "", 'F'); factory->AddSpectator("jet2_phi","Jet 2 #phi", "", 'F'); factory->AddSpectator("dijet_M","M_{jj}", " GeV", 'F'); factory->AddSpectator("dijet_deta","#Delta#eta_{jj}", "", 'F'); factory->AddSpectator("dijet_sumeta","#eta_{j1}+#eta_{j2}", "", 'F'); factory->AddSpectator("dijet_dphi","#Delta#phi_{jj}", "", 'F'); factory->AddSpectator("met","MET", "GeV", 'F');// ** factory->AddSpectator("met_phi","MET #phi", "", 'F'); factory->AddVariable("met_significance","MET significance", "", 'F');// ** factory->AddSpectator("sumet","#Sum E_{T}", "GeV", 'F'); factory->AddSpectator("ht","H_{T}", "GeV", 'F'); factory->AddVariable("mht","MH_{T}", "GeV", 'F');// ** factory->AddSpectator("sqrt_ht","#sqrt{H_{T}}", "GeV^{0.5}", 'F'); factory->AddSpectator("unclustered_et","Unclustered E_{T}", "GeV", 'F'); factory->AddSpectator("unclustered_phi","Unclustered #phi", "GeV", 'F'); factory->AddSpectator("jet1met_dphi","#Delta#phi(MET,jet1)", "", 'F'); factory->AddVariable("jet2met_dphi","#Delta#phi(MET,jet2)", "", 'F');// ** factory->AddVariable("jetmet_mindphi","minimum #Delta#phi(MET,jet)", "", 'F');// ** factory->AddVariable("jetunclet_mindphi","minimum #Delta#phi(unclustered,jet)", "", 'F');// ** factory->AddVariable("metunclet_dphi","#Delta#phi(MET,unclustered)", "", 'F');// ** factory->AddVariable("dijetmet_scalarSum_pt", "p_{T}^{jet1}+p_{T}^{jet2}+MET", "GeV", 'F');// ** 
factory->AddSpectator("dijetmet_vectorialSum_pt","p_{T}(#vec{j1}+#vec{j2}+#vec{MET})", "GeV", 'F'); factory->AddVariable("dijetmet_ptfraction","p_{T}^{dijet}/(p_{T}^{dijet}+MET)", "", 'F');// ** //factory->AddVariable("jet1met_scalarprod := (jet1_pt*cos(jet1_phi)*met_x+jet1_pt*sin(jet1_phi)*met_y)/met", "#vec{p_{T}^{jet1}}.#vec{MET}/MET", "GeV" , 'F'); //factory->AddVariable("jet2met_scalarprod := (jet2_pt*cos(jet2_phi)*met_x+jet2_pt*sin(jet2_phi)*met_y)/met", "#vec{p_{T}^{jet2}}.#vec{MET}/MET", "GeV" , 'F'); factory->AddVariable("jet1met_scalarprod", "#vec{p_{T}^{jet1}}.#vec{MET}/MET", "GeV" , 'F');// ** factory->AddVariable("jet2met_scalarprod", "#vec{p_{T}^{jet2}}.#vec{MET}/MET", "GeV" , 'F');// ** factory->AddVariable("jet1met_scalarprod_frac := jet1met_scalarprod/met", "#vec{p_{T}^{jet1}}.#vec{MET}/MET^{2}", "" , 'F');// ** factory->AddVariable("jet2met_scalarprod_frac := jet2met_scalarprod/met", "#vec{p_{T}^{jet2}}.#vec{MET}/MET^{2}", "" , 'F');// ** factory->AddSpectator("n_jets_cjv_30","CJV jets (30 GeV)", "" , 'I'); factory->AddSpectator("n_jets_cjv_20EB_30EE","CJV jets (|#eta|<2.4 and 20 GeV, or 30 GeV)", "" , 'I'); //test with only VBF variables used in cut-based analysis //factory->AddVariable("dijet_M","M_{jj}", " GeV", 'F'); //factory->AddVariable("dijet_deta","#Delta#eta_{jj}", "", 'F'); //factory->AddVariable("dijet_dphi","#Delta#phi_{jj}", "", 'F'); //factory->AddVariable("met","MET", "GeV", 'F'); //factory->AddVariable("n_jets_cjv_30","CJV jets (30 GeV)", "" , 'I'); //get input files //signal //TFile *signalfile = TFile::Open((folder+"/"+"MC_VBF_HToZZTo4Nu_M-120.root").c_str()); //TTree *signal = (TTree*)signalfile->Get("TmvaInputTree"); //Double_t signalWeight = 1.0; //factory->AddSignalTree(signal,signalWeight); //Set individual event weights (the variables must exist in the original TTree) //factory->SetSignalWeightExpression("total_weight"); //background std::map<std::string, TFile *> tfiles; for (unsigned i = 0; i < bkgfiles.size(); ++i) { 
std::string filename = (bkgfiles[i]+".root"); TFile * tmp = new TFile((folder+"/"+filename).c_str()); if (!tmp) { std::cerr << "Warning, file " << filename << " could not be opened." << std::endl; } else { tfiles[bkgfiles[i]] = tmp; } } TTree *background[bkgfiles.size()]; //signal std::map<std::string, TFile *> sfiles; for (unsigned i = 0; i < sigfiles.size(); ++i) { std::string filename = (sigfiles[i]+".root"); TFile * tmp = new TFile((folder+"/"+filename).c_str()); if (!tmp) { std::cerr << "Warning, file " << filename << " could not be opened." << std::endl; } else { sfiles[sigfiles[i]] = tmp; } } TTree *signal[sigfiles.size()]; for (unsigned i = 0; i < bkgfiles.size(); ++i) { std::string f = bkgfiles[i]; if (tfiles[f]){ background[i] = (TTree*)tfiles[f]->Get("TmvaInputTree"); //if (f.find("QCD-Pt")!=f.npos){ //} Double_t backgroundWeight = 1.0; factory->AddBackgroundTree(background[i],backgroundWeight); factory->SetBackgroundWeightExpression("total_weight"); }//if file exist else { std::cout << " Cannot find background file " << f << std::endl; } }//loop on files for (unsigned i = 0; i < sigfiles.size(); ++i) { std::string f = sigfiles[i]; if (sfiles[f]){ signal[i] = (TTree*)sfiles[f]->Get("TmvaInputTree"); //if (f.find("QCD-Pt")!=f.npos){ //} Double_t signalWeight = 1.0; factory->AddSignalTree(signal[i],signalWeight); factory->SetSignalWeightExpression("total_weight"); }//if file exist else { std::cout << " Cannot find signal file " << f << std::endl; } }//loop on files // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = "";//dijet_deta>3.8 && dijet_M > 1100 && met > 100 && met_significance>5"; TCut mycutb = "";//dijet_deta>3.8 && dijet_M > 1100 && met > 100 && met_significance>5"; factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // Likelihood ("naive Bayes estimator") //factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", 
//"H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Linear discriminant (same as Fisher discriminant) //factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons //factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=60:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); // Boosted Decision Trees // Gradient Boost //factory->BookMethod( TMVA::Types::kBDT, "BDTG", //"!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" ); //factory->BookMethod( TMVA::Types::kBDT, "BDTG", // "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:nCuts=20:MaxDepth=2" ); // Adaptive Boost //factory->BookMethod( TMVA::Types::kBDT, "BDT1000", // "!H:!V:NTrees=1000:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" ); factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=1000:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.2:SeparationType=GiniIndex:nCuts=20" ); // Bagging //factory->BookMethod( TMVA::Types::kBDT, "BDTB", // "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" ); // Decorrelation + Adaptive Boost //factory->BookMethod( TMVA::Types::kBDT, "BDTD", // "!H:!V:NTrees=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" ); // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables //factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", // 
"!H:!V:NTrees=50:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" ); // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output output_tmva->Close(); std::cout << "==> Wrote root file: " << output_tmva->GetName() << std::endl << "==> TMVAClassification is done!" << std::endl << std::endl << "==> To view the results, launch the GUI: \"root -l ./TMVAGui.C\"" << std::endl << std::endl; // Clean up delete factory; return 0; }//main
void TMVAClassification( TString myMethodList = "" ) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) // this loads the library TMVA::Tools::Instance(); //--------------------------------------------------------------- // default MVA methods to be trained + tested std::map<std::string,int> Use; Use["Cuts"] = 0; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // --- Use["Likelihood"] = 1; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // --- Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDERSkNN"] = 0; // depreciated until further notice Use["PDEFoam"] = 0; // -- Use["KNN"] = 0; // --- Use["HMatrix"] = 0; Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; Use["LD"] = 0; // --- Use["FDA_GA"] = 0; Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // --- Use["MLP"] = 0; // this is the recommended ANN Use["MLPBFGS"] = 0; // recommended ANN with optional training method Use["MLPBNN"] = 0; // recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // *** missing Use["TMlpANN"] = 0; // --- Use["SVM"] = 0; // --- Use["BDT"] = 1; 
Use["BDTD"] = 0; Use["BDTG"] = 0; Use["BDTB"] = 0; // --- Use["RuleFit"] = 0; // --- Use["Plugin"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 0; } } // Create a new root output file. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory will // then run the performance analysis for you. // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" 
(not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] // factory->AddVariable( "myvar1 := var1+var2", 'F' ); // factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' ); // factory->AddVariable( "var3", "Variable 3", "units", 'F' ); // factory->AddVariable( "var4", "Variable 4", "units", 'F' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables // factory->AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' ); // factory->AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' ); // read training and test data factory->AddVariable("CScostheta",'F'); factory->AddVariable("ZRapidity",'F'); factory->AddVariable("REDmet",'F'); if (ReadDataFromAsciiIFormat) { // load the signal and background event samples from ascii files // format in file must be: // var1/F:var2/F:var3/F:var4/F // 0.04551 0.59923 0.32400 -0.19170 // ... 
TString datFileS = "tmva_example_sig.dat"; TString datFileB = "tmva_example_bkg.dat"; factory->SetInputTrees( datFileS, datFileB ); } else { // load the signal and background event samples from ROOT trees TString fname = "./tmva_class_example.root"; TString fname_Data7TeV_DoubleElectron2011B_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleElectron2011B_0.root"; TString fname_Data7TeV_MuEG2011B_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_MuEG2011B_0.root"; TString fname_ZH125 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH125.root"; TString fname_SingleT_tW = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleT_tW.root"; TString fname_SingleT_s = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleT_s.root"; TString fname_Data7TeV_DoubleMu2011B_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleMu2011B_0.root"; TString fname_Data7TeV_DoubleElectron2011A_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleElectron2011A_0.root"; TString fname_ZH135 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH135.root"; TString fname_DYJetsToLL = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//DYJetsToLL.root"; TString fname_ZH115 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH115.root"; TString fname_SingleTbar_t = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleTbar_t.root"; TString fname_Data7TeV_DoubleElectron2011B_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleElectron2011B_1.root"; TString fname_Data7TeV_DoubleMu2011A_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleMu2011A_1.root"; TString fname_Data7TeV_MuEG2011A_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_MuEG2011A_1.root"; TString fname_TTJets = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//TTJets.root"; TString fname_SingleTbar_s = 
"/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleTbar_s.root"; TString fname_WJetsToLNu = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//WJetsToLNu.root"; TString fname_Data7TeV_DoubleElectron2011A_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleElectron2011A_1.root"; TString fname_ZZ = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZZ.root"; TString fname_ZH150 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH150.root"; TString fname_Data7TeV_MuEG2011B_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_MuEG2011B_1.root"; TString fname_WW = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//WW.root"; TString fname_ZH105 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH105.root"; TString fname_Data7TeV_DoubleMu2011A_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleMu2011A_0.root"; TString fname_SingleTbar_tW = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleTbar_tW.root"; TString fname_WZ = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//WZ.root"; TString fname_Data7TeV_MuEG2011A_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_MuEG2011A_0.root"; TString fname_Data7TeV_DoubleMu2011B_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleMu2011B_1.root"; TString fname_ZH145 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH145.root"; TString fname_SingleT_t = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleT_t.root"; if (gSystem->AccessPathName( fname )) // file does not exist in local directory gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root"); TFile *input_Data7TeV_DoubleElectron2011B_0 = TFile::Open( fname_Data7TeV_DoubleElectron2011B_0 ); TFile *input_Data7TeV_MuEG2011B_0 = TFile::Open( fname_Data7TeV_MuEG2011B_0 ); TFile *input_ZH125 = TFile::Open( fname_ZH125 ); TFile *input_SingleT_tW = TFile::Open( fname_SingleT_tW ); TFile *input_SingleT_s 
= TFile::Open( fname_SingleT_s ); TFile *input_Data7TeV_DoubleMu2011B_0 = TFile::Open( fname_Data7TeV_DoubleMu2011B_0 ); TFile *input_Data7TeV_DoubleElectron2011A_0 = TFile::Open( fname_Data7TeV_DoubleElectron2011A_0 ); TFile *input_ZH135 = TFile::Open( fname_ZH135 ); TFile *input_DYJetsToLL = TFile::Open( fname_DYJetsToLL ); TFile *input_ZH115 = TFile::Open( fname_ZH115 ); TFile *input_SingleTbar_t = TFile::Open( fname_SingleTbar_t ); TFile *input_Data7TeV_DoubleElectron2011B_1 = TFile::Open( fname_Data7TeV_DoubleElectron2011B_1 ); TFile *input_Data7TeV_DoubleMu2011A_1 = TFile::Open( fname_Data7TeV_DoubleMu2011A_1 ); TFile *input_Data7TeV_MuEG2011A_1 = TFile::Open( fname_Data7TeV_MuEG2011A_1 ); TFile *input_TTJets = TFile::Open( fname_TTJets ); TFile *input_SingleTbar_s = TFile::Open( fname_SingleTbar_s ); TFile *input_WJetsToLNu = TFile::Open( fname_WJetsToLNu ); TFile *input_Data7TeV_DoubleElectron2011A_1 = TFile::Open( fname_Data7TeV_DoubleElectron2011A_1 ); TFile *input_ZZ = TFile::Open( fname_ZZ ); TFile *input_ZH150 = TFile::Open( fname_ZH150 ); TFile *input_Data7TeV_MuEG2011B_1 = TFile::Open( fname_Data7TeV_MuEG2011B_1 ); TFile *input_WW = TFile::Open( fname_WW ); TFile *input_ZH105 = TFile::Open( fname_ZH105 ); TFile *input_Data7TeV_DoubleMu2011A_0 = TFile::Open( fname_Data7TeV_DoubleMu2011A_0 ); TFile *input_SingleTbar_tW = TFile::Open( fname_SingleTbar_tW ); TFile *input_WZ = TFile::Open( fname_WZ ); TFile *input_Data7TeV_MuEG2011A_0 = TFile::Open( fname_Data7TeV_MuEG2011A_0 ); TFile *input_Data7TeV_DoubleMu2011B_1 = TFile::Open( fname_Data7TeV_DoubleMu2011B_1 ); TFile *input_ZH145 = TFile::Open( fname_ZH145 ); TFile *input_SingleT_t = TFile::Open( fname_SingleT_t ); std::cout << "--- TMVAClassification : Using input_Data7TeV_DoubleElectron2011B_0 file: " << input_Data7TeV_DoubleElectron2011B_0->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_Data7TeV_MuEG2011B_0 file: " << input_Data7TeV_MuEG2011B_0->GetName() << std::endl; 
std::cout << "--- TMVAClassification : Using input_ZH125 file: " << input_ZH125->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_SingleT_tW file: " << input_SingleT_tW->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_SingleT_s file: " << input_SingleT_s->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_Data7TeV_DoubleMu2011B_0 file: " << input_Data7TeV_DoubleMu2011B_0->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_Data7TeV_DoubleElectron2011A_0 file: " << input_Data7TeV_DoubleElectron2011A_0->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_ZH135 file: " << input_ZH135->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_DYJetsToLL file: " << input_DYJetsToLL->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_ZH115 file: " << input_ZH115->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_SingleTbar_t file: " << input_SingleTbar_t->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_Data7TeV_DoubleElectron2011B_1 file: " << input_Data7TeV_DoubleElectron2011B_1->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_Data7TeV_DoubleMu2011A_1 file: " << input_Data7TeV_DoubleMu2011A_1->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_Data7TeV_MuEG2011A_1 file: " << input_Data7TeV_MuEG2011A_1->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_TTJets file: " << input_TTJets->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_SingleTbar_s file: " << input_SingleTbar_s->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_WJetsToLNu file: " << input_WJetsToLNu->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_Data7TeV_DoubleElectron2011A_1 file: " << input_Data7TeV_DoubleElectron2011A_1->GetName() << 
std::endl; std::cout << "--- TMVAClassification : Using input_ZZ file: " << input_ZZ->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_ZH150 file: " << input_ZH150->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_Data7TeV_MuEG2011B_1 file: " << input_Data7TeV_MuEG2011B_1->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_WW file: " << input_WW->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_ZH105 file: " << input_ZH105->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_Data7TeV_DoubleMu2011A_0 file: " << input_Data7TeV_DoubleMu2011A_0->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_SingleTbar_tW file: " << input_SingleTbar_tW->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_WZ file: " << input_WZ->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_Data7TeV_MuEG2011A_0 file: " << input_Data7TeV_MuEG2011A_0->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_Data7TeV_DoubleMu2011B_1 file: " << input_Data7TeV_DoubleMu2011B_1->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_ZH145 file: " << input_ZH145->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_SingleT_t file: " << input_SingleT_t->GetName() << std::endl; TTree *signal_ZH145 = (TTree*)input_ZH145->Get("tmvatree"); TTree *background_ZZ = (TTree*)input_ZZ->Get("tmvatree"); // global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; // ====== register trees ==================================================== // // the following method is the prefered one: // you can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal_ZH145, 1.0 ); factory->AddBackgroundTree( background_ZZ, 1.0 ); // To give different trees for training and 
testing, do as follows: // factory->AddSignalTree( signal_ZH145TrainingTree, signal_ZH145TrainWeight, "Training" ); // factory->AddSignalTree( signal_ZH145TestTree, signal_ZH145TestWeight, "Test" ); // Use the following code instead of the above two or four lines to add signal and background // training and test events "by hand" // NOTE that in this case one should not give expressions (such as "var1+var2") in the input // variable definition, but simply compute the expression before adding the event // // // --- begin ---------------------------------------------------------- // std::vector<Double_t> vars( 4 ); // vector has size of number of input variables // Float_t treevars[4]; // for (Int_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (Int_t i=0; i<signal->GetEntries(); i++) { // signal->GetEntry(i); // for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < signal->GetEntries()/2) factory->AddSignalTrainingEvent( vars, signalWeight ); // else factory->AddSignalTestEvent ( vars, signalWeight ); // } // // for (Int_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (Int_t i=0; i<background->GetEntries(); i++) { // background->GetEntry(i); // for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight ); // else factory->AddBackgroundTestEvent ( vars, backgroundWeight ); // } // // --- end ------------------------------------------------------------ // // ====== end of register trees ============================================== } // This would set individual 
event weights (the variables defined in the // expression need to exist in the original TTree) // for signal : factory->SetSignalWeightExpression("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("weight1*weight2"); factory->SetBackgroundWeightExpression("Eweight*XS*BR*LUM*(1/NGE)*(B2/B3)*CUT"); factory->SetSignalWeightExpression("Eweight*XS*BR*LUM*(1/NGE)*(B2/B3)*CUT"); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = "(CUT>2)"; TCut mycutb = "(CUT>2)"; // tell the factory to use all remaining events in the trees after training for testing: factory->PrepareTrainingAndTestTree( mycuts, "SplitMode=random:!V" ); // "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // If no numbers of events are given, half of the events in the tree are used for training, and // the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); // ---- Book MVA methods // // please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", 
"!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:Seed=0:EffSel:Steps=50:Cycles=3:PopSize=1000:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // test the decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // test the new kernel density estimator if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // test the mixed splines and kernel density estimator (depending on which variable) if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // 
"!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSkNN"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSkNN", "!H:!V:VolumeRangeMode=kNN:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:CutNmin=T:Nmin=100:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" ); // Fisher discriminant if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=60:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", 
"H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2"); // Linear discriminant (same as Fisher) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", 
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... 
// Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=1000:nEventsMin=400:MaxDepth=6:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=1000:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=1000:nEventsMin=400:MaxDepth=6:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // As an example how to use the ROOT plugin mechanism, book BDT via // plugin mechanism if (Use["Plugin"]) { // // first the plugin has to be defined, which can happen either through the following line in the local or global .rootrc: // // # plugin handler plugin name(regexp) class to be instanciated library constructor format // Plugin.TMVA@@MethodBase: ^BDT TMVA::MethodBDT TMVA.1 "MethodBDT(TString,TString,DataSet&,TString)" // // or by telling the global plugin manager directly 
gPluginMgr->AddHandler("TMVA@@MethodBase", "BDT", "TMVA::MethodBDT", "TMVA.1", "MethodBDT(TString,TString,DataSet&,TString)"); factory->BookMethod( TMVA::Types::kPlugins, "BDT", "!H:!V:NTrees=1000:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=50" ); } // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros gROOT->ProcessLine(".q;"); }
// Train, test and evaluate TMVA classifiers on the tree "data" stored in
// ./training.root.
//
// The single input tree is registered twice: once as class "Signal" (cut
// "Label == 1") and once as class "Background" (cut "Label == 0"). Events are
// weighted with the tree expression "Weight". Every method switched on in the
// Use table below is booked, trained, tested and evaluated; all results are
// written to TMVA.root.
//
// myMethodList : comma-separated list of method names to run instead of the
//                defaults (e.g. "BDT,NN"). An empty string keeps the defaults
//                flagged with 1 in the Use table.
//
// Returns 0 on success and 1 when an unknown method name is requested or when
// the input file, the input tree or the output file cannot be opened.
//
// Methods can be given on the command line, e.g.
//    root -l <macro>.C\(\"myMethod1,myMethod2,myMethod3\"\)
int TMVAKaggleHiggs ( TString myMethodList = "" )
{
   // This loads the TMVA library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested (1 = enabled, 0 = disabled)
   std::map<std::string,int> Use;

   // --- Cut optimisation
   Use["Cuts"]            = 0;
   Use["CutsD"]           = 0;
   Use["CutsPCA"]         = 0;
   Use["CutsGA"]          = 0;
   Use["CutsSA"]          = 0;
   //
   // --- 1-dimensional likelihood ("naive Bayes estimator")
   Use["Likelihood"]      = 0;
   Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"]   = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"]   = 0;
   Use["LikelihoodMIX"]   = 0;
   //
   // --- Multidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]           = 0;
   Use["PDERSD"]          = 0;
   Use["PDERSPCA"]        = 0;
   Use["PDEFoam"]         = 0;
   Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
   Use["KNN"]             = 0; // k-nearest neighbour method
   //
   // --- Linear Discriminant Analysis
   Use["LD"]              = 0; // Linear Discriminant identical to Fisher
   Use["Fisher"]          = 0;
   Use["FisherG"]         = 0;
   Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
   Use["HMatrix"]         = 0;
   //
   // --- Function Discriminant analysis
   Use["FDA_GA"]          = 0; // minimisation of user-defined function using Genetics Algorithm
   Use["FDA_SA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   Use["FDA_MCMT"]        = 0;
   //
   // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
   Use["MLP"]             = 0; // Recommended ANN
   Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
   Use["MLPBNN"]          = 0; // Recommended ANN with BFGS training method and bayesian regulator
   Use["CFMlpANN"]        = 0; // Deprecated ANN from ALEPH
   Use["TMlpANN"]         = 0; // ROOT's own ANN
   Use["NN"]              = 1; // improved implementation of a NN
   //
   // --- Support Vector Machine
   Use["SVM"]             = 0;
   //
   // --- Boosted Decision Trees
   Use["BDT"]             = 1; // uses Adaptive Boost
   Use["BDTG"]            = 0; // uses Gradient Boost
   Use["BDTB"]            = 0; // uses Bagging
   Use["BDTD"]            = 0; // decorrelation + Adaptive Boost
   Use["BDTF"]            = 0; // allow usage of fisher discriminant for node splitting
   //
   // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
   Use["RuleFit"]         = 0;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAKaggleHiggs" << std::endl;

   // An explicit method list replaces the defaults: switch everything off,
   // then enable exactly the requested methods.
   if (myMethodList != "") {
      for (auto& entry : Use) entry.second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (const auto& entry : Use) std::cout << entry.first << " ";
            std::cout << std::endl;
            return 1;
         }
         Use[regMethod] = 1;
      }
   }

   // --------------------------------------------------------------------------------------------------

   // --- Here the preparation phase begins

   // Read training and test data.
   // The former fallback downloaded the unrelated TMVA tutorial file and then
   // dereferenced a null TFile pointer; a missing input is now reported instead.
   TString fname = "./training.root";
   TFile *input = TFile::Open( fname );
   if (!input || input->IsZombie()) {
      std::cerr << "ERROR: could not open input file " << fname << std::endl;
      return 1;
   }
   std::cout << "--- TMVAClassification : Using input file: " << input->GetName() << std::endl;

   // --- Register the training and test trees
   TTree *tree = dynamic_cast<TTree*>( input->Get("data") );
   if (!tree) {
      std::cerr << "ERROR: tree \"data\" not found in " << fname << std::endl;
      input->Close();
      return 1;
   }

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
   if (!outputFile || outputFile->IsZombie()) {
      std::cerr << "ERROR: could not create output file " << outfileName << std::endl;
      input->Close();
      return 1;
   }

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory is
   // the only TMVA object you have to interact with
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification" );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables used for the MVA training. Each variable is
   // registered as the expression  name:=(name<limit?replacementValue:name),
   // i.e. values below `limit` are replaced by `replacementValue`
   // (presumably to neutralise the dataset's "undefined" sentinel -- confirm).
   const TString limit ("-900.0");
   const TString replacementValue ("0.0");
   std::vector<std::string> vars = {"DER_mass_MMC","DER_mass_transverse_met_lep","DER_mass_vis","DER_pt_h",
                                    "DER_deltaeta_jet_jet","DER_mass_jet_jet","DER_prodeta_jet_jet","DER_deltar_tau_lep",
                                    "DER_pt_tot","DER_sum_pt","DER_pt_ratio_lep_tau","DER_met_phi_centrality",
                                    "DER_lep_eta_centrality","PRI_tau_pt","PRI_tau_eta","PRI_tau_phi",
                                    "PRI_lep_pt","PRI_lep_eta","PRI_lep_phi","PRI_met","PRI_met_phi","PRI_met_sumet",
                                    "PRI_jet_num","PRI_jet_leading_pt","PRI_jet_leading_eta","PRI_jet_leading_phi",
                                    "PRI_jet_subleading_pt","PRI_jet_subleading_eta","PRI_jet_subleading_phi","PRI_jet_all_pt"};
   for (const std::string& name : vars) {
      TString expression;
      expression.Form ("%s:=(%s<%s?%s:%s)", name.c_str (), name.c_str (), limit.Data (), replacementValue.Data (), name.c_str ());
      factory->AddVariable (expression, 'F');
   }

   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
   //    factory->AddSpectator( "spec1 := var1*2",  "Spectator 1", "units", 'F' );
   //    factory->AddSpectator( "spec2 := var1*3",  "Spectator 2", "units", 'F' );

   // The same tree feeds both classes; the per-class cut selects the events.
   // The global per-tree weight is 1 (event-wise weights are set below).
   factory->AddTree(tree, "Signal",     1., "Label == 1");
   factory->AddTree(tree, "Background", 1., "Label == 0");

   // To give different trees for training and testing, do as follows:
   //    factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
   //    factory->AddSignalTree( signalTestTree,     signalTestWeight,  "Test" );
   // (see the TMVA Users Guide for adding training/test events "by hand")

   // Set individual event weights (the variables must exist in the original TTree)
   //    for signal    : factory->SetSignalWeightExpression    ("weight1*weight2");
   //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
   factory->SetSignalWeightExpression( "Weight" );
   factory->SetBackgroundWeightExpression( "Weight" );

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = "Label==1"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = "Label==0"; // for example: TCut mycutb = "abs(var1)<0.5";

   // Tell the factory how to use the training and testing events
   //
   // If no numbers of events are given, half of the events in the tree are used
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut,
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=0:nTest_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
   //    factory->PrepareTrainingAndTestTree( mycuts, mycutb,
   //                                         "nTrain_Signal=5000:nTrain_Background=5000:nTest_Signal=5000:nTest_Background=5000:SplitMode=Random:NormMode=NumEvents:!V" );

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

   if (Use["CutsSA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                           "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   // Likelihood ("naive Bayes estimator")
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // Decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   // PCA-transformed likelihood
   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" );

   // Use a kernel density estimator to approximate the PDFs
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" );

   // Use a variable-dependent mix of splines and kernel density estimator
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" );

   // Test the multi-dimensional probability density estimator
   // here are the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["PDEFoamBoost"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
                           "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // H-Matrix (chi2-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" );

   // Linear discriminant (same as Fisher discriminant)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher discriminant (same as LD)
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher",
                           "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators

   // improved neural network implementation
   if (Use["NN"])
   {
      // Alternative layouts that were tried:
      //    TString layoutString ("Layout=TANH|(N+100)*2,LINEAR");
      //    TString layoutString ("Layout=SOFTSIGN|100,SOFTSIGN|50,SOFTSIGN|20,LINEAR");
      //    TString layoutString ("Layout=RELU|300,RELU|100,RELU|30,RELU|10,LINEAR");
      //    TString layoutString ("Layout=SOFTSIGN|50,SOFTSIGN|30,SOFTSIGN|20,SOFTSIGN|10,LINEAR");
      //    TString layoutString ("Layout=TANH|50,TANH|30,TANH|20,TANH|10,LINEAR");
      //    TString layoutString ("Layout=SOFTSIGN|50,SOFTSIGN|20,LINEAR");
      TString layoutString ("Layout=SOFTSIGN|70,SOFTSIGN|30,LINEAR");

      // Training phases, joined below with '|' into one TrainingStrategy option
      std::vector<TString> strategy;
      strategy.push_back (TString ("LearningRate=1e-2,Momentum=0.9,Repetitions=1,ConvergenceSteps=70,BatchSize=120,TestRepetitions=7,WeightDecay=0.001,Regularization=NONE,DropConfig=0.5+0.5+0.5+0.5,DropRepetitions=1,Multithreading=True"));
      strategy.push_back (TString ("LearningRate=1e-4,Momentum=0.5,Repetitions=1,ConvergenceSteps=70,BatchSize=80,TestRepetitions=7,WeightDecay=0.001,Regularization=L2,Multithreading=True,DropConfig=0.1+0.1+0.1+0.1,DropRepetitions=1"));
      strategy.push_back (TString ("LearningRate=1e-5,Momentum=0.3,Repetitions=1,ConvergenceSteps=70,BatchSize=60,TestRepetitions=7,WeightDecay=0.0001,Regularization=L2,Multithreading=True"));
      strategy.push_back (TString ("LearningRate=1e-6,Momentum=0.0,Repetitions=1,ConvergenceSteps=70,BatchSize=40,TestRepetitions=7,WeightDecay=0.0001,Regularization=NONE,Multithreading=True"));
      //    strategy.push_back (TString ("LearningRate=1e-6,Momentum=0.0,Repetitions=1,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0001,Regularization=NONE,Multithreading=True"));

      TString trainingStrategyString ("TrainingStrategy=");
      for (std::vector<TString>::const_iterator it = strategy.begin (), itEnd = strategy.end (); it != itEnd; ++it)
      {
         if (it != strategy.begin ())
            trainingStrategyString += "|";
         trainingStrategyString += *it;
      }

      //    TString nnOptions ("!H:V:VarTransform=Normalize:ErrorStrategy=CROSSENTROPY");
      TString nnOptions ("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=G:WeightInitialization=XAVIERUNIFORM");
      //    TString nnOptions ("!H:V:VarTransform=Normalize:ErrorStrategy=CHECKGRADIENTS");
      nnOptions.Append (":");
      nnOptions.Append (layoutString);
      nnOptions.Append (":");
      nnOptions.Append (trainingStrategyString);

      factory->BookMethod( TMVA::Types::kNN, "NN", nnOptions ); // NN
   }

   // CF(Clermont-Ferrand)ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...

   // Tmlp(Root)ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDTG"]) // Gradient Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" );

   if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );

   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );

   if (Use["BDTF"])  // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables
      factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher",
                           "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

   // ---- STILL EXPERIMENTAL and only implemented for BDT's !
   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","FitGA");

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros (interactive sessions only)
   if (!gROOT->IsBatch()) TMVA::TMVAGui( outfileName );

   return 0;
}
void Reg(){ TMVA::Tools::Instance(); std::cout << "==> Start TMVARegression" << std::endl; ifstream myfile; myfile.open("99per.txt"); ostringstream xcS,xcH,xcP,xcC,xcN; double xS,xH,xC,xN,xP; if(myfile.is_open()){ while(!myfile.eof()){ myfile>>xS>>xH>>xC>>xN>>xP; } } xcS<<xS; xcH<<xH; xcC<<xC; xcN<<xN; xcP<<xP; //Output file TString outfileName( "Ex1out_FullW_def.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); //Declaring the factory TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar" ); //Declaring Input Varibles factory->AddVariable( "Sieie",'F'); factory->AddVariable( "ToE", 'F' ); factory->AddVariable( "isoC",'F' ); factory->AddVariable( "isoN",'F' ); factory->AddVariable( "isoP",'F' ); TString fname = "../../CutTMVATrees_Barrel.root"; input = TFile::Open( fname ); // --- Register the regression tree TTree *signal = (TTree*)input->Get("t_S"); TTree *background = (TTree*)input->Get("t_B"); //Just Some more settings Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; // You can add an arbitrary number of regression trees factory->AddSignalTree( signal, signalWeight ); factory->AddBackgroundTree( background , backgroundWeight ); TCut mycuts =""; TCut mycutb =""; // factory->PrepareTrainingAndTestTree(mycuts,mycutb,"nTrain_Signal=9000:nTrain_Background=9000:nTest_Signal=10000:nTest_Background=10000"); factory->SetBackgroundWeightExpression("weightPT*weightXS"); factory->SetSignalWeightExpression("weightPT*weightXS"); TString methodName = "Cuts_FullsampleW_def"; TString methodOptions ="!H:!V:FitMethod=GA:EffMethod=EffSEl"; methodOptions +=":VarProp[0]=FMin:VarProp[1]=FMin:VarProp[2]=FMin:VarProp[3]=FMin:VarProp[4]=FMin"; methodOptions +=":CutRangeMax[0]="+xcS.str(); methodOptions +=":CutRangeMax[1]="+xcH.str(); methodOptions +=":CutRangeMax[2]="+xcC.str(); methodOptions +=":CutRangeMax[3]="+xcN.str(); methodOptions +=":CutRangeMax[4]="+xcP.str(); //************ 
factory->BookMethod(TMVA::Types::kCuts,methodName,methodOptions); factory->TrainAllMethods(); factory->TestAllMethods(); factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVARegression is done!" << std::endl; delete factory; }
//void TMVAClassification( TString myMethodList = "" ) void tmvaClassifier( TString myMethodList = "", TString inputDir="~/work/ewkzp2j_5311/ll/", bool minimalTrain=false, bool useQG=false) { gSystem->ExpandPathName(inputDir); TString pf("base_weights"); if(!minimalTrain){ if(useQG) pf="full_weights"; else pf="weights"; } TMVA::gConfig().GetIONames().fWeightFileDir = inputDir + pf; // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Cut optimisation Use["Cuts"] = 0; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // // --- 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 0; Use["LikelihoodD"] = 1; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // --- Mutidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 0; Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting Use["KNN"] = 0; // k-nearest neighbour method // // --- Linear Discriminant Analysis 
Use["LD"] = 0; // Linear Discriminant identical to Fisher Use["Fisher"] = 1; Use["FisherCat"] = 0;//added by loic Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 0; // minimisation of user-defined function using Genetics Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 0; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 0; // Recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 0; // uses Adaptive Boost Use["BDTG"] = 0; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 1; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of fisher discriminant for node splitting // // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules") Use["RuleFit"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. 
Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. 
This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables // factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' ); // factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' ); // Read training and test data // (it is also possible to use ASCII format as input -> see TMVA Users Guide) // TString fname = "./tmva_class_example.root"; //if (gSystem->AccessPathName( fname )) // file does not exist in local directory // gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root"); // std::cout << "--- TMVAClassification : Using input file: " << input->GetName() << std::endl; // --- Register the training and test trees TChain *signal = new TChain("ewkzp2j"); TChain *background = new TChain("ewkzp2j"); TSystemDirectory dir(inputDir,inputDir); TList *files = dir.GetListOfFiles(); if (files) { TSystemFile *file; TString fname; TIter next(files); while ((file=(TSystemFile*)next())) { fname = file->GetName(); if(!fname.EndsWith("_summary.root")) continue; if(fname.Contains("Data")) continue; if(!fname.Contains("DY")) continue; bool isSignal(false); if(fname.Contains("JJ")) { signal->Add(fname); isSignal=true; } else if(fname.Contains("50toInf") && fname.Contains("DY")) background->Add(fname); cout << fname << " added as " << (isSignal ? 
"signal" : "background") << endl; } }else{ cout << "[Error] no files found in " << inputDir << endl; } cout << "Signal has " << signal->GetEntries() << " raw events" << endl << "Background has " << background->GetEntries() << " raw events"<< endl; // global event weights per tree Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); // event-per-event weights per tree factory->SetBackgroundWeightExpression( "weight/cnorm" ); factory->SetSignalWeightExpression( "weight/cnorm" ); //define variables for the training if(minimalTrain) { factory->AddVariable( "mjj", "M_{jj}" "GeV", 'F' ); factory->AddVariable( "detajj", "#Delta#eta_{jj}", "", 'F' ); factory->AddVariable( "spt", "#Delta_{rel}", "GeV", 'F' ); } else { factory->AddVariable( "mjj", "M_{jj}" "GeV", 'F' ); factory->AddVariable( "detajj", "#Delta#eta_{jj}", "", 'F' ); factory->AddVariable( "setajj", "#Sigma#eta_{j}", "", 'F' ); factory->AddVariable( "eta1", "#eta(1)", "", 'F' ); factory->AddVariable( "eta2", "#eta(2)", "", 'F' ); factory->AddVariable( "pt1", "p_{T}(1)", "GeV", 'F' ); factory->AddVariable( "pt2", "p_{T}(2)", "GeV", 'F' ); factory->AddVariable( "spt", "#Delta_{rel}", "GeV", 'F' ); if(useQG) factory->AddVariable( "qg1", "q/g(1)", "", 'F' ); if(useQG) factory->AddVariable( "qg2", "q/g(2)", "", 'F' ); } // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset 
ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood ("naive Bayes estimator") if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density estimator to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", 
"!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", 
"!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); if (Use["FisherCat"]){ TMVA::MethodBase* fiCat = factory->BookMethod( TMVA::Types::kCategory, "FisherCat","" ); TMVA::MethodCategory* mcategory = dynamic_cast<TMVA::MethodCategory*>(fiCat); mcategory->AddMethod( "mjj<250", "mjj:detajj:spt:", TMVA::Types::kFisher, "Fisher_Cat1", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); mcategory->AddMethod( "mjj>=250&&mjj<350" , "mjj:detajj:spt:", TMVA::Types::kFisher, "Fisher_Cat0000", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); mcategory->AddMethod( "mjj>=350&&mjj<450" , "mjj:detajj:spt:", TMVA::Types::kFisher, "Fisher_Cat0350", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); mcategory->AddMethod( "mjj>=450&&mjj<550" , "mjj:detajj:spt:", TMVA::Types::kFisher, "Fisher_Cat0450", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); 
mcategory->AddMethod( "mjj>=550&&mjj<750" , "mjj:detajj:spt:", TMVA::Types::kFisher, "Fisher_Cat0550", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); mcategory->AddMethod( "mjj>=750&&mjj<1000", "mjj:detajj:spt:", TMVA::Types::kFisher, "Fisher_Cat0750", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); mcategory->AddMethod( "mjj>=1000" , "mjj:detajj:spt:", TMVA::Types::kFisher, "Fisher_Cat1000", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); } // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", 
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... 
// Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:MaxDepth=2" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=25:PruneMethod=CostComplexity:PruneStrength=25.0:VarTransform=Decorrelate"); //"!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" ); if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier usage, see: 
TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","FitGA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; std::cout << " ==> Weights are stored in " << TMVA::gConfig().GetIONames().fWeightFileDir << std::endl; delete factory; // Launch the GUI for the root macros // if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
int main(int argc, char* argv[]){ // Configurable parameters // int max_events; // Maximum number of events to process //string filelist; // The file containing a list of files to use as input //string input_prefix; // A prefix that will be added to the path of each input file string folder; string output_name; // Name of the ouput ROOT File string output_folder; // Folder to write the output in string paramfile; string paramfile2; string classname; bool twotag; bool onetag; po::options_description config("Configuration"); po::variables_map vm; po::notify(vm); config.add_options() ("folder", po::value<string>(&folder)->default_value("output/Paper_2012/")) // ("input_prefix", po::value<string>(&input_prefix)->default_value("")) ("output_name", po::value<string>(&output_name)->default_value("test_tmva.root")) ("output_folder", po::value<string>(&output_folder)->default_value("")) ("paramfile", po::value<string>(¶mfile)->default_value("./scripts/Paper_params_2012.dat")) ("paramfile2", po::value<string>(¶mfile2)->default_value("./scripts/TMVAinputshad.dat")) ("classname", po::value<string>(&classname)->default_value("HhhMVA")) ("twotag", po::value<bool>(&twotag)->default_value(true)) ("onetag", po::value<bool>(&onetag)->default_value(false)) ; po::store(po::command_line_parser(argc, argv). 
options(config).allow_unregistered().run(), vm); po::notify(vm); std::cout << "-------------------------------------" << std::endl; std::cout << "Train MVA" << std::endl; std::cout << "-------------------------------------" << std::endl; string param_fmt = "%-25s %-40s\n"; std::vector<string> bckglist; bckglist.push_back("TTJetsFullLept"); bckglist.push_back("TTJetsSemiLept"); bckglist.push_back("TTJetsHadronicExt"); // bckglist.push_back("WWJetsTo2L2Nu"); // bckglist.push_back("WZJetsTo2L2Q"); // bckglist.push_back("WZJetsTo3LNu"); // bckglist.push_back("ZZJetsTo2L2Nu"); // bckglist.push_back("ZZJetsTo2L2Q"); // bckglist.push_back("ZZJetsTo4L"); // bckglist.push_back("DYJetsToTauTauSoup"); // bckglist.push_back("DYJetsToLLSoup"); // bckglist.push_back("DYJetsToTauTau"); // bckglist.push_back("DYJetsToLL"); // bckglist.push_back("T-tW"); // bckglist.push_back("Tbar-tW"); std::vector<string> signallist; signallist.push_back("GluGluToHTohhTo2Tau2B_mH-300"); sample_names_.reserve(bckglist.size()+signallist.size()); sample_names_.insert(sample_names_.end(),bckglist.begin(),bckglist.end()); sample_names_.insert(sample_names_.end(),signallist.begin(),signallist.end()); std::vector<TFile*> BackgroundSamples; for(unsigned int iter=0;iter<bckglist.size();++iter){ BackgroundSamples.push_back(TFile::Open((folder+bckglist.at(iter)+"_mt_2012.root").c_str())); } std::vector<TFile*> SignalSamples; for(unsigned int sigIter=0;sigIter<signallist.size();++sigIter){ SignalSamples.push_back(TFile::Open((folder+signallist.at(sigIter)+"_mt_2012.root").c_str())); } std::vector<TTree*> backgroundTrees; for(unsigned int iter2=0;iter2<BackgroundSamples.size();++iter2){ backgroundTrees.push_back(dynamic_cast<TTree*>(BackgroundSamples.at(iter2)->Get("ntuple"))); } std::vector<TTree*> signalTrees; for(unsigned int sigIter2=0;sigIter2<SignalSamples.size();++sigIter2){ signalTrees.push_back(dynamic_cast<TTree*>(SignalSamples.at(sigIter2)->Get("ntuple"))); } TFile *outfile = new 
TFile((output_folder+output_name).c_str(),"RECREATE"); TMVA::Factory *factory = new TMVA::Factory(classname,outfile,"!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"); std::vector<std::string> vars; std::ifstream parafile(paramfile2.c_str()); std::cout<<paramfile2.c_str()<<std::endl; string line; while(getline(parafile,line)){ vars.push_back(line); } parafile.close(); std::cout<<(vars.at(0)).c_str()<<std::endl; std::vector<float> var2; for(unsigned int variter=0;variter<vars.size();++variter){ var2.push_back(::atof((vars.at(variter)).c_str())); } for(unsigned int variter=0;variter<vars.size();++variter){ factory->AddVariable((vars.at(variter)).c_str(),(vars.at(variter)).c_str(),"",'F'); } factory->AddSpectator("mt_1","mt_1","",'F'); factory->AddSpectator("n_prebjets","n_prebjets","",'I'); factory->AddSpectator("prebjetbcsv_1","prebjetbcsv_1","",'F'); factory->AddSpectator("prebjetbcsv_2","prebjetbcsv_2","",'F'); double weightval_=0; ParseParamFile(paramfile); for(unsigned int bckgit=0;bckgit<backgroundTrees.size();++bckgit){ auto it = sample_info_.find(bckglist.at(bckgit).c_str()); if(it!=sample_info_.end()){ double evt = it->second.first; double xs = it->second.second; weightval_=(double) xs/evt; std::cout<<weightval_<<std::endl; } factory->AddBackgroundTree(backgroundTrees.at(bckgit),weightval_); } for(unsigned int sgit=0;sgit<signalTrees.size();++sgit){ auto it = sample_info_.find(signallist.at(sgit).c_str()); if(it!=sample_info_.end()){ double evt = it->second.first; double xs=it->second.second; weightval_=(Double_t) xs/evt; } std::cout<<weightval_<<std::endl; factory->AddSignalTree(signalTrees.at(sgit),weightval_); } factory->SetBackgroundWeightExpression("wt"); factory->SetSignalWeightExpression("wt"); TCut mycutb, mycuts; if(twotag){ mycutb="n_prebjets>1&&mt_1<30&&prebjetbcsv_1>0.679&&prebjetbcsv_2>0.679"; mycuts="n_prebjets>1&&mt_1<30&&prebjetbcsv_1>0.679&&prebjetbcsv_2>0.679"; } else if(onetag){ 
mycutb="n_prebjets>1&&mt_1<30&&prebjetbcsv_1>0.679&&prebjetbcsv_2<0.679"; mycuts="n_prebjets>1&&mt_1<30&&prebjetbcsv_1>0.679&&prebjetbcsv_2<0.679"; } else{ mycutb="n_prebjets>1&&mt_1<30"; mycuts="n_prebjets>1&&mt_1<30"; } //TCut mycutb=""; //TCut mycuts=""; factory->PrepareTrainingAndTestTree( mycuts, mycutb,"SplitMode=Random:!V"); factory->BookMethod( TMVA::Types::kBDT, "BDT","!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); factory->TrainAllMethods(); factory->TestAllMethods(); factory->EvaluateAllMethods(); outfile->Close(); delete factory; return 0; }
void test2(){ //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); TString outfileName( "trainingBDT_tZq.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "BDT_trainning_tzq", outputFile,"!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); TFile *input_sig = TFile::Open( "../TreeReader/outputroot/histofile_tZq.root" ); TFile *input_wz = TFile::Open( "../TreeReader/outputroot/histofile_WZ.root" ); TTree *signal = (TTree*)input_sig->Get("Ttree_tZq"); TTree *background = (TTree*)input_wz->Get("Ttree_WZ"); factory->AddSignalTree ( signal, 1.); factory->AddBackgroundTree( background, 1.); std::vector<TString > varList; varList.push_back("tree_cosThetaStar");; varList.push_back("tree_topMass"); varList.push_back("tree_totMass"); varList.push_back("tree_deltaPhilb"); varList.push_back("tree_deltaRlb"); varList.push_back("tree_deltaRTopZ"); varList.push_back("tree_asym"); varList.push_back("tree_Zpt"); varList.push_back("tree_ZEta"); varList.push_back("tree_topPt"); varList.push_back("tree_topEta"); varList.push_back("tree_NJets"); varList.push_back("tree_NBJets"); varList.push_back("tree_deltaRZl"); varList.push_back("tree_deltaPhiZmet"); varList.push_back("tree_btagDiscri"); varList.push_back("tree_totPt"); varList.push_back("tree_totEta"); varList.push_back("tree_leptWPt"); varList.push_back("tree_leptWEta"); varList.push_back("tree_leadJetPt"); varList.push_back("tree_leadJetEta"); varList.push_back("tree_deltaRZleptW"); varList.push_back("tree_deltaPhiZleptW"); varList.push_back("tree_met" ); varList.push_back("tree_mTW" ); for(unsigned int i=0; i< varList.size() ; i++) factory->AddVariable( varList[i].Data(), 'F'); factory->SetSignalWeightExpression ("tree_EvtWeight"); factory->SetBackgroundWeightExpression("tree_EvtWeight"); // Apply additional cuts on the signal and background samples (can be 
different) TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); //factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); // factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=100:nEventsMin=100:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=100:nEventsMin=100:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void TMVAClassification( TString myMethodList = "", int isMC=1, int useSvtx=1) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // to get access to the GUI and all tmva macros TString thisdir = gSystem->DirName(gInterpreter->GetCurrentMacroName()); gROOT->SetMacroPath(thisdir + ":" + gROOT->GetMacroPath()); //gROOT->ProcessLine(".L /Users/kjung/root5-34-23/tmva/test/TMVAGui.C"); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Cut optimisation Use["Cuts"] = 0; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // // --- 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 0; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // --- Mutidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 0; Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting Use["KNN"] = 0; // k-nearest neighbour method // // --- Linear Discriminant Analysis Use["LD"] = 0; // Linear Discriminant identical to 
Fisher Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 0; // minimisation of user-defined function using Genetics Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 0; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 0; // Recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 0; // uses Adaptive Boost Use["BDTG"] = 1; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of fisher discriminant for node splitting // // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules") Use["RuleFit"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. 
Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName; string svtxExt = "noSvtx"; if(useSvtx) svtxExt = "withSvtx"; if(!isMC) outfileName = "TMVA_trained_data.root"; else outfileName = Form("TMVA_trained_cJet_medDCuts_BvC_%s.root",svtxExt.c_str()); cout << "fn: "<< outfileName << endl; TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); outputFile->cd(); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" 
(not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] //factory->AddVariable( "myvar1 := var1+var2", 'F' ); if(useSvtx) factory->AddVariable("svtxptFrac","svtx pt fraction","units",'F'); //factory->AddVariable( "djetR", "Closest Meson to Jet dr", "", 'F' ); factory->AddVariable( "nIP","number of IP trks","units",'I'); if(useSvtx) factory->AddVariable( "svtxm", "svtx mass", "units", 'F'); if(useSvtx) factory->AddVariable( "svtxmEnergyFrac","svtxmEnergyFrac","units",'F'); if(useSvtx) factory->AddVariable( "svtxpt", "svtx pt", "units", 'F'); if(useSvtx) factory->AddVariable( "svtxmcorr", "corrected svtx mass", "units", 'F'); if(useSvtx) factory->AddVariable( "svtxdl", "svtx displacement", "units", 'F'); if(useSvtx) factory->AddVariable( "svtxdls", "svtx displacement significance", "units", 'F'); if(useSvtx) factory->AddVariable( "svtxntrk", "svtx ntracks", "units", 'F'); if(useSvtx) factory->AddVariable( "sv2Trkdl", "2trk svtx close2PV dl", "units", 'F'); if(useSvtx) factory->AddVariable( "svtxTrkSumChi2", "svtx trk sum chi2", "units", 'F'); if(useSvtx) factory->AddVariable( "svtxTrkNetCharge", "svtx trk net chg", "units", 'F'); if(useSvtx) factory->AddVariable( "svtxNtrkInCone", "svtx ntrk in cone", "units", 'F'); //factory->AddVariable( "jteta", "Jet eta", "units", 'F' ); factory->AddVariable( 
"closestDMass","Closest DMass", "units", 'F' ); factory->AddVariable( "closestDType","Closest Type","units",'F'); factory->AddVariable( "closestDPt","Closest DpT", "units", 'F' ); /*factory->AddVariable( "chargedMax","chargedMax","units",'F'); factory->AddVariable( "chargedSum","chargedSum","units",'F'); factory->AddVariable( "neutralMax","neutralMax","units",'F'); factory->AddVariable( "neutralSum","neutralSum","units",'F'); factory->AddVariable( "photonMax","photonMax","units",'F'); factory->AddVariable( "photonSum","photonSum","units",'F'); factory->AddVariable( "eSum","eSum","units",'F'); factory->AddVariable( "muSum","muSum","units",'F');*/ for(int i=0; i<3; i++){ //factory->AddVariable( Form("ipProb0_%d",i),Form("prob0 IP part %d",i),"units",'F'); //factory->AddVariable( Form("ipPt_%d",i),Form("IP pt part %d",i),"units",'F'); factory->AddVariable( Form("trackIP2dSig_%d",i),Form("IP trk 2d sig part %d",i),"units",'F'); factory->AddVariable( Form("trackIP3dSig_%d",i),Form("IP trk 3d sig part %d",i),"units",'F'); factory->AddVariable( Form("trackIP2d_%d",i),Form("IP trk 2d part %d",i),"units",'F'); factory->AddVariable( Form("trackIP3d_%d",i),Form("IP trk 3d part %d",i),"units",'F'); } for(int i=0; i<1; i++){ //factory->AddVariable( Form("trackPtRel_%d",i),Form("pt rel part %d",i),"units",'F'); //factory->AddVariable( Form("trackPPar_%d",i),Form("track ppar part %d",i),"units",'F'); //factory->AddVariable( Form("trackPParRatio_%d",i),Form("track ppar part %d",i),"units",'F'); factory->AddVariable( Form("trackJetDist_%d",i),Form("dist to jet part %d",i),"units",'F'); factory->AddVariable( Form("trackDecayLenVal_%d",i),Form("trk decay len part %d",i),"units",'F'); //factory->AddVariable( Form("trackDeltaR_%d",i),Form("trk dr to jet part %d",i),"units",'F'); //factory->AddVariable( Form("trackPtRatio_%d",i),Form("trk pt ratio part %d",i),"units",'F'); } //factory->AddVariable( "trackSip2dSigAboveCharm","trackSip2dSigAboveCharm","units",'F'); //factory->AddVariable( 
"trackSip3dSigAboveCharm","trackSip3dSigAboveCharm","units",'F'); //factory->AddVariable( "trackSip2dValAboveCharm","trackSip2dValAboveCharm","units",'F'); //factory->AddVariable( "trackSip3dValAboveCharm","trackSip3dValAboveCharm","units",'F'); //factory->AddVariable( "svJetDeltaR","svJetDeltaR","units",'F'); factory->AddVariable( "trackSumJetDeltaR","trackSumJetDeltaR","units",'F'); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables //factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' ); if(isMC) factory->AddSpectator( "refpt", "ref pT", "units", 'F' ); factory->AddSpectator( "rawpt", "raw pT", "units", 'F' ); //factory->AddSpectator( "dCandPt", "D-Meson pT", "units" , 'F' ); if(isMC) factory->AddSpectator( "refparton_flavorForB", "jet flavor", "units" , 'F' ); //factory->AddSpectator( "evtSelection", "event selection", "units" , 'F' ); //factory->AddSpectator( "vz", "z-vertex", "units" , 'F' ); //if(isMC) factory->AddSpectator( "subid", "subid", "units" , 'F' ); //factory->AddSpectator( "pthat", "pthat", "units" , 'F' ); //factory->AddSpectator( "run", "run", "units" , 'I' ); //factory->AddSpectator( "bin", "centrality", "units" , 'I' ); factory->AddSpectator( "jtpt", "Jet pT", "units", 'F' ); // Read training and test data // (it is also possible to use ASCII format as input -> see TMVA Users Guide) TString fname; if(!isMC) fname = "/Users/kjung/charmJets/pPb/input/DMesonCJet_pPbData_ppReco_akPu3PF_convertToJetTree_withLHCbVars_medDCuts.root"; else fname = "/Users/kjung/charmJets/pPb/input/DMesonCJet_QCDJetOnly_pPbMC_ppReco_akPu3PF_convertToJetTree_medDCuts.root"; //if (gSystem->AccessPathName( fname )) // file does not exist in local directory // gSystem->Exec("curl -O http://root.cern.ch/files/tmva_class_example.root"); TFile 
*input = TFile::Open( fname ); std::cout << "--- TMVAClassification : Using input file: " << input->GetName() << std::endl; // --- Register the training and test trees TTree *signal, *background; if(useSvtx){ signal = (TTree*)input->Get("jets"); background = (TTree*)input->Get("jets"); } else{ signal = (TTree*)input->Get("jetsNoSvtx"); background = (TTree*)input->Get("jetsNoSvtx"); } TTree *signal_2 = (TTree*)input->Get("dMesons"); TTree *background_2 = (TTree*)input->Get("dMesons"); signal->AddFriend(signal_2); background->AddFriend(background_2); // global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; // You can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); // To give different trees for training and testing, do as follows: // factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ); // factory->AddSignalTree( signalTestTree, signalTestWeight, "Test" ); // Use the following code instead of the above two or four lines to add signal and background // training and test events "by hand" // NOTE that in this case one should not give expressions (such as "var1+var2") in the input // variable definition, but simply compute the expression before adding the event // // // --- begin ---------------------------------------------------------- // std::vector<Double_t> vars( 4 ); // vector has size of number of input variables // Float_t treevars[4], weight; // // // Signal const int nvars = 35; //67; const int nvarsWithInt = 2; double weight; std::vector<double> vars(nvars); double treevars[nvars-nvarsWithInt]; int treevars2[nvarsWithInt]; //std::string variables[nvars] = 
{"djetR","closestDPt","svtxm","svtxdl","jtpt","refpt","rawpt","refparton_flavorForB","svtxdls","trackIP2dSig_0","trackIP3dSig_0","trackIP2d_0","trackIP3d_0","ipProb0_0","trackPtRel_0","trackPPar_0","trackPParRatio_0","trackJetDist_0","trackDecayLenVal_0","trackDeltaR_0","trackPtRatio_0","ipPt_0","trackIP2dSig_1","trackIP3dSig_1","trackIP2d_1","trackIP3d_1","ipProb0_1","trackPtRel_1","trackPPar_1","trackPParRatio_1","trackJetDist_1","trackDecayLenVal_1","trackDeltaR_1","trackPtRatio_1","ipPt_1","trackIP2dSig_2","trackIP3dSig_2","trackIP2d_2","trackIP3d_2","ipProb0_2","trackPtRel_2","trackPPar_2","trackPParRatio_2","trackJetDist_2","trackDecayLenVal_2","trackDeltaR_2","trackPtRatio_2","ipPt_2","trackIP2dSig_3","trackIP3dSig_3","trackIP2d_3","trackIP3d_3","ipProb0_3","trackPtRel_3","trackPPar_3","trackPParRatio_3","trackJetDist_3","trackDecayLenVal_3","trackDeltaR_3","trackPtRatio_3","ipPt_3","trackSip2dValAboveCharm","trackSip3dValAboveCharm","svJetDeltaR","trackSumJetDeltaR","nIP","svtxntrk"}; std::string variables[nvars] = {"jtpt","refpt","rawpt","refparton_flavorForB","svtxptFrac","svtxmEnergyFrac","svtxpt","svtxm", "svtxdl","svtxdls","svtxTrkSumChi2","svtxTrkNetCharge","sv2Trkdl", "closestDMass","closestDType","closestDPt","trackIP2dSig_0","trackIP2dSig_1", "trackIP2dSig_2","trackIP3dSig_0","trackIP3dSig_1","trackIP3dSig_2","trackIP2d_0","trackIP2d_1", "trackIP2d_2","trackIP3d_0","trackIP3d_1","trackIP3d_2","trackJetDist_0","trackDecayLenVal_0","svJetDeltaR", "trackSumJetDeltaR","svtxNtrkInCone","svtxntrk","nIP"}; //std::string variables[nvars] = {"jtpt","refpt","rawpt","refparton_flavorForB","svtxptFrac","svtxdl","svtxdls","closestDPt","closestDType","closestDMass","svtxm","svtxmcorr","svJetDeltaR","trackSumJetDeltaR","svtxpt","sv2Trkdl","svtxTrkSumChi2","svtxTrkNetCharge","svtxNtrkInCone","svtxntrk"}; signal->SetBranchAddress("weight", &weight); for (UInt_t ivar=0; ivar<nvars-nvarsWithInt; ivar++) signal->SetBranchAddress( variables[ivar].c_str(), 
&(treevars[ivar]) ); for (UInt_t ivar=nvars-nvarsWithInt; ivar<nvars; ivar++) signal->SetBranchAddress( variables[ivar].c_str(), &(treevars2[ivar]) ); for (UInt_t i=0; i<signal->GetEntries(); i++) { signal->GetEntry(i); for (UInt_t ivar=0; ivar<nvars-nvarsWithInt; ivar++) vars[ivar] = treevars[ivar]; for (UInt_t ivar=nvars-nvarsWithInt; ivar<nvars; ivar++) vars[ivar] = treevars2[ivar]; // add training and test events; here: first half is training, second is testing // note that the weight can also be event-wise //for(int ij=0; ij<nvars; ij++) cout << ij << " " << vars[ij] << endl; if(isMC && (abs(vars[3])==4)) { if (i%2==0) factory->AddSignalTrainingEvent( vars, weight ); else factory->AddSignalTestEvent ( vars, weight ); } } // // // Background (has event weights) background->SetBranchAddress( "weight", &weight ); for (UInt_t ivar=0; ivar<nvars-nvarsWithInt; ivar++) background->SetBranchAddress( variables[ivar].c_str(), &(treevars[ivar]) ); for (UInt_t ivar=nvars-nvarsWithInt; ivar<nvars; ivar++) background->SetBranchAddress( variables[ivar].c_str(), &(treevars2[ivar]) ); for (UInt_t i=0; i<background->GetEntries(); i++) { background->GetEntry(i); for (UInt_t ivar=0; ivar<nvars-nvarsWithInt; ivar++) vars[ivar] = treevars[ivar]; for (UInt_t ivar=nvars-nvarsWithInt; ivar<nvars; ivar++) vars[ivar] = treevars2[ivar]; // add training and test events; here: first half is training, second is testing // note that the weight can also be event-wise if(isMC && (abs(vars[3])==5)) { if (i%2==0) factory->AddBackgroundTrainingEvent( vars, weight ); else factory->AddBackgroundTestEvent ( vars, weight ); } } // --- end ------------------------------------------------------------ // // --- end of tree registration // Set individual event weights (the variables must exist in the original TTree) // for signal : factory->SetSignalWeightExpression ("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("weight1*weight2"); 
factory->SetSignalWeightExpression("weight"); factory->SetBackgroundWeightExpression( "weight" ); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; // Tell the factory how to use the training and testing events // // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if 
(Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood ("naive Bayes estimator") if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density estimator to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( 
TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", 
"H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", 
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... 
// Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", //"!H:!V:NTrees=850:MinNodeSize=2%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" ); "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.9:SeparationType=GiniIndex:nCuts=500:MaxDepth=2" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" ); if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier usage, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the 
MVAs using the set of training events // ---- STILL EXPERIMENTAL and only implemented for BDT's ! // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","FitGA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros //if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void TMVAClassification( TString myMethodList = "" ) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // to get access to the GUI and all tmva macros TString tmva_dir(TString(gRootDir) + "/tmva"); if(gSystem->Getenv("TMVASYS")) tmva_dir = TString(gSystem->Getenv("TMVASYS")); gROOT->SetMacroPath(tmva_dir + "/test/:" + gROOT->GetMacroPath() ); gROOT->ProcessLine(".L TMVAGui.C"); gROOT->ProcessLine(".L BDT.C"); gROOT->ProcessLine(".L BDTControlPlots.C"); gROOT->ProcessLine(".L BDT_Reg.C"); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // // Turn on-off the MVA to run // // --- Cut optimisation Use["Cuts"] = 0; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 1; Use["CutsSA"] = 0; // // --- 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 0; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // --- Mutidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 0; Use["PDEFoamBoost"] = 0; // uses generalised MVA method 
boosting Use["KNN"] = 0; // k-nearest neighbour method // // --- Linear Discriminant Analysis Use["LD"] = 0; // Linear Discriminant identical to Fisher Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 0; // minimisation of user-defined function using Genetics Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 1; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 0; // Recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 0; // uses Adaptive Boost Use["BDTG"] = 1; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of fisher discriminant for node splitting // // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules") Use["RuleFit"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. 
Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. string outFileName = "TMVA_" + sSelection[mode-1] + ".root"; TString outfileName( outFileName.c_str()); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] /* factory->AddVariable( "myvar1 := var1+var2", 'F' ); factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' ); factory->AddVariable( "var3", "Variable 3", "units", 'F' ); factory->AddVariable( "var4", "Variable 4", "units", 'F' ); */ //Not order matter for the 
CutRange declarations if(mode==1){ //SRZjets inside Z use mT2, pTll, mjj, dR_ll, mEff, mWWT factory->AddVariable( "mT2", 'F'); factory->AddVariable( "pTll", 'F'); factory->AddVariable( "mjj", 'F'); factory->AddVariable( "dR_ll", 'F'); factory->AddVariable( "mEff", 'F'); factory->AddVariable( "mWWT", 'F'); factory->AddVariable( "mctPerp", 'F'); factory->AddVariable( "metrel", 'F'); factory->AddVariable( "met/mEff", 'F'); factory->AddVariable( "mT2jj", 'F'); factory->AddVariable( "sphericity", 'F'); factory->AddVariable( "sphericityTrans", 'F'); factory->AddVariable( "abs(llAcoplanarity+3.1415)", 'F'); factory->AddVariable( "abs(jjAcoplanarity+3.1415)", 'F'); factory->AddVariable( "mTl[0]", 'F'); factory->AddVariable( "mTl[1]", 'F'); //factory->AddVariable( "mctPara", 'F'); //issue with var content NAN } else if(mode==2){ factory->AddVariable( "mT2", 'F'); factory->AddVariable( "pTll", 'F'); factory->AddVariable( "mjj", 'F'); factory->AddVariable( "dR_ll", 'F'); factory->AddVariable( "mEff", 'F'); factory->AddVariable( "mWWT", 'F'); factory->AddVariable( "mctPerp", 'F'); factory->AddVariable( "metrel", 'F'); factory->AddVariable( "met/mEff", 'F'); factory->AddVariable( "mT2jj", 'F'); factory->AddVariable( "sphericity", 'F'); factory->AddVariable( "sphericityTrans", 'F'); factory->AddVariable( "abs(llAcoplanarity+3.1415)", 'F'); factory->AddVariable( "abs(jjAcoplanarity+3.1415)", 'F'); factory->AddVariable( "mTl[0]", 'F'); factory->AddVariable( "mTl[1]", 'F'); } else if(mode==3){ factory->AddVariable( "mT2", 'F'); factory->AddVariable( "pTll", 'F'); factory->AddVariable( "mjj", 'F'); factory->AddVariable( "dR_ll", 'F'); factory->AddVariable( "mEff", 'F'); factory->AddVariable( "mWWT", 'F'); factory->AddVariable( "mctPerp", 'F'); factory->AddVariable( "metrel", 'F'); factory->AddVariable( "met/mEff", 'F'); factory->AddVariable( "mT2jj", 'F'); factory->AddVariable( "sphericity", 'F'); factory->AddVariable( "sphericityTrans", 'F'); factory->AddVariable( 
"abs(llAcoplanarity+3.1415)", 'F'); factory->AddVariable( "abs(jjAcoplanarity+3.1415)", 'F'); factory->AddVariable( "mTl[0]", 'F'); factory->AddVariable( "mTl[1]", 'F'); } else if(mode==4){ factory->AddVariable( "mT2", 'F'); factory->AddVariable( "pTll", 'F'); factory->AddVariable( "dR_ll", 'F'); factory->AddVariable( "mWWT", 'F'); factory->AddVariable( "metrel", 'F'); factory->AddVariable( "mTl[0]", 'F'); factory->AddVariable( "mTl[1]", 'F'); } // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables //factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' ); //factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' ); // Read training and test data // (it is also possible to use ASCII format as input -> see TMVA Users Guide) //TString fname = "./tmva_class_example.root"; TString fname_bkg = string(getenv("HISTOANA")) + "/SusyAna/" + ver + "toyNt_Bkg_Zjets_SherpaAlpgen_WZ_ZZ_PowHeg_WW_PowHeg_TopMCNLO_" + toySkim + "_rlep.root"; if(toySkim == "DIL_optimSRSS") fname_bkg = string(getenv("HISTOANA")) + "/SusyAna/" + ver + "toyNt_Bkg_Zjets_SherpaAlpgen_WZ_ZZ_PowHeg_WW_PowHeg_TopMCNLO_FAKE_" + toySkim + ".root"; std::cout << "Bkg file: " << fname_bkg << std::endl; TFile* fBkg = TFile::Open( fname_bkg.Data() ); TString fname_sig; fname_sig = string(getenv("HISTOANA")) + "/SusyAna/" + ver + "ToyNtOutputs/"; if(mode==1) fname_sig += "164339_" + toySkim + ".root"; //wA_noslep_300_50 else if(mode==2) fname_sig += "164326_" + toySkim + ".root"; //wA_noslep_150_0 else if(mode==3) fname_sig += "157955_" + toySkim + ".root"; //wA_slep_142_107 //else if(mode==4) fname_sig += "176559_" + toySkim + ".root"; //wC_slep_117_47 else if(mode==4) fname_sig += "144907_" + toySkim + ".root"; //wC_slep_150_50 std::cout << "Signal file: " << fname_sig << 
std::endl; TFile* fSig = TFile::Open( fname_sig.Data() ); //if (gSystem->AccessPathName( fname )) // file does not exist in local directory // gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root"); std::cout << "--- TMVAClassification : Using input Bkg file: " << fBkg->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input Sig file: " << fSig->GetName() << std::endl; // --- Register the training and test trees TTree *signal = (TTree*)fSig->Get("ToyNt"); TTree *background = (TTree*)fBkg->Get("ToyNt"); // global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; // You can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); // To give different trees for training and testing, do as follows: // factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ); // factory->AddSignalTree( signalTestTree, signalTestWeight, "Test" ); // Use the following code instead of the above two or four lines to add signal and background // training and test events "by hand" // NOTE that in this case one should not give expressions (such as "var1+var2") in the input // variable definition, but simply compute the expression before adding the event // // // --- begin ---------------------------------------------------------- // std::vector<Double_t> vars( 4 ); // vector has size of number of input variables // Float_t treevars[4], weight; // // // Signal // for (UInt_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (UInt_t i=0; i<signal->GetEntries(); i++) { // signal->GetEntry(i); // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < 
signal->GetEntries()/2.0) factory->AddSignalTrainingEvent( vars, signalWeight ); // else factory->AddSignalTestEvent ( vars, signalWeight ); // } // // // Background (has event weights) // background->SetBranchAddress( "weight", &weight ); // for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (UInt_t i=0; i<background->GetEntries(); i++) { // background->GetEntry(i); // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight*weight ); // else factory->AddBackgroundTestEvent ( vars, backgroundWeight*weight ); // } // --- end ------------------------------------------------------------ // // --- end of tree registration // Set individual event weights (the variables must exist in the original TTree) // for signal : factory->SetSignalWeightExpression ("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("weight1*weight2"); factory->SetBackgroundWeightExpression( "w" ); factory->SetSignalWeightExpression( "w"); // Apply additional cuts on the signal and background samples (can be different) // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycuts = ""; if(mode==1){ //Optimisation inside Z peak mycuts = "(llType==0 || llType==1) && abs(mll-91.2)<10 && nCJets>=2 \ && j_pt[0]>20 && j_isC20[0] && Alt$(j_pt[1],0) && Alt$(j_isC20[1],0)"; /* \&& mEff>250 && met/mEff>0.3"; */ } else if(mode==2){ mycuts = "(llType==0 || llType==1) && abs(mll-91.2)<10 && nCJets>=2 \ && j_pt[0]>20 && j_isC20[0] && Alt$(j_pt[1],0) && Alt$(j_isC20[1],0) \ && mT2jj>60"; } else if(mode==3){ mycuts = "llType==1 && nBJets==0 &&nFJets==0 "; } else if(mode==4){ mycuts = "llType==2"; } /* //Optimation close to diagonal TCut mycuts = "(llType==0 || 
llType==1) && mll<50 && nCJets>=2 \ && j_pt[0]>20 && j_isC20[0] && Alt$(j_pt[1],0) && Alt$(j_isC20[1],0)"; */ // for example: TCut mycutb = "abs(var1)<0.5"; TCut mycutb = mycuts; // Tell the factory how to use the training and testing events // // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); if(mode==1) factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=4000:nTrain_Background=100000:SplitMode=Random:NormMode=NumEvents:!V" ); else if(mode==2) factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=4000:nTrain_Background=100000:SplitMode=Random:NormMode=NumEvents:!V" ); else if(mode==3) factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=1000:nTrain_Background=100000:SplitMode=Random:NormMode=NumEvents:!V" ); else if(mode==4) factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=1000:nTrain_Background=100000:SplitMode=Random:NormMode=NumEvents:!V" ); // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", 
"!H:V:FitMethod=MC:CutRangeMin[0]=0:CutRangeMax[0]=1000:CutRangeMin[1]=0:CutRangeMax[1]=1000:CutRangeMin[2]=0:CutRangeMax[2]=2000:CutRangeMin[3]=0:CutRangeMax[3]=5:CutRangeMin[4]=0:CutRangeMax[4]=3000:CutRangeMin[5]=0:CutRangeMax[5]=2000:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]){ if(mode==1) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "!H:V:FitMethod=GA:CutRangeMin[0]=0:CutRangeMax[0]=1000:CutRangeMin[1]=0:CutRangeMax[1]=1000:CutRangeMin[2]=0:CutRangeMax[2]=2000:CutRangeMin[3]=0:CutRangeMax[3]=5:CutRangeMin[4]=0:CutRangeMax[4]=3000:CutRangeMin[5]=0:CutRangeMax[5]=2000:VarProp=FSmart:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); else if(mode==2) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "!H:V:FitMethod=GA:CutRangeMin[0]=0:CutRangeMax[0]=1000:CutRangeMin[1]=0:CutRangeMax[1]=3000:CutRangeMin[2]=0:CutRangeMax[2]=3000:CutRangeMin[3]=0:CutRangeMax[3]=1000:CutRangeMin[4]=0:CutRangeMax[4]=1000:CutRangeMin[5]=0:CutRangeMax[5]=3000:VarProp=FSmart:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); else if(mode==3) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "!H:V:FitMethod=GA:CutRangeMin[0]=0:CutRangeMax[0]=1000:CutRangeMin[1]=0:CutRangeMax[1]=2000:CutRangeMin[2]=0:CutRangeMax[2]=1000:CutRangeMin[3]=0:CutRangeMax[3]=5:VarProp=FSmart:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); } if (Use["CutsSA"]){ if(mode==1) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", 
"!H:!V:FitMethod=SA:CutRangeMin[0]=0:CutRangeMax[0]=1000:CutRangeMin[1]=0:CutRangeMax[1]=1000:CutRangeMin[2]=0:CutRangeMax[2]=2000:CutRangeMin[3]=0:CutRangeMax[3]=5:CutRangeMin[4]=0:CutRangeMax[4]=3000:CutRangeMin[5]=0:CutRangeMax[5]=2000:VarProp=FSmart:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); else if(mode==2) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:CutRangeMin[0]=0:CutRangeMax[0]=1000:CutRangeMin[1]=0:CutRangeMax[1]=3000:CutRangeMin[2]=0:CutRangeMax[2]=3000:CutRangeMin[3]=0:CutRangeMax[3]=1000:CutRangeMin[4]=0:CutRangeMax[4]=1000:CutRangeMin[5]=0:CutRangeMax[5]=3000:VarProp=FSmart:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); else if(mode==3) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:CutRangeMin[0]=0:CutRangeMax[0]=1000:CutRangeMin[1]=0:CutRangeMax[1]=2000:CutRangeMin[2]=0:CutRangeMax[2]=1000:CutRangeMin[3]=0:CutRangeMax[3]=5:VarProp=FSmart:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); } // Likelihood ("naive Bayes estimator") if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density estimator to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( 
TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", 
"!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "!H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", 
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=N+1:TestRate=5:!UseRegulator" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // 
BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3:IgnoreNegWeightsInTraining=True" ); // n_cycles:#nodes:#nodes:... // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees [MY DEFAULT] if (Use["BDTG"]){ // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=300:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=10:NNodesMax=5:IgnoreNegWeightsInTraining=True" ); } if (Use["BDT"]){ // Adaptive Boost if(mode<3){ factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:V:NTrees=100:nEventsMin=2000:MaxDepth=4:UseRandomisedTrees=True:UseNVars=4:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:PruneStrength=-1:IgnoreNegWeightsInTraining=True" ); } else if(mode==3){ factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:V:NTrees=50:nEventsMin=2000:MaxDepth=4:UseRandomisedTrees=True:UseNVars=4:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:PruneStrength=-1:IgnoreNegWeightsInTraining=True" ); } } if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated 
variables factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", "!H:!V:NTrees=50:nEventsMin=150:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier usage, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","GA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
std::pair<TString,TString> TMVAClassification ( TString infilename, AnalysisType analysisType = AnalysisType::DIRECT, TString additionalRootFileName = "") { TMVA::Tools::Instance(); std::string tmstr (now ()); TString tmstmp (tmstr.c_str ()); std::cout << "==> Start TMVAClassification" << std::endl; std::cout << "-------------------- open input file ---------------- " << std::endl; TString fname = infilename; //pathToData + infilename + TString (".root"); if (analysisType != AnalysisType::TRANSFORMED) fname = pathToData + infilename + TString (".root"); std::cout << "open file " << std::endl << fname.Data () << std::endl; std::cout << "-------------------- get tree ---------------- " << std::endl; TString treeName = "data"; if (analysisType == AnalysisType::TRANSFORMED) treeName = "transformed"; std::cout << "-------------------- create tchain with treeName ---------------- " << std::endl; std::cout << treeName << std::endl; TChain* tree = new TChain (treeName); std::cout << "add file" << std::endl; std::cout << fname << std::endl; tree->Add (fname); TChain* treeFriend (NULL); if (additionalRootFileName.Length () > 0) { std::cout << "-------------------- add additional input file ---------------- " << std::endl; std::cout << additionalRootFileName << std::endl; treeFriend = new TChain (treeName); treeFriend->Add (additionalRootFileName); tree->AddFriend (treeFriend,"p"); } // tree->Draw ("mass:prediction"); // return std::make_pair(TString("hallo"),TString ("nix")); TString outfileName; if (analysisType == AnalysisType::BACKGROUND) { outfileName = TString ("BACK_" + infilename) + tmstmp + TString (".root"); } else outfileName += TString ( "TMVA__" ) + tmstmp + TString (".root"); std::cout << "-------------------- open output file ---------------- " << std::endl; TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); std::cout << "-------------------- prepare factory ---------------- " << std::endl; TMVA::Factory *factory = new TMVA::Factory( 
"TMVAClassification", outputFile, "AnalysisType=Classification:Transformations=I:!V" ); std::cout << "-------------------- add variables ---------------- " << std::endl; for (auto varname : variableNames) { factory->AddVariable (varname.c_str (), 'F'); } for (auto varname : spectatorNames) { factory->AddSpectator (varname.c_str (), 'F'); } std::cout << "-------------------- add trees ---------------- " << std::endl; TCut signalCut ("signal==1"); TCut backgroundCut ("signal==0"); if (analysisType == AnalysisType::TRANSFORMED) { signalCut = "(signal_original==1 && signal_in==0)"; backgroundCut = "(signal_original==0 && signal_in==0)"; } if (analysisType == AnalysisType::BACKGROUND) { signalCut = TString("(signal==0) * (prediction > 0.7)"); backgroundCut = TString("(signal==0) * (prediction < 0.4)"); } //tree->Draw ("prediction",signalCut); //return std::make_pair(TString("hallo"),TString ("nix")); factory->AddTree(tree, "Signal", 1.0, baseCut + signalCut, "TrainingTesting"); factory->AddTree(tree, "Background", 1.0, baseCut + backgroundCut, "TrainingTesting"); TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; /* // Set individual event weights (the variables must exist in the original TTree) */ if (analysisType == AnalysisType::BACKGROUND) { factory->SetSignalWeightExpression ("prediction"); factory->SetBackgroundWeightExpression ("1"); } std::cout << "-------------------- prepare ---------------- " << std::endl; factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=0:nTest_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); TString methodName (""); if (analysisType == AnalysisType::BACKGROUND) methodName = TString ("TONBKG_") + tmstmp; if (false) { // gradient boosting training methodName += TString("GBDT"); factory->BookMethod(TMVA::Types::kBDT, methodName, 
"NTrees=40:BoostType=Grad:Shrinkage=0.01:MaxDepth=7:UseNvars=6:nCuts=20:MinNodeSize=10"); } if (false) { methodName += TString("Likelihood"); factory->BookMethod( TMVA::Types::kLikelihood, methodName, "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); } if (false) { TString layoutString ("Layout=TANH|100,LINEAR"); TString training0 ("LearningRate=1e-1,Momentum=0.0,Repetitions=1,ConvergenceSteps=300,BatchSize=20,TestRepetitions=15,WeightDecay=0.001,Regularization=NONE,DropConfig=0.0+0.5+0.5+0.5,DropRepetitions=1,Multithreading=True"); TString training1 ("LearningRate=1e-2,Momentum=0.5,Repetitions=1,ConvergenceSteps=300,BatchSize=30,TestRepetitions=7,WeightDecay=0.001,Regularization=L2,Multithreading=True,DropConfig=0.0+0.1+0.1+0.1,DropRepetitions=1"); TString training2 ("LearningRate=1e-2,Momentum=0.3,Repetitions=1,ConvergenceSteps=300,BatchSize=40,TestRepetitions=7,WeightDecay=0.0001,Regularization=L2,Multithreading=True"); TString training3 ("LearningRate=1e-3,Momentum=0.1,Repetitions=1,ConvergenceSteps=200,BatchSize=70,TestRepetitions=7,WeightDecay=0.0001,Regularization=NONE,Multithreading=True"); TString trainingStrategyString ("TrainingStrategy="); trainingStrategyString += training0 + "|" + training1 + "|" + training2 + "|" + training3; TString nnOptions ("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=G:WeightInitialization=XAVIERUNIFORM"); nnOptions.Append (":"); nnOptions.Append (layoutString); nnOptions.Append (":"); nnOptions.Append (trainingStrategyString); methodName += TString("NNgauss"); factory->BookMethod( TMVA::Types::kNN, methodName, nnOptions ); // NN } if (false) { TString layoutString ("Layout=TANH|200,TANH|70,LINEAR"); TString training0 ("LearningRate=1e-2,Momentum=0.0,Repetitions=1,ConvergenceSteps=300,BatchSize=20,TestRepetitions=15,WeightDecay=0.001,Regularization=NONE,DropConfig=0.0+0.5+0.5+0.5,DropRepetitions=1,Multithreading=True"); TString training1 
("LearningRate=1e-3,Momentum=0.5,Repetitions=1,ConvergenceSteps=300,BatchSize=30,TestRepetitions=7,WeightDecay=0.001,Regularization=L2,Multithreading=True,DropConfig=0.0+0.1+0.1+0.1,DropRepetitions=1"); TString training2 ("LearningRate=1e-4,Momentum=0.3,Repetitions=1,ConvergenceSteps=300,BatchSize=40,TestRepetitions=7,WeightDecay=0.0001,Regularization=L2,Multithreading=True"); TString training3 ("LearningRate=1e-5,Momentum=0.1,Repetitions=1,ConvergenceSteps=200,BatchSize=70,TestRepetitions=7,WeightDecay=0.0001,Regularization=NONE,Multithreading=True"); TString trainingStrategyString ("TrainingStrategy="); trainingStrategyString += training0 + "|" + training1 + "|" + training2 + "|" + training3; // trainingStrategyString += training0 + "|" + training2 + "|" + training3; // trainingStrategyString += training0 + "|" + training2; // TString nnOptions ("!H:V:VarTransform=Normalize:ErrorStrategy=CROSSENTROPY"); TString nnOptions ("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=N:WeightInitialization=XAVIERUNIFORM"); // TString nnOptions ("!H:V:VarTransform=Normalize:ErrorStrategy=CHECKGRADIENTS"); nnOptions.Append (":"); nnOptions.Append (layoutString); nnOptions.Append (":"); nnOptions.Append (trainingStrategyString); methodName = TString("NNnormalized"); factory->BookMethod( TMVA::Types::kNN, methodName, nnOptions ); // NN } if (true) { TString layoutString ("Layout=TANH|100,TANH|50,LINEAR"); TString training0 ("LearningRate=1e-2,Momentum=0.0,Repetitions=1,ConvergenceSteps=100,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Regularization=NONE,DropConfig=0.0+0.5+0.5+0.5,DropRepetitions=1,Multithreading=True"); TString training1 ("LearningRate=1e-3,Momentum=0.0,Repetitions=1,ConvergenceSteps=20,BatchSize=30,TestRepetitions=7,WeightDecay=0.001,Regularization=L2,Multithreading=True,DropConfig=0.0+0.1+0.1+0.1,DropRepetitions=1"); TString training2 
("LearningRate=1e-4,Momentum=0.0,Repetitions=1,ConvergenceSteps=20,BatchSize=40,TestRepetitions=7,WeightDecay=0.0001,Regularization=L2,Multithreading=True"); TString training3 ("LearningRate=1e-5,Momentum=0.0,Repetitions=1,ConvergenceSteps=30,BatchSize=70,TestRepetitions=7,WeightDecay=0.0001,Regularization=NONE,Multithreading=True"); TString trainingStrategyString ("TrainingStrategy="); trainingStrategyString += training0 + "|" + training1 + "|" + training2 + "|" + training3; TString nnOptions ("!H:!V:ErrorStrategy=CROSSENTROPY:VarTransform=P+G:WeightInitialization=XAVIERUNIFORM"); nnOptions.Append (":"); nnOptions.Append (layoutString); nnOptions.Append (":"); nnOptions.Append (trainingStrategyString); methodName += TString("NNPG"); factory->BookMethod( TMVA::Types::kNN, methodName, nnOptions ); // NN } factory->TrainAllMethods(); // return std::make_pair(TString("hallo"),TString ("nix")); factory->TestAllMethods(); factory->EvaluateAllMethods(); //input->Close(); outputFile->Close(); // TMVA::TMVAGui (outfileName); delete factory; delete tree; switch (analysisType) { case AnalysisType::BACKGROUND: std::cout << "DONE BACKGROUND" << std::endl; break; case AnalysisType::DIRECT: std::cout << "DONE DIRECT" << std::endl; break; case AnalysisType::TRANSFORMED: std::cout << "DONE TRANSFORMED" << std::endl; break; }; std::cout << "classification, return : " << outfileName << " , " << methodName << std::endl; return std::make_pair (outfileName, methodName); }