void regressphi()
{
   // This loads the library
   TMVA::Tools::Instance();

   std::cout << "==> Start TMVARegression" << std::endl;

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   TMVA::Factory *factory = new TMVA::Factory( "mva", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar" );

   factory->AddVariable( "npv"             , 'F' );
   factory->AddVariable( "u"               , 'F' );
   factory->AddVariable( "uphi"            , 'F' );
   factory->AddVariable( "chsumet/sumet"   , 'F' );
   factory->AddVariable( "tku"             , 'F' );
   factory->AddVariable( "tkuphi"          , 'F' );
   factory->AddVariable( "nopusumet/sumet" , 'F' );
   factory->AddVariable( "nopuu"           , 'F' );
   factory->AddVariable( "nopuuphi"        , 'F' );
   factory->AddVariable( "pusumet/sumet"   , 'F' );
   factory->AddVariable( "pumet"           , 'F' );
   factory->AddVariable( "pumetphi"        , 'F' );
   factory->AddVariable( "pucsumet/sumet"  , 'F' );
   factory->AddVariable( "pucu"            , 'F' );
   factory->AddVariable( "pucuphi"         , 'F' );
   factory->AddVariable( "jspt_1"          , 'F' );
   factory->AddVariable( "jseta_1"         , 'F' );
   factory->AddVariable( "jsphi_1"         , 'F' );
   factory->AddVariable( "jspt_2"          , 'F' );
   factory->AddVariable( "jseta_2"         , 'F' );
   factory->AddVariable( "jsphi_2"         , 'F' );
   factory->AddVariable( "nalljet"         , 'I' );
   factory->AddVariable( "njet"            , 'I' );

   // Regression target: the residual rphi_z - uphi, wrapped into (-pi, pi]
   factory->AddTarget( "rphi_z-uphi + 2.*TMath::Pi()*(rphi_z-uphi < -TMath::Pi()) - 2.*TMath::Pi()*(rphi_z-uphi > TMath::Pi())" );

   TString lName = "../Jets/r11-dimu_nochs_v2.root";
   TFile *lInput   = TFile::Open(lName);
   TTree *lRegress = (TTree*)lInput->Get("Flat");
   Double_t lRWeight = 1.0;
   factory->AddRegressionTree( lRegress, lRWeight );

   TCut lCut = "nbtag == 0"; // cut to remove real MET
   // "(rpt_z < 40 || (rpt_z > 40 && rpt_z+u1 < 40)) && nbtag == 0" ==> stronger cut to remove real MET

   factory->PrepareTrainingAndTestTree( lCut,
      "nTrain_Regression=0:nTest_Regression=0:SplitMode=Block:NormMode=NumEvents:!V" );

   // Boosted Decision Trees
   factory->BookMethod( TMVA::Types::kBDT, "RecoilPhiRegress_data_clean2_njet",
      "!H:!V:VarTransform=None:nEventsMin=200:NTrees=100:BoostType=Grad:Shrinkage=0.1:MaxDepth=100:NNodesMax=100000:UseYesNoLeaf=F:nCuts=2000" ); // MaxDepth=100

   factory->TrainAllMethods();
   factory->TestAllMethods();
   factory->EvaluateAllMethods();

   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVARegression is done!" << std::endl;

   delete factory;

   // if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
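The AddTarget expression above wraps the residual rphi_z - uphi into (-pi, pi] using boolean arithmetic. As a standalone illustration (the helper deltaPhi below is ours, not part of the macro), it is equivalent to:

#include "TMath.h"

// Wrap rphi_z - uphi into (-pi, pi], as the AddTarget expression does:
// add 2*pi when the difference is below -pi, subtract 2*pi when above +pi.
Double_t deltaPhi(Double_t rphi_z, Double_t uphi)
{
   Double_t d = rphi_z - uphi;
   if (d < -TMath::Pi()) d += 2.*TMath::Pi();
   if (d >  TMath::Pi()) d -= 2.*TMath::Pi();
   return d;
}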
void TMVAClassification(char* trainFile, char* tree, char* mycuts, char* mycutb, char* inputVars[], int size)
{
   // This loads the library
   TMVA::Tools::Instance();

   // Create a new root output file.
   TFile* outputFile = TFile::Open( "TMVA.root", "RECREATE" );

   // Create the factory object.
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
      "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" );

   // ---------- input variables
   for (int ivar = 0; ivar < size; ++ivar) {
      factory->AddVariable(inputVars[ivar], 'F');
   }

   // Read training and test data
   TFile *input = TFile::Open( trainFile );
   TTree *signal     = (TTree*)input->Get(tree);
   TTree *background = (TTree*)input->Get(tree);

   // Global event weights per tree
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;

   // ====== Register trees ====================================================
   // You can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( signal,     signalWeight );
   factory->AddBackgroundTree( background, backgroundWeight );

   // Tell the factory to use all remaining events in the trees after training for testing.
   // If no numbers of events are given, half of the events in the tree are used
   // for training, and the other half for testing:
   factory->PrepareTrainingAndTestTree( TCut(mycuts), TCut(mycutb),
      "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // ---- Use BDT: Adaptive Boost
   factory->BookMethod( TMVA::Types::kBDT, "BDT",
      "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   // ---- Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ---- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // Save the output
   outputFile->Close();

   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;
}
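A hypothetical driver for the function above; the file, tree, branch, and cut names are placeholders, not taken from the original:

void runClassificationExample()
{
   // Placeholder input variables and cuts; adapt to the actual ntuple.
   static char* vars[] = { (char*)"var1", (char*)"var2", (char*)"var3" };
   TMVAClassification( (char*)"train.root", (char*)"events",
                       (char*)"isSignal==1", (char*)"isSignal==0",
                       vars, 3 );
}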
void process(const std::vector<std::string>& inputFiles, const std::string& name, const std::string& outputFile)
{
   TChain* signal     = loadChain(inputFiles, "signal");
   TChain* background = loadChain(inputFiles, "background");

   TFile* output = TFile::Open(outputFile.c_str(), "recreate");

   TMVA::Factory* factory = new TMVA::Factory(name.c_str(), output, "V");
   factory->AddSignalTree(signal, 1.);
   factory->AddBackgroundTree(background, 1.);

   //{
   //factory->AddVariable("lightJet1p2_Pt");
   //factory->AddVariable("leptonic_B_Pt");
   //factory->AddVariable("leptonic_Top_Pt");
   //factory->AddVariable("leptonic_Top_M");
   //factory->AddVariable("hadronic_B_Pt");
   //factory->AddVariable("hadronic_W_M");
   //factory->AddVariable("hadronic_Top_Pt");
   //factory->AddVariable("hadronic_Top_M");
   //factory->AddVariable("delta_R_tops");
   //factory->AddVariable("delta_R_lightjets");
   //factory->AddVariable("leptonic_B_CSV");
   //factory->AddVariable("hadronic_B_CSV");
   //}

   // chi^2 style
   {
      factory->AddVariable("leptonic_Top_M");
      factory->AddVariable("hadronic_W_M");
      factory->AddVariable("hadronic_Top_M");
      factory->AddVariable("ht_fraction");
   }

   factory->SetWeightExpression("weight");

   factory->PrepareTrainingAndTestTree("", "",
      "V:VerboseLevel=Info:nTrain_Signal=100000:nTrain_Background=100000:nTest_Signal=100000:nTest_Background=100000");

   factory->BookMethod(TMVA::Types::kBDT, "BDT", "V:BoostType=AdaBoost:nCuts=20:VarTransform=D");
   factory->BookMethod(TMVA::Types::kMLP, "NN",  "V:VarTransform=D");
   //factory->BookMethod(TMVA::Types::kPDERS, "PDERS", "V");

   factory->TrainAllMethods();
   factory->TestAllMethods();
   factory->EvaluateAllMethods();

   output->Close();

   delete output;
   delete signal;
   delete background;
}
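loadChain() is called above but not defined in this section; a minimal sketch consistent with its call sites (an assumption, since the real helper may differ):

#include <string>
#include <vector>
#include "TChain.h"

// Assumed helper: build a TChain of the given tree from a list of ROOT files.
TChain* loadChain(const std::vector<std::string>& inputFiles, const std::string& treeName)
{
   TChain* chain = new TChain(treeName.c_str());
   for (size_t i = 0; i < inputFiles.size(); ++i)
      chain->Add(inputFiles[i].c_str());
   return chain;
}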
void test_train(TString signalName = "WW", TString bkgName = "DY")
{
   TFile *outFile = new TFile("myAnalysisFile.root","RECREATE");
   TMVA::Factory *factory = new TMVA::Factory(signalName, outFile, "");

   TString directory = "../rootFiles/SF/MediumIDTighterIP/";
   //signalName = directory + signalName;

   // defining WW signal
   TFile *MySignalFile = new TFile("../rootFiles/SF/MediumIDTighterIP/WW.root","READ");
   TTree* sigTree = (TTree*)MySignalFile->Get("nt");
   factory->AddSignalTree(sigTree, 1);

   // defining DY background
   TFile *MyBkgFile = new TFile("../rootFiles/SF/MediumIDTighterIP/DY.root","READ");
   TTree* bkgTree = (TTree*)MyBkgFile->Get("nt");
   factory->AddBackgroundTree(bkgTree, 1);

   factory->SetWeightExpression("baseW");

   //************************************ FACTORY
   factory->AddVariable("fullpmet");
   factory->AddVariable("trkpmet");
   factory->AddVariable("ratioMet");
   factory->AddVariable("ptll");
   factory->AddVariable("mth");
   factory->AddVariable("jetpt1");
   factory->AddVariable("ptWW");
   factory->AddVariable("dphilljet");
   factory->AddVariable("dphillmet");
   factory->AddVariable("dphijet1met");
   factory->AddVariable("nvtx");

   factory->PrepareTrainingAndTestTree("", 500, 500, 500, 500);
   cout << "I've prepared trees" << endl;

   //factory->BookMethod(TMVA::Types::kFisher, "Fisher", "");
   factory->BookMethod(TMVA::Types::kBDT, "BDT", "");
   cout << "I've booked method" << endl;

   factory->TrainAllMethods();
   factory->TestAllMethods();
   cout << "I've tested all methods" << endl;
   factory->EvaluateAllMethods();
   cout << "I've evaluated all methods" << endl;

   // Save the output
   outFile->Close();
   delete factory;
}
void trainBDT(void)
{
   // Open input file and get tree
   TFile *infile = new TFile("l3bdt.root");
   TTree *l3tree = (TTree*)infile->Get("l3tree");
   if (l3tree == NULL) {
      cout << "Couldn't open \"l3bdt.root\"!" << endl;
      return;
   }

   // Open output root file (for TMVA)
   TFile *outfile = new TFile("l3BDT_out.root", "RECREATE");
   TMVA::Factory *fac = new TMVA::Factory("L3", outfile, "");

   // Specify input tree that contains both signal and background
   TCut signalCut("is_good==1");
   TCut backgroundCut("is_good==0");
   fac->SetInputTrees(l3tree, signalCut, backgroundCut);

   // Add variables
   fac->AddVariable("Nstart_counter",    'I');
   fac->AddVariable("Ntof",              'I');
   fac->AddVariable("Nbcal_points",      'I');
   fac->AddVariable("Nbcal_clusters",    'I');
   fac->AddVariable("Ebcal_points",      'F');
   fac->AddVariable("Ebcal_clusters",    'F');
   fac->AddVariable("Nfcal_clusters",    'I');
   fac->AddVariable("Efcal_clusters",    'F');
   fac->AddVariable("Ntrack_candidates", 'I');
   fac->AddVariable("Ptot_candidates",   'F');

   TCut preSelectCut("");
   fac->PrepareTrainingAndTestTree(preSelectCut, "");

   fac->BookMethod(TMVA::Types::kBDT, "BDT", "");

   fac->TrainAllMethods();
   fac->TestAllMethods();
   fac->EvaluateAllMethods();

   delete fac;
   outfile->Close();
   delete outfile;
}
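The trained weights can later be applied with TMVA::Reader. A minimal sketch, assuming TMVA's default weight-file path for this factory name and method title ("weights/L3_BDT.weights.xml"); the wrapper evaluateL3BDT is ours, not part of the macro:

#include "TMVA/Reader.h"

// Evaluate the trained L3 BDT for one event; vars must be ordered exactly as
// the AddVariable calls during training. Integer inputs are passed as floats,
// which is the standard TMVA::Reader practice.
Double_t evaluateL3BDT(Float_t vars[10])
{
   static TMVA::Reader* reader = 0;
   static Float_t v[10];
   const char* names[10] = { "Nstart_counter", "Ntof", "Nbcal_points",
      "Nbcal_clusters", "Ebcal_points", "Ebcal_clusters", "Nfcal_clusters",
      "Efcal_clusters", "Ntrack_candidates", "Ptot_candidates" };
   if (!reader) {
      reader = new TMVA::Reader("!Color:!Silent");
      for (int i = 0; i < 10; i++) reader->AddVariable(names[i], &v[i]);
      reader->BookMVA("BDT", "weights/L3_BDT.weights.xml"); // assumed default path
   }
   for (int i = 0; i < 10; i++) v[i] = vars[i];
   return reader->EvaluateMVA("BDT");
}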
void trainBJetIdMVA(TString SELECTION)
{
   // the training is done using a dedicated tree format
   TFile *src = TFile::Open("bjetId_"+SELECTION+".root");
   TTree *tr  = (TTree*)src->Get("jets");

   TFile *outf = new TFile("bjetId_"+SELECTION+"_MVA.root","RECREATE");

   TCut signalCut       = "abs(partonId) == 5";
   TCut bkgCut          = "abs(partonId) != 5";
   TCut preselectionCut = "btagIdx<4 && etaIdx<4 && etaIdx>-1 && ptIdx<4";

   int N = 100000;
   cout << "NUMBER OF TRAINING EVENTS = " << N << endl;

   TMVA::Factory* factory = new TMVA::Factory("factory_"+SELECTION+"_", outf,
      "!V:!Silent:Color:DrawProgressBar:Transformations=I;G:AnalysisType=Classification");

   factory->SetInputTrees(tr, signalCut, bkgCut);

   factory->AddVariable("btagIdx", 'I');
   factory->AddVariable("etaIdx" , 'I');
   factory->AddVariable("btag"   , 'F');
   factory->AddVariable("eta"    , 'F');

   char name[1000];
   sprintf(name, "nTrain_Signal=%d:nTrain_Background=%d:nTest_Signal=%d:nTest_Background=%d", N, N, N, N);
   factory->PrepareTrainingAndTestTree(preselectionCut, name);

   // specify the training methods
   factory->BookMethod(TMVA::Types::kLikelihood, "Likelihood");
   //factory->BookMethod(TMVA::Types::kBDT, "BDT_DEF");
   //factory->BookMethod(TMVA::Types::kBDT, "BDT_ADA",  "NTrees=600:AdaBoostBeta=0.1:nCuts=35");
   //factory->BookMethod(TMVA::Types::kBDT, "BDT_GRAD1","NTrees=600:nCuts=40:BoostType=Grad:Shrinkage=0.5");
   factory->BookMethod(TMVA::Types::kBDT, "BDT_GRAD2", "NTrees=600:nCuts=25:BoostType=Grad:Shrinkage=0.2");

   factory->TrainAllMethods();
   factory->TestAllMethods();
   factory->EvaluateAllMethods();

   // Save the output
   outf->Close();
   delete factory;
}
void TMVAtest()
{
   //gSystem->Load("../lib/slc5_amd64_gcc462/libTAMUWWMEPATNtuple.so");
   gSystem->Load("libPhysics");
   //gSystem->Load("EvtTreeForAlexx_h.so");
   gSystem->Load("libTMVA.1");
   gSystem->Load("AutoDict_vector_TLorentzVector__cxx.so");

   TMVA::Tools::Instance();

   TFile* outputFile = TFile::Open("TMVA1.root", "RECREATE");
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
      "V=true:Color:DrawProgressBar" ); // ":Transformations=I;D;P;G,D"

   TFile* signal = TFile::Open("/uscms_data/d2/aperloff/Spring12ME7TeV/MEResults/microNtuples_oldStructure/microWW_EPDv01.root");
   TFile* bkg    = TFile::Open("/uscms_data/d2/aperloff/Spring12ME7TeV/MEResults/microNtuples_oldStructure/microWJets_EPDv01.root");

   TTree* stree = (TTree*)signal->Get("METree");
   TTree* btree = (TTree*)bkg->Get("METree");
   factory->AddSignalTree(stree, 1.0);
   factory->AddBackgroundTree(btree, 1.0);
   factory->SetSignalWeightExpression("1.0");
   factory->SetBackgroundWeightExpression("1.0");

   factory->AddVariable("tEventProb[0]");
   factory->AddVariable("tEventProb[1]");
   factory->AddVariable("tEventProb[2]");
   //factory->AddVariable("tEventProb0 := tEventProb[0]", 'F');
   //factory->AddVariable("tEventProb1 := tEventProb[1]", 'F');
   //factory->AddVariable("tEventProb2 := tEventProb[2]", 'F');

   TCut test("Entry$>-2 && jLV[1].Pt()>30");
   TCut mycuts(test);

   factory->PrepareTrainingAndTestTree(mycuts, mycuts,
      "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=0:nTest_Background=0:SplitMode=Random:NormMode=None:V=true:VerboseLevel=DEBUG");

   factory->BookMethod( TMVA::Types::kBDT, "BDT",
      "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   factory->TrainAllMethods();
   factory->TestAllMethods();
   factory->EvaluateAllMethods();

   outputFile->Close();
}
int main(int argc, char** argv)
{
   if (argc != 2) {
      std::cerr << " >>>>> analysis.cpp::usage: " << argv[0] << " configFileName" << std::endl;
      return 1;
   }

   parseConfigFile(argv[1]);

   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc.
   // If you use your private .rootrc, or run from a different directory, please copy the
   // corresponding lines from .rootrc.
   //
   // Methods to be processed can be given as an argument; use format:
   //
   //    mylinux~> root -l TMVARegression.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   //---------------------------------------------------------------

   // This loads the library
   TMVA::Tools::Instance();

   std::vector<std::string> UseMethodName;
   UseMethodName = gConfigParser->readStringListOption("Input::UseMethodName");
   std::cout << " >>>>> Input::UseMethodName size = " << UseMethodName.size() << std::endl;
   std::cout << " >>>>> >>>>> ";
   for (unsigned int iCat = 0; iCat < UseMethodName.size(); iCat++) {
      std::cout << " " << UseMethodName.at(iCat) << ", ";
   }
   std::cout << std::endl;

   // ---------------------------------------------------------------
   std::cout << std::endl;
   std::cout << "==> Start TMVARegression" << std::endl;

   std::map<std::string,int> Use;
   for (std::vector<std::string>::iterator it = UseMethodName.begin(); it != UseMethodName.end(); ++it)
      Use[*it] = 0;

   std::string UseMethodFlag;
   try {
      UseMethodFlag = gConfigParser->readStringOption("Input::UseMethodFlag");
      std::cout << UseMethodFlag << std::endl;
      std::vector<TString> mlist = gTools().SplitString( UseMethodFlag, '/' );
      for (UInt_t i = 0; i < mlist.size(); i++) {
         std::string regMethod(mlist[i]);
         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++)
               std::cout << it->first << " ";
            std::cout << std::endl;
            return -1;
         }
         Use[regMethod] = 1;
         std::cout << "regMethod= " << regMethod << " 1 " << std::endl;
      }
   }
   catch (char* exception) {
      std::cerr << " exception = Use All method " << std::endl;
      for (std::vector<std::string>::iterator it = UseMethodName.begin(); it != UseMethodName.end(); it++)
         Use[*it] = 1;
   }

   // --------------------------------------------------------------------------------------------------
   // --- Here the preparation phase begins

   // Create a new root output file
   std::string outputFileName = gConfigParser->readStringOption("Output::outputFileName");
   std::cout << " Output Data File = " << outputFileName << std::endl;
   TFile* outputFile = TFile::Open( outputFileName.c_str(), "RECREATE" );

   // Create the factory object. Later you can choose the methods whose performance
   // you'd like to investigate. The factory will then run the performance analysis
   // for you. The first argument is the base of the name of all the weight files in
   // the directory weight/. All TMVA output can be suppressed by removing the "!"
   // (not) in front of the "Silent" argument in the option string.

   // Read training and test data (see TMVAClassification for reading ASCII files):
   // load the signal and background event samples from ROOT trees
   std::string inputFileList = gConfigParser->readStringOption("Input::inputFileList");
   std::string treeNameDATA  = gConfigParser->readStringOption("Input::treeNameDATA");
   std::cout << " Input Data List = " << inputFileList << std::endl;

   TChain* treeDATA = new TChain(treeNameDATA.c_str());
   FillChain(*treeDATA, inputFileList.c_str());

   TMVA::Factory *factory = new TMVA::Factory( "TMVARegression", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar" );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training.
   // Note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   std::string RegionOfTraining = gConfigParser->readStringOption("Input::RegionOfTraining");
   std::cout << " RegionOfTraining = " << RegionOfTraining << std::endl;

   if (RegionOfTraining == "EB") {
      factory->AddVariable( "ele1_scE/ele1_scERaw"              , 'F');
      factory->AddVariable( "ele1_eRegrInput_nPV"               , 'F');
      factory->AddVariable( "ele1_eRegrInput_r9"                , 'F');
      factory->AddVariable( "ele1_fbrem"                        , 'F');
      factory->AddVariable( "ele1_eta"                          , 'F');
      factory->AddVariable( "ele1_DphiIn"                       , 'F');
      factory->AddVariable( "ele1_DetaIn"                       , 'F');
      factory->AddVariable( "ele1_sigmaIetaIeta"                , 'F');
      factory->AddVariable( "ele1_eRegrInput_etaW"              , 'F');
      factory->AddVariable( "ele1_eRegrInput_phiW"              , 'F');
      factory->AddVariable( "ele1_eRegrInput_bCE_Over_sCE"      , 'F');
      factory->AddVariable( "ele1_eRegrInput_sigietaieta_bC1"   , 'F');
      factory->AddVariable( "ele1_eRegrInput_sigiphiiphi_bC1"   , 'F');
      factory->AddVariable( "ele1_eRegrInput_sigietaiphi_bC1"   , 'F');
      factory->AddVariable( "ele1_eRegrInput_e3x3_Over_bCE"     , 'F');
      factory->AddVariable( "ele1_eRegrInput_Deta_bC_sC"        , 'F');
      factory->AddVariable( "ele1_eRegrInput_Dphi_bC_sC"        , 'F');
      factory->AddVariable( "ele1_eRegrInput_bEMax_Over_bCE"    , 'F');
      factory->AddVariable( "ele1_dxy_PV"                       , 'F');
      factory->AddVariable( "ele1_dz_PV"                        , 'F');
      factory->AddVariable( "ele1_sigmaP/ele1_tkP"              , 'F');
      factory->AddVariable( "ele1_eRegrInput_bCELow_Over_sCE"   , 'F');
      factory->AddVariable( "ele1_eRegrInput_e3x3_Over_bCELow"  , 'F');
      factory->AddVariable( "ele1_eRegrInput_Deta_bCLow_sC"     , 'F');
      factory->AddVariable( "ele1_eRegrInput_Dphi_bCLow_sC"     , 'F');
      factory->AddVariable( "ele1_eRegrInput_seedbC_etacry"     , 'F');
      factory->AddVariable( "ele1_eRegrInput_seedbC_phicry"     , 'F');

      // You can add so-called "Spectator variables", which are not used in the MVA training,
      // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
      // input variables, the response values of all trained MVAs, and the spectator variables.
      //    factory->AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' );
      //    factory->AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' );

      // Add the variable carrying the regression target
      //    factory->AddTarget("ele1_scE/ele1_E_true");
      factory->AddTarget("ele1_tkP/ele1_E_true");

      // It is also possible to declare additional targets for multi-dimensional regression, ie:
      //    factory->AddTarget( "fvalue2" );
      // BUT: this is currently ONLY implemented for MLP

      // Global event weights per tree (see below for setting event-wise weights)
      Double_t regWeight = 1.0;

      // You can add an arbitrary number of regression trees
      factory->AddRegressionTree( treeDATA, regWeight );

      // This would set individual event weights (the variables defined in the
      // expression need to exist in the original TTree)
      //    factory->SetWeightExpression( "var1", "Regression" );

      // TCut mycut = "ele1_isEB==1 && ele1_sigmaP/ele1_tkP<0.4 && ele1_fbrem>0 && abs(ele1_dxy_PV)<0.05 && abs(ele1_dz_PV)<0.05 && ele1_eRegrInput_etaW > 0.006 && ele1_eRegrInput_phiW<0.08 && ele1_eRegrInput_sigietaieta_bC1>0.006 && ele1_eRegrInput_sigiphiiphi_bC1>0.008 && abs(ele1_eRegrInput_Deta_bC_sC)<0.004 && abs(ele1_eRegrInput_Dphi_bC_sC)<0.04 && abs(ele1_eRegrInput_seedbC_etacry)<0.6 && abs(ele1_eRegrInput_seedbC_phicry)<0.6 && ele1_scE/ele1_scERaw<1.2 && (ele1_scE/ele1_E_true)<1.4 && (ele1_scE/ele1_E_true)>0.3";
      TCut mycut = "ele1_isEB==1 && ele1_sigmaP/ele1_tkP<0.4 && ele1_fbrem>0 && abs(ele1_dxy_PV)<0.05 && abs(ele1_dz_PV)<0.05 && ele1_eRegrInput_etaW > 0.006 && ele1_eRegrInput_phiW<0.08 && ele1_eRegrInput_sigietaieta_bC1>0.006 && ele1_eRegrInput_sigiphiiphi_bC1>0.008 && abs(ele1_eRegrInput_Deta_bC_sC)<0.004 && abs(ele1_eRegrInput_Dphi_bC_sC)<0.04 && abs(ele1_eRegrInput_seedbC_etacry)<0.6 && abs(ele1_eRegrInput_seedbC_phicry)<0.6 && ele1_scE/ele1_scERaw<1.2 && ele1_tkP/ele1_E_true<1.8 && ele1_tkP/ele1_E_true>0.2"; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1";

      // Tell the factory to use all remaining events in the trees after training for testing:
      factory->PrepareTrainingAndTestTree( mycut,
         "nTrain_Regression=2500000:nTest_Regression=2500000:SplitMode=Random:NormMode=NumEvents:!V" );

      TString Name = Form("weight_%s_%s_P_W", RegionOfTraining.c_str(), UseMethodFlag.c_str());
      (TMVA::gConfig().GetIONames()).fWeightFileDir = Name;
   }

   if (RegionOfTraining == "EE") {
      factory->AddVariable( "ele1_scE/ele1_scERaw"  , 'F');
      factory->AddVariable( "ele1_eRegrInput_nPV"   , 'F');
      factory->AddVariable( "ele1_eRegrInput_r9"    , 'F');
      factory->AddVariable( "ele1_fbrem"            , 'F');
      factory->AddVariable( "ele1_eta"              , 'F');
      factory->AddVariable( "ele1_DphiIn"           , 'F');
      factory->AddVariable( "ele1_DetaIn"           , 'F');
      factory->AddVariable( "ele1_sigmaIetaIeta"    , 'F');
      factory->AddVariable( "ele1_eRegrInput_etaW"  , 'F');
      factory->AddVariable( "ele1_eRegrInput_phiW"  , 'F');
      factory->AddVariable( "ele1_dxy_PV"           , 'F');
      factory->AddVariable( "ele1_dz_PV"            , 'F');
      factory->AddVariable( "ele1_sigmaP/ele1_tkP"  , 'F');

      // You can add so-called "Spectator variables", which are not used in the MVA training,
      // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
      // input variables, the response values of all trained MVAs, and the spectator variables.
      //    factory->AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' );
      //    factory->AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' );

      // Add the variable carrying the regression target
      //    factory->AddTarget("ele1_scE/ele1_E_true");
      factory->AddTarget("ele1_tkP/ele1_E_true");

      // It is also possible to declare additional targets for multi-dimensional regression, ie:
      //    factory->AddTarget( "fvalue2" );
      // BUT: this is currently ONLY implemented for MLP

      // Global event weights per tree (see below for setting event-wise weights)
      Double_t regWeight = 1.0;

      // You can add an arbitrary number of regression trees
      factory->AddRegressionTree( treeDATA, regWeight );

      // This would set individual event weights (the variables defined in the
      // expression need to exist in the original TTree)
      //    factory->SetWeightExpression( "var1", "Regression" );

      // TCut mycut = "ele1_isEB==0 && ele1_sigmaP/ele1_tkP<0.4 && ele1_fbrem>0 && abs(ele1_dxy_PV)<0.05 && abs(ele1_dz_PV)<0.05 && (ele1_scE/ele1_E_true)<1.4 && (ele1_scE/ele1_E_true)>0.3";
      TCut mycut = "ele1_isEB==0 && ele1_sigmaP/ele1_tkP<0.4 && ele1_fbrem>0 && abs(ele1_dxy_PV)<0.05 && abs(ele1_dz_PV)<0.05 && (ele1_tkP/ele1_E_true)<1.6"; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1";

      // Tell the factory to use all remaining events in the trees after training for testing:
      factory->PrepareTrainingAndTestTree( mycut,
         "nTrain_Regression=3000000:nTest_Regression=3000000:SplitMode=Random:NormMode=NumEvents:!V" );

      TString Name = Form("weight_%s_%s_P_W", RegionOfTraining.c_str(), UseMethodFlag.c_str());
      (TMVA::gConfig().GetIONames()).fWeightFileDir = Name;
   }

   // Apply additional cuts on the signal and background samples (can be different).
   //
   // If no numbers of events are given, half of the events in the tree are used
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // It is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // PDE - RS method
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
         "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" );
   // And the options strings for the MinMax and RMS methods, respectively:
   //    "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //    "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );

   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
         "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.3:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
         "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // Linear discriminant
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "!H:!V:VarTransform=G,U,D" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
         "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options); the formula of this example is good for parabolas
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
         "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
         "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
         "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   // Neural network (MLP)
   if (Use["MLP"])
      // factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" );
      // factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=200:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" );
      // factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=400:HiddenLayers=N+10:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15" );
      // factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=N:NeuronType=tanh:NCycles=200:HiddenLayers=N+10:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15" );
      // factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=G,N:NeuronType=tanh:NCycles=200:HiddenLayers=N+5:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15" );
      factory->BookMethod( TMVA::Types::kMLP, "MLP",
         "!H:!V:NeuronType=tanh:NCycles=250:HiddenLayers=N+5:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:TestRate=10" );

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=N" );
      // factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=N,G" );

   // Boosted Decision Trees
   if (Use["BDT"])
      // factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=100:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" );
      // factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=200:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:PruneMethod=CostComplexity:PruneStrength=30" );
      // factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=300:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:PruneMethod=CostComplexity:PruneStrength=30" );
      // factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=100:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:PruneMethod=CostComplexity:PruneStrength=30" );
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
         "!H:!V:NTrees=100:nEventsMin=20:BoostType=AdaBoostR2:SeparationType=RegressionVariance:PruneMethod=CostComplexity:PruneStrength=30" );

   if (Use["BDTG"])
      // factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=2000:BoostType=Grad:Shrinkage=0.1:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:MaxDepth=3:NNodesMax=15" );
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
         "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.1:UseBaggedGrad:GradBaggingFraction=0.5:MaxDepth=5:NNodesMax=25:PruneMethod=CostComplexity:PruneStrength=30" );

   // --------------------------------------------------------------------------------------------------
   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------
   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVARegression is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   // if (!gROOT->IsBatch()) TMVARegGui( outputFileName.c_str() );

   return 0;
}
void BJetRegression( TString myMethodList = "" )
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc.
   // If you use your private .rootrc, or run from a different directory, please copy the
   // corresponding lines from .rootrc.
   //
   // Methods to be processed can be given as an argument; use format:
   //
   //    mylinux~> root -l TMVARegression.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   //---------------------------------------------------------------

   // This loads the library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Multidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]   = 0;
   Use["PDEFoam"] = 1;
   Use["KNN"]     = 1;
   //
   // --- Linear Discriminant Analysis
   Use["LD"]      = 1;
   //
   // --- Function Discriminant analysis
   Use["FDA_GA"]   = 1;
   Use["FDA_MC"]   = 0;
   Use["FDA_MT"]   = 0;
   Use["FDA_GAMT"] = 0;
   //
   // --- Neural Network
   Use["MLP"] = 1;
   //
   // --- Support Vector Machine
   Use["SVM"] = 0;
   //
   // --- Boosted Decision Trees
   Use["BDT"]  = 0;
   Use["BDTG"] = 1;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVARegression" << std::endl;

   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);
         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // --------------------------------------------------------------------------------------------------
   // --- Here the preparation phase begins

   // Create a new root output file
   TString outfileName( "TMVAReg_CSVJ1.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory will
   // then run the performance analysis for you.
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results.
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVARegression", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar" );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training.
   // Note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   // std::string inputVariables[] = {"CSVJ1PtUncorr", "CSVJ1Pt", "CSVJ1Et", "CSVJ1Mt", "CSVJ1ptLeadTrk",
   //                                 "CSVJ1Vtx3dL", "CSVJ1Vtx3deL", "CSVJ1vtxMass", "CSVJ1VtxPt",
   //                                 "CSVJ1SoftLeptPtRel", "CSVJ1SoftLeptPt",
   //                                 "CSVJ1SoftLeptdR", "CSVJ1Ntot"};
   std::string inputVariables[] = {"jetPtUncorr", "jetPt", "jetEt", "jetMt", "jetptLeadTrk",
                                   "jetVtx3dL", "jetVtx3deL", "jetvtxMass", "jetVtxPt",
                                   "jetSoftLeptPtRel", "jetSoftLeptPt",
                                   "jetSoftLeptdR", "jetNtot", "jetJECUnc"};

   for (int ivar = 0; ivar < 14; ivar++) {
      factory->AddVariable( inputVariables[ivar], inputVariables[ivar], "units", 'F' );
   }

   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables.
   //    factory->AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' );
   //    factory->AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' );

   // Add the variable carrying the regression target
   //    factory->AddTarget( "matchGenJet1Pt" );
   factory->AddTarget( "matchGenJetPt" );

   // It is also possible to declare additional targets for multi-dimensional regression, ie:
   //    factory->AddTarget( "fvalue2" );
   // BUT: this is currently ONLY implemented for MLP

   // Read training and test data (see TMVAClassification for reading ASCII files):
   // load the signal and background event samples from ROOT trees
   TFile *input(0);
   TString fname = "/scratch/zmao/regression/allSample_both_isobTag.root";
   if (!gSystem->AccessPathName( fname ))
      input = TFile::Open( fname );                                             // check if file in local directory exists
   else
      input = TFile::Open( "http://root.cern.ch/files/tmva_reg_example.root" ); // if not: download from ROOT server
   if (!input) {
      std::cout << "ERROR: could not open data file" << std::endl;
      exit(1);
   }
   std::cout << "--- TMVARegression : Using input file: " << input->GetName() << std::endl;

   // --- Register the regression tree
   TTree *regTree = (TTree*)input->Get("eventTree");

   // Global event weights per tree (see below for setting event-wise weights)
   Double_t regWeight = 1.0;

   // You can add an arbitrary number of regression trees
   factory->AddRegressionTree( regTree, regWeight );

   // This would set individual event weights (the variables defined in the
   // expression need to exist in the original TTree)
   //    factory->SetWeightExpression( "triggerEff", "Regression" );

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycut = ""; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1";

   // Tell the factory to use all remaining events in the trees after training for testing.
   // If no numbers of events are given, half of the events in the tree are used
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   factory->PrepareTrainingAndTestTree( mycut,
      "nTrain_Regression=0:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // It is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // PDE - RS method
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
         "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" );
   // And the options strings for the MinMax and RMS methods, respectively:
   //    "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //    "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );

   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
         "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
         "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // Linear discriminant
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "!H:!V:VarTransform=None" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
         "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options); the formula of this example is good for parabolas
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
         "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
         "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
         "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   // Neural network (MLP)
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP",
         "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" );

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDT"])
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
         "!H:!V:NTrees=100:MinNodeSize=1.0%:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" );

   if (Use["BDTG"])
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
         "!H:!V:NTrees=2000:BoostType=Grad:Shrinkage=0.1:UseBaggedBoost:BaggedSampleFraction=0.7:nCuts=200:MaxDepth=3:NNodesMax=15" );

   // --------------------------------------------------------------------------------------------------
   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------
   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVARegression is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVARegGui( outfileName );
}
void TMVAClassification( TString myMethodList = "" ) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // to get access to the GUI and all tmva macros //TString thisdir = gSystem->DirName(gInterpreter->GetCurrentMacroName()); //gROOT->SetMacroPath(thisdir + ":" + gROOT->GetMacroPath()); //gROOT->ProcessLine(".L TMVAGui.C"); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Cut optimisation Use["Cuts"] = 0; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // // --- 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 0; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // --- Mutidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 0; Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting Use["KNN"] = 0; // k-nearest neighbour method // // --- Linear Discriminant Analysis Use["LD"] = 1; // Linear Discriminant identical to Fisher Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 0; // minimisation of user-defined function using Genetics Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 0; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 0; // Recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 0; // uses Adaptive Boost Use["BDTG"] = 0; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of fisher discriminant for node splitting // // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules") Use["RuleFit"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if 
(Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] // factory->AddVariable( "myvar1 := var1+var2", 'F' ); // factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' ); // factory->AddVariable( "var3", "Variable 3", "units", 'F' ); // factory->AddVariable( "var4", "Variable 4", "units", 'F' ); factory->AddVariable( "pho_ecalClusterIsoR4", "pho_ecalClusterIsoR4", "units", 'F' ); factory->AddVariable( "pho_hcalRechitIsoR4", "pho_hcalRechitIsoR4", "units", 'F' ); factory->AddVariable( "pho_trackIsoR4PtCut20", "pho_trackIsoR4PtCut20", "units", 'F' ); factory->AddVariable( "phoHoverE", "phoHoverE", "units", 'F' ); factory->AddVariable( "phoSigmaIEtaIEta_2012", "phoSigmaIEtaIEta_2012", "units", 'F' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. 
This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables // factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' ); // factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' ); // Read training and test data // (it is also possible to use ASCII format as input -> see TMVA Users Guide) // TString fname = "./tmva_class_example.root"; TString fname = "/net/hisrv0001/home/juliusbl/alex/cut/cutTree.root"; if (gSystem->AccessPathName( fname )) // file does not exist in local directory gSystem->Exec("curl -O http://root.cern.ch/files/tmva_class_example.root"); TFile *input = TFile::Open( fname ); std::cout << "--- TMVAClassification : Using input file: " << input->GetName() << std::endl; // --- Register the training and test trees TTree *signal = (TTree*)input->Get("cutT"); TTree *background = (TTree*)input->Get("cutT"); // global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; // You can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); // To give different trees for training and testing, do as follows: // factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ); // factory->AddSignalTree( signalTestTree, signalTestWeight, "Test" ); // Use the following code instead of the above two or four lines to add signal and background // training and test events "by hand" // NOTE that in this case one should not give expressions (such as "var1+var2") in the input // variable definition, but simply compute the expression before adding the event // // // --- begin ---------------------------------------------------------- // std::vector<Double_t> vars( 4 ); // vector has size of number of input variables // Float_t treevars[4], weight; // // // Signal // for (UInt_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (UInt_t i=0; i<signal->GetEntries(); i++) { // signal->GetEntry(i); // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < signal->GetEntries()/2.0) factory->AddSignalTrainingEvent( vars, signalWeight ); // else factory->AddSignalTestEvent ( vars, signalWeight ); // } // // // Background (has event weights) // background->SetBranchAddress( "weight", &weight ); // for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (UInt_t i=0; i<background->GetEntries(); i++) { // background->GetEntry(i); // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight*weight ); // else factory->AddBackgroundTestEvent ( vars, backgroundWeight*weight ); // } // --- end ------------------------------------------------------------ // // --- end of tree registration // Set individual event weights (the variables must exist in the original TTree) // for signal : factory->SetSignalWeightExpression ("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("weight1*weight2"); // 
factory->SetBackgroundWeightExpression( "weight" ); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = "subid==0"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = "subid==1"; // for example: TCut mycutb = "abs(var1)<0.5"; // Tell the factory how to use the training and testing events // // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood ("naive Bayes estimator") if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density estimator to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", 
"!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", 
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... 
// Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" ); if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier usage, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // ---- STILL EXPERIMENTAL and only implemented for BDT's ! // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","FitGA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros //if (!gROOT->IsBatch()) // gROOT->ProcessLine(TString::Format("TMVAGui(\"%s\")", outfileName.Data())); // efficiencies( TString fin = "TMVA.root", Int_t type = 2, Bool_t useTMVAStyle = kTRUE ); }
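// Application sketch: a minimal TMVA::Reader loop matching the training above. The
// variable names var1..var4 and the weight-file path are illustrative assumptions only;
// the Reader must register exactly the expressions passed to AddVariable() in training,
// in the same order, and the path follows TMVA's default weights/<JobName>_<MethodName>.weights.xml layout.
void applyClassifier() {
   TMVA::Tools::Instance();
   Float_t var1, var2, var3, var4; // placeholder variables, adapt to the actual training setup
   TMVA::Reader* reader = new TMVA::Reader( "!Color:!Silent" );
   reader->AddVariable( "var1", &var1 );
   reader->AddVariable( "var2", &var2 );
   reader->AddVariable( "var3", &var3 );
   reader->AddVariable( "var4", &var4 );
   reader->BookMVA( "BDT", "weights/TMVAClassification_BDT.weights.xml" );
   // ... fill var1..var4 from the current event, then:
   Double_t mvaValue = reader->EvaluateMVA( "BDT" );
   std::cout << "BDT response: " << mvaValue << std::endl;
   delete reader;
}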
void TMVAClassificationCategory() { //--------------------------------------------------------------- std::cout << std::endl << "==> Start TMVAClassificationCategory" << std::endl; bool batchMode(false); bool UseOffsetMethod(true); // selects the toy input sample below; declared here with an assumed default since its definition is not part of this macro as shown // Create a new root output file. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory will // then run the performance analysis for you. // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string std::string factoryOptions( "!V:!Silent:Transformations=I;D;P;G,D" ); if (batchMode) factoryOptions += ":!Color:!DrawProgressBar"; TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationCategory", outputFile, factoryOptions ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] factory->AddVariable( "var1", 'F' ); factory->AddVariable( "var2", 'F' ); factory->AddVariable( "var3", 'F' ); factory->AddVariable( "var4", 'F' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. 
This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables factory->AddSpectator( "eta" ); // load the signal and background event samples from ROOT trees TFile *input(0); TString fname( "" ); if (UseOffsetMethod) fname = "../execs/data/toy_sigbkg_categ_offset.root"; else fname = "../execs/data/toy_sigbkg_categ_varoff.root"; if (!gSystem->AccessPathName( fname )) { // check that the input file exists in the local directory std::cout << "--- TMVAClassificationCategory: Accessing " << fname << std::endl; input = TFile::Open( fname ); } if (!input) { std::cout << "ERROR: could not open data file: " << fname << std::endl; exit(1); } TTree *signal = (TTree*)input->Get("TreeS"); TTree *background = (TTree*)input->Get("TreeB"); /// global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; /// you can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; // tell the factory to use all remaining events in the trees after training for testing: factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // Fisher discriminant factory->BookMethod( TMVA::Types::kFisher, "Fisher", "!H:!V:Fisher" ); // Likelihood factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Categorised classifier TMVA::MethodCategory* mcat = 0; // the variable sets TString theCat1Vars = "var1:var2:var3:var4"; TString theCat2Vars = (UseOffsetMethod ? 
"var1:var2:var3:var4" : "var1:var2:var3"); // the Fisher TMVA::MethodBase* fiCat = factory->BookMethod( TMVA::Types::kCategory, "FisherCat","" ); mcat = dynamic_cast<TMVA::MethodCategory*>(fiCat); mcat->AddMethod("abs(eta)<=1.3",theCat1Vars, TMVA::Types::kFisher,"Category_Fisher_1","!H:!V:Fisher"); mcat->AddMethod("abs(eta)>1.3", theCat2Vars, TMVA::Types::kFisher,"Category_Fisher_2","!H:!V:Fisher"); // the Likelihood TMVA::MethodBase* liCat = factory->BookMethod( TMVA::Types::kCategory, "LikelihoodCat","" ); mcat = dynamic_cast<TMVA::MethodCategory*>(liCat); mcat->AddMethod("abs(eta)<=1.3",theCat1Vars, TMVA::Types::kLikelihood,"Category_Likelihood_1","!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50"); mcat->AddMethod("abs(eta)>1.3", theCat2Vars, TMVA::Types::kLikelihood,"Category_Likelihood_2","!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50"); // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassificationCategory is done!" << std::endl; // Clean up delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
int main(int argc, char** argv) { if(argc != 2) { std::cerr << ">>>>> analysis.cpp::usage: " << argv[0] << " configFileName" << std::endl ; return 1; } // Parse the config file parseConfigFile (argv[1]) ; std::string treeName = gConfigParser -> readStringOption("Input::treeName"); std::string fileSamples = gConfigParser -> readStringOption("Input::fileSamples"); std::string inputDirectory = gConfigParser -> readStringOption("Input::inputDirectory"); std::string inputBeginningFile = "out_NtupleProducer_"; try { inputBeginningFile = gConfigParser -> readStringOption("Input::inputBeginningFile"); } catch (char const* exceptionString) { std::cerr << " exception = " << exceptionString << std::endl; } std::cout << ">>>>> Input::inputBeginningFile " << inputBeginningFile << std::endl; double LUMI = gConfigParser -> readDoubleOption("Options::Lumi"); std::vector<std::string> SignalName; SignalName = gConfigParser -> readStringListOption("Options::SignalName"); for (int iSignalSample=0; iSignalSample<SignalName.size(); iSignalSample++) { std::cout << " Signal[" << iSignalSample << "] = " << SignalName.at(iSignalSample) << std::endl; } std::string nameWeight = "1"; try { nameWeight = gConfigParser -> readStringOption("Options::nameWeight"); } catch (char const* exceptionString) { std::cerr << " exception = " << exceptionString << std::endl; } std::cout << ">>>>> Input::nameWeight " << nameWeight << std::endl; TTree *treeJetLepVect[200]; char *nameSample[1000]; char *nameHumanReadable[1000]; char* xsectionName[1000]; char nameFileIn[1000]; sprintf(nameFileIn,"%s",fileSamples.c_str()); int numberOfSamples = ReadFile(nameFileIn, nameSample, nameHumanReadable, xsectionName); double Normalization[1000]; double xsection[1000]; for (int iSample=0; iSample<numberOfSamples; iSample++) { xsection[iSample] = atof(xsectionName[iSample]); } for (int iSample=0; iSample<numberOfSamples; iSample++) { char nameFile[20000]; sprintf(nameFile,"%s/%s%s.root",inputDirectory.c_str(),inputBeginningFile.c_str(),nameSample[iSample]); TFile* f = new TFile(nameFile, "READ"); treeJetLepVect[iSample] = (TTree*) f->Get(treeName.c_str()); char nameTreeJetLep[100]; sprintf(nameTreeJetLep,"treeJetLep_%d",iSample); treeJetLepVect[iSample]->SetName(nameTreeJetLep); double XSection; XSection = xsection[iSample]; Normalization[iSample] = XSection * LUMI / 1000.; } //==== cut std::string CutFile = gConfigParser -> readStringOption("Selections::CutFile"); std::vector<std::string> vCut; std::cout << " nCuts = " << ReadFileCut(CutFile, vCut) << std::endl; std::string Cut; if (vCut.size() != 0) { Cut = vCut.at(0); } else { Cut = "1"; } //==== HiggsMass std::string HiggsMass = gConfigParser -> readStringOption("Options::HiggsMass"); //==== list of methods std::vector<std::string> vectorMyMethodList = gConfigParser -> readStringListOption("Options::MVAmethods"); TString myMethodList; for (int iMVA = 0; iMVA < vectorMyMethodList.size(); iMVA++) { if (iMVA == 0) myMethodList = Form ("%s",vectorMyMethodList.at(iMVA).c_str()); else myMethodList = Form ("%s,%s",myMethodList.Data(),vectorMyMethodList.at(iMVA).c_str()); } //==== output TString outfileName = gConfigParser -> readStringOption("Output::outFileName"); // This loads the library TMVA::Tools::Instance(); // Default MVA methods to be trained + tested std::map<std::string,int> Use; Use["MLP"] = 1; Use["BDTG"] = 1; Use["FDA_GA"] = 0; Use["PDEFoam"] = 0; std::cout << std::endl; std::cout << "==> Start TMVAMulticlass" << std::endl; // Select methods (don't look at this code 
- not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return 1; } Use[regMethod] = 1; } } // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a new root output file TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // TMVA::Factory *factory = new TMVA::Factory( "TMVAMulticlass", outputFile, "AnalysisType=multiclass:!V:!Silent:!V:Transformations=I;D" ); TMVA::Factory *factory = new TMVA::Factory( "TMVAMulticlass", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=multiclass" ); factory->AddVariable( "jetpt1" , 'F'); factory->AddVariable( "jetpt2" , 'F'); factory->AddVariable( "mjj" , 'F'); factory->AddVariable( "detajj" , 'F'); factory->AddVariable( "dphilljetjet" , 'F'); factory->AddVariable( "pt1" , 'F'); factory->AddVariable( "pt2" , 'F'); factory->AddVariable( "mll" , 'F'); factory->AddVariable( "dphill" , 'F'); factory->AddVariable( "mth" , 'F'); factory->AddVariable( "dphillmet" , 'F'); factory->AddVariable( "mpmet" , 'F'); factory->AddSpectator( "channel" , 'F'); for (int iSample=0; iSample<numberOfSamples; iSample++) { int numEnt = treeJetLepVect[iSample]->GetEntries(Cut.c_str()); std::cout << " Sample = " << nameSample[iSample] << " ~ " << nameHumanReadable[iSample] << " --> " << numEnt << std::endl; if (numEnt != 0) { if (iSample == 0) factory->AddTree( treeJetLepVect[iSample], "Signal", Normalization[iSample] ); else if (iSample == 1) factory->AddTree( treeJetLepVect[iSample], "Background", Normalization[iSample] ); else factory->AddTree( treeJetLepVect[iSample], TString(nameHumanReadable[iSample]), Normalization[iSample] ); // factory->AddTree( treeJetLepVect[iSample], TString(nameHumanReadable[iSample]), Normalization[iSample] ); // factory->AddTree( treeJetLepVect[iSample], TString(nameHumanReadable[iSample]), Normalization[iSample] , nameWeight.c_str()); // factory->AddTree( treeJetLepVect[iSample], TString(nameHumanReadable[iSample])); } } // for (int iSample=0; iSample<numberOfSamples; iSample++){ // int numEnt = treeJetLepVect[iSample]->GetEntries(Cut.c_str()); // std::cout << " Sample = " << nameSample[iSample] << " ~ " << nameHumanReadable[iSample] << " --> " << numEnt << std::endl; // if (numEnt != 0) { // bool isSig = false; // for (std::vector<std::string>::const_iterator itSig = SignalName.begin(); itSig != SignalName.end(); itSig++){ // if (nameHumanReadable[iSample] == *itSig) isSig = true; // } // if (isSig) { // factory->AddTree( treeJetLepVect[iSample], TString("Signal"), Normalization[iSample] ); //---> there must be one sample called Signal! 
// } // else { // factory->AddTree( treeJetLepVect[iSample], TString(nameHumanReadable[iSample]), Normalization[iSample] ); // } // } // } // // for (int iSample=0; iSample<numberOfSamples; iSample++){ // int numEnt = treeJetLepVect[iSample]->GetEntries(Cut.c_str()); // std::cout << " Sample = " << nameSample[iSample] << " ~ " << nameHumanReadable[iSample] << " --> " << numEnt << std::endl; // if (numEnt != 0) { // bool isSig = false; // for (std::vector<std::string>::const_iterator itSig = SignalName.begin(); itSig != SignalName.end(); itSig++){ // if (nameHumanReadable[iSample] == *itSig) isSig = true; // } // if (isSig) { // // factory->AddTree( treeJetLepVect[iSample], TString("Signal"), Normalization[iSample] ); //---> there must be one sample called Signal! // } // else { // factory->AddTree( treeJetLepVect[iSample], TString(nameHumanReadable[iSample]), Normalization[iSample] ); // } // } // } std::cerr << " --- trees registered, defining selection" << std::endl; TCut mycuts = Cut.c_str(); // factory->SetWeightExpression( nameWeight.c_str() ); // factory->SetBackgroundWeightExpression( nameWeight.c_str() ); // factory->SetSignalWeightExpression ( nameWeight.c_str() ); std::cerr << " --- preparing training and test trees" << std::endl; factory->PrepareTrainingAndTestTree( mycuts ,"SplitMode=Random:NormMode=None:!V"); // factory->PrepareTrainingAndTestTree( "" ,"SplitMode=Random:NormMode=None:!V"); std::cerr << " --- booking MVA methods" << std::endl; // gradient boosted decision trees // if (Use["BDTG"]) factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.50:nCuts=20:NNodesMax=8"); if (Use["BDTG"]) factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=600:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.50:nCuts=20:NNodesMax=8"); // neural network if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:NCycles=1000:HiddenLayers=N+5,5:TestRate=5:EstimatorType=MSE"); // functional discriminant with GA minimizer if (Use["FDA_GA"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); // PDE-Foam approach if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); //==== Optimize parameters in MVA methods // factory->OptimizeAllMethods(); // factory->OptimizeAllMethods("ROCIntegral","Scan"); //==== Train MVAs using the set of training events ==== factory->TrainAllMethods(); //==== Evaluate all MVAs using the set of test events ==== factory->TestAllMethods(); //==== Evaluate and compare performance of all configured MVAs ==== factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAMulticlass is done!" 
<< std::endl; delete factory; //==== change position of weights file std::string toDo; toDo = "rm -r Weights-MVA-MultiClass/weights_" + HiggsMass + "_testVariables"; std::cerr << "toDo = " << toDo << std::endl; system (toDo.c_str()); toDo = "mv weights Weights-MVA-MultiClass/weights_" + HiggsMass + "_testVariables"; std::cerr << "toDo = " << toDo << std::endl; system (toDo.c_str()); // Launch the GUI for the root macros // if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
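// Application sketch for the multiclass training above: EvaluateMulticlass() returns one
// response value per class, ordered as the classes were registered with AddTree() (here
// "Signal" first, then "Background", then the remaining samples). The weight file is
// booked from TMVA's default weights/ directory; note that the macro above moves it into
// Weights-MVA-MultiClass/ afterwards, so the path must be adapted accordingly.
void applyMulticlass() {
   Float_t jetpt1, jetpt2, mjj, detajj, dphilljetjet, pt1, pt2;
   Float_t mll, dphill, mth, dphillmet, mpmet, channel;
   TMVA::Reader* reader = new TMVA::Reader( "!Color:!Silent" );
   reader->AddVariable( "jetpt1", &jetpt1 );
   reader->AddVariable( "jetpt2", &jetpt2 );
   reader->AddVariable( "mjj", &mjj );
   reader->AddVariable( "detajj", &detajj );
   reader->AddVariable( "dphilljetjet", &dphilljetjet );
   reader->AddVariable( "pt1", &pt1 );
   reader->AddVariable( "pt2", &pt2 );
   reader->AddVariable( "mll", &mll );
   reader->AddVariable( "dphill", &dphill );
   reader->AddVariable( "mth", &mth );
   reader->AddVariable( "dphillmet", &dphillmet );
   reader->AddVariable( "mpmet", &mpmet );
   reader->AddSpectator( "channel", &channel );
   reader->BookMVA( "BDTG", "weights/TMVAMulticlass_BDTG.weights.xml" );
   // ... fill the variables from the current event, then:
   const std::vector<Float_t>& response = reader->EvaluateMulticlass( "BDTG" );
   std::cout << "Signal-like response: " << response.at(0) << std::endl;
   delete reader;
}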
void TMVAClassification( TString eventsToTrain = "0", const TString & region = "barrel", const TString index = "", TString myMethodList = "BDT") { std::cout << "running classification for " << region << " for " << myMethodList << std::endl; if( region != "barrel" && region != "endcaps" ) { std::cout << "Error, region can only be barrel or endcaps. Selected region was: " << region << std::endl; exit(1); } if( index != "" && index != "0" && index != "1" && index != "2" ) { std::cout << "Error, index can only be \"\", \"0\", \"1\" or \"2\". Selected index was: " << index << std::endl; exit(1); } // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based on it) //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Cut optimisation Use["Cuts"] = 0; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // // --- 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 0; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // --- Multidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 0; Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting Use["KNN"] = 0; // k-nearest neighbour method // // --- Linear Discriminant Analysis Use["LD"] = 0; // Linear Discriminant identical to Fisher Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 0; // minimisation of user-defined function using Genetic Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 0; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 0; // Recommended ANN with BFGS training method and Bayesian regulator Use["CFMlpANN"] = 0; // Deprecated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 1; // uses Adaptive Boost Use["BDTG"] = 0; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of Fisher discriminant for node splitting // // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules") Use["RuleFit"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // Select methods 
(don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; std::exit(2); } Use[regMethod] = 1; } } // Input and output file names TString fnameTrainS = "BsMC12_barrel_preselection"; TString fnameTrainB = "Barrel_preselection"; TString fnameTestS = "BsMC12_barrel_preselection"; TString fnameTestB = "Barrel_preselection"; TString outputFileName = "TMVA_barrel"; TString weightDirName = "barrel"; if( region == "endcaps" ) { fnameTrainS = "BsMC12_endcaps_preselection"; fnameTrainB = "Endcaps_preselection"; fnameTestS = "BsMC12_endcaps_preselection"; fnameTestB = "Endcaps_preselection"; outputFileName = "TMVA_endcaps"; weightDirName = "endcaps"; } if( index != "" ) { fnameTrainS += "_"+index; fnameTrainB += "_"+index; TString indexTest = ""; // The test index is the train index +1 (2+1 -> 0) if( index == "0" ) indexTest = "1"; else if( index == "1" ) indexTest = "2"; else if( index == "2" ) indexTest = "0"; fnameTestS += "_"+indexTest; fnameTestB += "_"+indexTest; outputFileName += "_"+index; weightDirName += index; } fnameTrainS = rootDir + fnameTrainS + ".root"; fnameTrainB = rootDir + fnameTrainB + ".root"; fnameTestS = rootDir + fnameTestS + ".root"; fnameTestB = rootDir + fnameTestB + ".root"; outputFileName = rootDir + outputFileName + ".root"; weightDirName = weightsDir + weightDirName + "Weights"; // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName(outputFileName); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" 
(not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // (TMVA::gConfig().GetIONames()).fWeightFileDir = outputFileName; (TMVA::gConfig().GetIONames()).fWeightFileDir = weightDirName; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] bool useNewMuonID = false; factory->AddVariable( "fls3d", "fls3d", "", 'F' ); factory->AddVariable( "alpha", "alpha", "", 'F' ); factory->AddVariable( "pvips", "pvips", "", 'F' ); factory->AddVariable( "iso", "iso", "", 'F' ); factory->AddVariable( "m1iso", "m1iso", "", 'F' ); factory->AddVariable( "m2iso", "m2iso", "", 'F' ); factory->AddVariable( "chi2dof", "chi2/dof", "", 'F' ); if( region == "barrel" ) { factory->AddVariable( "eta", "eta", "", 'F' ); factory->AddVariable( "maxdoca", "maxdoca", "cm", 'F' ); } else { factory->AddVariable( "pt", "pt", "GeV/c", 'F' ); factory->AddVariable( "pvip", "pvip", "cm", 'F' ); } factory->AddVariable( "docatrk", "docatrk", "cm", 'F' ); // factory->AddVariable( "pt", "pt", "GeV/c", 'F' ); // factory->AddVariable( "closetrk", "closetrk", "", 'I' ); // factory->AddVariable( "y", "y", "", 'F' ); // factory->AddVariable( "l3d", "l3d", "cm", 'F' ); // factory->AddVariable( "cosAlphaXY", "cosAlphaXY", "", 'F' ); // factory->AddVariable( "mu1_dxy", "mu1_dxy", "cm", 'F' ); // factory->AddVariable( "mu2_dxy", "mu2_dxy", "cm", 'F' ); if( useNewMuonID ) { // New Muon-id factory->AddVariable( "mu1_MVAMuonID", "mu1_MVAMuonID", "", 'F'); factory->AddVariable( "mu2_MVAMuonID", "mu2_MVAMuonID", "", 'F'); } // Extra variables // factory->AddVariable( "mu1_pt", "mu1_pt", "GeV/c", 'F' ); // factory->AddVariable( "mu2_pt", "mu2_pt", "GeV/c", 'F' ); // factory->AddVariable( "pvw8", "pvw8", "", 'F' ); // factory->AddVariable( "cosAlpha3D", "cosAlpha3D", "", 'F' ); // factory->AddVariable( "countTksOfPV", "countTksOfPV", "", 'I' ); // factory->AddVariable( "ctauErrPV", "ctauErrPV", "", 'F' ); // factory->AddVariable( "ctauPV", "ctauPV", "", 'F' ); // factory->AddVariable( "dcaxy", "dcaxy", "", 'F' ); // factory->AddVariable( "mu1_glbTrackProb", "mu1_glbTrackProb", "", 'F' ); // factory->AddVariable( "mu1_nChi2", "mu1_nChi2", "", 'F' ); // factory->AddVariable( "mu1_nMuSegs", "mu1_nMuSegs", "", 'F' ); // factory->AddVariable( "mu1_nMuSegsCln", "mu1_nMuSegsCln", "", 'F' ); // factory->AddVariable( "mu1_nPixHits", "mu1_nPixHits", "", 'F' ); // factory->AddVariable( "mu1_nTrHits", "mu1_nTrHits", "", 'F' ); // factory->AddVariable( "mu1_segComp", "mu1_segComp", "", 'F' ); // factory->AddVariable( "mu1_trkEHitsOut", "mu1_trkEHitsOut", "", 'F' ); // factory->AddVariable( "mu1_trkVHits", "mu1_trkVHits", "", 'F' ); // factory->AddVariable( "mu1_validFrac", "mu1_validFrac", "", 'F' ); // factory->AddVariable( "mu1_chi2LocMom", "mu1_chi2LocMom", "", 'F' ); // factory->AddVariable( "mu1_chi2LocPos", "mu1_chi2LocPos", "", 'F' ); // factory->AddVariable( "mu2_glbTrackProb", "mu2_glbTrackProb", "", 'F' ); // factory->AddVariable( "mu2_nChi2", "mu2_nChi2", 
"", 'F' ); // factory->AddVariable( "mu2_nMuSegs", "mu2_nMuSegs", "", 'F' ); // factory->AddVariable( "mu2_nMuSegsCln", "mu2_nMuSegsCln", "", 'F' ); // factory->AddVariable( "mu2_nPixHits", "mu2_nPixHits", "", 'F' ); // factory->AddVariable( "mu2_nTrHits", "mu2_nTrHits", "", 'F' ); // factory->AddVariable( "mu2_segComp", "mu2_segComp", "", 'F' ); // factory->AddVariable( "mu2_trkEHitsOut", "mu2_trkEHitsOut", "", 'F' ); // factory->AddVariable( "mu2_trkVHits", "mu2_trkVHits", "", 'F' ); // factory->AddVariable( "mu2_validFrac", "mu2_validFrac", "", 'F' ); // factory->AddVariable( "mu2_chi2LocMom", "mu2_chi2LocMom", "", 'F' ); // factory->AddVariable( "mu2_chi2LocPos", "mu2_chi2LocPos", "", 'F' ); // factory->AddVariable( "l3d := ctauPV*pt/mass", "l3d", "cm", 'F' ); // factory->AddVariable( "l3dSig := ctauPV/ctauErrPV", "l3dSig", "", 'F' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables // factory->AddSpectator( "spec1 := mass*2", "Spectator 1", "units", 'F' ); // factory->AddSpectator( "spec2 := mass*3", "Spectator 2", "units", 'F' ); factory->AddSpectator( "mass", "mass", "GeV/c^{2}", 'F' ); // Read training and test data // (it is also possible to use ASCII format as input -> see TMVA Users Guide) if (gSystem->AccessPathName( fnameTrainS )) { // file does not exist in local directory std::cout << "Did not access " << fnameTrainS << " exiting." << std::endl; std::exit(4); } //gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root"); TFile *inputTrainS = TFile::Open( fnameTrainS ); TFile *inputTrainB = TFile::Open( fnameTrainB ); TFile *inputTestS = TFile::Open( fnameTestS ); TFile *inputTestB = TFile::Open( fnameTestB ); // --- Register the training and test trees TTree *signalTrainTree = (TTree*)inputTrainS->Get("probe_tree"); TTree *backgroundTrainTree = (TTree*)inputTrainB->Get("probe_tree"); TTree *signalTestTree = (TTree*)inputTestS->Get("probe_tree"); TTree *backgroundTestTree = (TTree*)inputTestB->Get("probe_tree"); // global event weights per tree (see below for setting event-wise weights) Double_t signalTrainWeight = 1.0; Double_t backgroundTrainWeight = 1.0; Double_t signalTestWeight = 1.0; Double_t backgroundTestWeight = 1.0; // Decide if using the split and mixing or the full trees if( fnameTrainS == fnameTestS ) { if( fnameTrainB != fnameTestB ) { std::cout << "This macro cannot handle cases where the same signal sample is used for training and testing, but different background samples are used."; exit(1); } std::cout << "--- TMVAClassification : Using input file: " << inputTrainS->GetName() << std::endl; std::cout << "--- and file: " << inputTrainB->GetName() << std::endl; // You can add an arbitrary number of signal or background trees factory->AddSignalTree ( signalTrainTree, signalTrainWeight ); factory->AddBackgroundTree( backgroundTrainTree, backgroundTrainWeight ); } else { if( fnameTrainB == fnameTestB ) { std::cout << "This macro cannot handle cases where the same background sample is used for training and testing, but different signal samples are used."; exit(1); } std::cout << "--- TMVAClassification : Using input file: " << inputTrainS->GetName() << std::endl; std::cout << "--- and file: " << inputTrainB->GetName() << " for training and" << std::endl; std::cout << "--- input file: " << inputTestS->GetName() << std::endl; std::cout 
<< "--- and file: " << inputTestB->GetName() << " for testing." << std::endl; // To give different trees for training and testing, do as follows: factory->AddSignalTree( signalTrainTree, signalTrainWeight, "Training" ); factory->AddSignalTree( signalTestTree, signalTestWeight, "Test" ); factory->AddBackgroundTree( backgroundTrainTree, backgroundTrainWeight, "Training" ); factory->AddBackgroundTree( backgroundTestTree, backgroundTestWeight, "Test" ); } // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; TCut mycutb = ""; // Tell the factory how to use the training and testing events // // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal="+eventsToTrain+":nTrain_Background="+eventsToTrain+":SplitMode=Random:NormMode=NumEvents:!V" ); // factory->PrepareTrainingAndTestTree( mycuts, mycutb, // "nTrain_Signal=3000:nTrain_Background=3000:nTest_Signal=3000:nTest_Background=3000:SplitMode=Random:NormMode=NumEvents:!V" ); // factory->PrepareTrainingAndTestTree( mycuts, mycutb, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood ("naive Bayes estimator") if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density estimator 
to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", 
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) // factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+8:TestRate=5:!UseRegulator" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... 
// Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=800:nEventsMin=50:MaxDepth=2:BoostType=AdaBoost:AdaBoostBeta=1:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:NNodesMax=5" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", "!H:!V:NTrees=50:nEventsMin=150:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier usage, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","GA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events std::cout << "Training all methods" << std::endl; factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events std::cout << "Testing all methods" << std::endl; factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs std::cout << "Evaluating all methods" << std::endl; factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
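// Driver sketch for the macro above: the index argument implements a simple 3-fold
// rotation (train on split i, test on split i+1 mod 3), so a fully cross-trained set of
// weights needs all three indices in both regions. The function name runAllRegionsAndFolds
// is illustrative; it assumes the macro above is loaded in the current ROOT session.
void runAllRegionsAndFolds() {
   const char* regions[2] = { "barrel", "endcaps" };
   for (int r = 0; r < 2; ++r)
      for (int i = 0; i < 3; ++i)
         TMVAClassification( "0", regions[r], TString::Format("%d", i), "BDT" );
}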
//////////////////////////////////////////////////////////////////////////////// /// Main /// //////////////////////////////////////////////////////////////////////////////// void TrainRegressionFJ(TString myMethodList="") { gROOT->SetBatch(1); gROOT->LoadMacro("HelperFunctions.h" ); // make functions visible to TTreeFormula if (!TString(gROOT->GetVersion()).Contains("5.34")) { std::cout << "INCORRECT ROOT VERSION! Please use 5.34:" << std::endl; std::cout << "source /uscmst1/prod/sw/cms/slc5_amd64_gcc462/lcg/root/5.34.02-cms/bin/thisroot.csh" << std::endl; std::cout << "Return without doing anything." << std::endl; return; } //TString curDynamicPath( gSystem->GetDynamicPath() ); //gSystem->SetDynamicPath( "../lib:" + curDynamicPath ); //TString curIncludePath(gSystem->GetIncludePath()); //gSystem->SetIncludePath( " -I../include " + curIncludePath ); // Load the library TMVA::Tools::Instance(); //-------------------------------------------------------------------------- // Default MVA methods to be trained + tested std::map<std::string, int> Use; // --- Multidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDEFoam"] = 1; Use["KNN"] = 1; // // --- Linear Discriminant Analysis Use["LD"] = 1; // // --- Function Discriminant analysis Use["FDA_GA"] = 1; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; // // --- Neural Network Use["MLP"] = 1; // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 1; Use["BDT1"] = 0; Use["BDTG"] = 0; Use["BDTG1"] = 0; //-------------------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVARegression" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } //-------------------------------------------------------------------------- // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName( "TMVARegFJ.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory will // then run the performance analysis for you. // // The first argument is the base of the name of all the // weightfiles in the directory weights/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" 
(not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVARegressionFJ", outputFile, "!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=Regression" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; const std::vector<std::string> & inputExpressions = GetInputExpressionsFJReg(); const std::vector<std::string> & inputExpressionLabels = GetInputExpressionLabelsFJReg(); assert(inputExpressions.size() == inputExpressionLabels.size()); // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] //factory->AddVariable( "var1", "Variable 1", "units", 'F' ); //factory->AddVariable( "var2", "Variable 2", "units", 'F' ); for (UInt_t iexpr=0; iexpr!=inputExpressions.size(); iexpr++){ Label label = MakeLabel(inputExpressionLabels.at(iexpr)); TString expr = inputExpressions.at(iexpr); factory->AddVariable(expr, label.xlabel, label.unit, label.type); } // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables //factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' ); //factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' ); // Add the variable carrying the regression target //factory->AddTarget( "fvalue" ); factory->AddTarget( "fathFilterJets_genPt" ); // It is also possible to declare additional targets for multi-dimensional regression, ie: // -- factory->AddTarget( "fvalue2" ); // BUT: this is currently ONLY implemented for MLP //-------------------------------------------------------------------------- // Read training and test data TFile *input(0); TString dirname = "skim_ZnnH_regression_fj/"; TString prefix = "skim_"; TString suffix = ".root"; TTree *regTrainTree(0), *regTestTree(0); std::vector<std::string> processes; processes.push_back("ZnnH110"); processes.push_back("ZnnH115"); processes.push_back("ZnnH120"); processes.push_back("ZnnH125"); processes.push_back("ZnnH130"); processes.push_back("ZnnH135"); processes.push_back("ZnnH140"); processes.push_back("ZnnH145"); processes.push_back("ZnnH150"); #ifdef USE_WH processes.push_back("WlnH110"); processes.push_back("WlnH115"); processes.push_back("WlnH120"); processes.push_back("WlnH125"); processes.push_back("WlnH130"); processes.push_back("WlnH135"); processes.push_back("WlnH140"); processes.push_back("WlnH145"); processes.push_back("WlnH150"); #endif std::vector<TFile *> files; for (UInt_t i=0; i<processes.size(); i++){ std::string process = processes.at(i); input = (TFile*) TFile::Open(dirname + prefix + process + suffix, "READ"); if (!input) { std::cout << "ERROR: Could not open input file." 
<< std::endl; exit(1); } std::cout << "--- TMVARegression : Using input file: " << input->GetName() << std::endl; files.push_back(input); // --- Register the regression tree regTrainTree = (TTree*) input->Get("tree_train"); regTestTree = (TTree*) input->Get("tree_test"); // Global event weights per tree (see below for setting event-wise weights) Double_t regWeight = 1.0; // You can add an arbitrary number of regression trees factory->AddRegressionTree(regTrainTree, regWeight, TMVA::Types::kTraining); factory->AddRegressionTree(regTestTree , regWeight, TMVA::Types::kTesting ); } // Set individual event weights (the variables must exist in the original TTree) //factory->SetWeightExpression( "var1", "Regression" ); // Apply additional cuts on the signal and background samples (can be different) TCut mycut = "fathFilterJets_genPt>10 && fathFilterJets_pt>15 && abs(fathFilterJets_eta)<2.5"; // this is to avoid 3rd filter jet without gen match //TCut mycut = "hJet_genPt[0] > 0. && hJet_genPt[1] > 0. && hJet_csv[0] > 0. && hJet_csv[1] > 0. && hJet_pt[0] > 20. && hJet_pt[1] > 20. && abs(hJet_eta[0])<2.5 && abs(hJet_eta[1])<2.5"; // Tell the factory to use all remaining events in the trees after training for testing: factory->PrepareTrainingAndTestTree( mycut, "V:nTrain_Regression=0:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents" ); // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=Random:!V" ); // --- Book MVA methods // // Please look up the various method configuration options in the corresponding cxx files, eg: // src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1:...", where [2] is the third input variable // PDE - RS method if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" ); // And the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // Linear discriminant if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "!H:!V:VarTransform=None" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. 
the formula of this example is good for parabolas factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); // Neural network (MLP) if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" ); // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDT"]) factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:V:NTrees=100:nEventsMin=30:NodePurityLimit=0.5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" ); //"!H:V:NTrees=60:nEventsMin=20:NodePurityLimit=0.5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30:DoBoostMonitor" ); if (Use["BDT1"]) factory->BookMethod( TMVA::Types::kBDT, "BDT1", "!H:V:NTrees=100:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" ); if (Use["BDTG"]) factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:V:NTrees=2000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.7:nCuts=200:MaxDepth=3:NNodesMax=15" ); if (Use["BDTG1"]) factory->BookMethod( TMVA::Types::kBDT, "BDTG1", "!H:V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:MaxDepth=3:NNodesMax=15" ); //-------------------------------------------------------------------------- // Train MVAs using the set of training events factory->TrainAllMethods(); // --- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // --- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); //-------------------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVARegression is done!" << std::endl; for (UInt_t i=0; i<files.size(); i++) files.at(i)->Close(); delete outputFile; delete factory; // Launch the GUI for the root macros //gROOT->SetMacroPath( "$ROOTSYS/tmva/macros/" ); //gROOT->Macro( "$ROOTSYS/tmva/macros/TMVAlogon.C" ); //gROOT->LoadMacro( "$ROOTSYS/tmva/macros/TMVAGui.C" ); //if (!gROOT->IsBatch()) TMVARegGui( outfileName ); }
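// Application sketch for the fat-jet energy regression trained above: the same input
// expressions are registered with a TMVA::Reader, and EvaluateRegression() returns the
// vector of targets (here a single one, the estimated fathFilterJets_genPt). This reuses
// the GetInputExpressionsFJReg() helper from HelperFunctions.h; the weight-file path
// assumes TMVA's default weights/<JobName>_<MethodName>.weights.xml layout.
void ApplyRegressionFJ() {
   gROOT->LoadMacro("HelperFunctions.h");
   const std::vector<std::string>& exprs = GetInputExpressionsFJReg();
   std::vector<Float_t> values(exprs.size()); // one buffer per input expression
   TMVA::Reader* reader = new TMVA::Reader( "!Color:!Silent" );
   for (UInt_t i = 0; i < exprs.size(); i++)
      reader->AddVariable( exprs.at(i).c_str(), &values.at(i) );
   reader->BookMVA( "BDT", "weights/TMVARegressionFJ_BDT.weights.xml" );
   // ... fill values[] from the current event, then:
   Float_t genPtEstimate = reader->EvaluateRegression( "BDT" ).at(0);
   std::cout << "regressed genPt estimate: " << genPtEstimate << std::endl;
   delete reader;
}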
void testBDT(){ //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); /* TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "testBDT", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // global event weights per tree (see below for setting event-wise weights) //Double_t signalWeight = 0.003582; //Double_t backgroundWeight = 0.0269; Double_t signalWeight = 1; Double_t backgroundWeight = 1; TFile *input_sig = TFile::Open( "signal_exclusif.root" ); TFile *input_wz = TFile::Open( "bruit_w_z.root" ); TTree *signal = (TTree*)input_sig->Get("tree"); TTree *background = (TTree*)input_wz->Get("tree"); // You can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); factory->AddVariable("PT_z" , 'F'); factory->AddVariable("ASYM" , 'F'); factory->AddVariable("PHI_lw_b", 'F'); factory->AddVariable("M_top", 'F'); */ TString outfileName( "bdtTMVA_FCNC_tZ.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "doBDT_FCNC_tZ", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // global event weights per tree (see below for setting event-wise weights) //Double_t signalWeight = 0.003582; //Double_t backgroundWeight = 0.0269; Double_t signalWeight = 1; Double_t backgroundWeight = 1; TFile *input_sig = TFile::Open( "proof.root" ); TFile *input_wz = TFile::Open( "proof.root" ); TTree *signal = (TTree*)input_sig->Get("Ttree_FCNCkut"); TTree *background_WZ = (TTree*)input_wz->Get("Ttree_WZ"); /*TTree *background_ZZ = (TTree*)input_wz->Get("Ttree_ZZ"); TTree *background_WW = (TTree*)input_wz->Get("Ttree_WW"); TTree *background_TTbar = (TTree*)input_wz->Get("Ttree_TTbar"); TTree *background_Zjets = (TTree*)input_wz->Get("Ttree_Zjets"); TTree *background_Wjets = (TTree*)input_wz->Get("Ttree_Wjets"); TTree *background_TtW = (TTree*)input_wz->Get("Ttree_TtW"); TTree *background_TbartW = (TTree*)input_wz->Get("Ttree_TbartW");*/ // You can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background_WZ, backgroundWeight ); /*factory->AddBackgroundTree( background_ZZ, backgroundWeight ); factory->AddBackgroundTree( background_WW, backgroundWeight ); factory->AddBackgroundTree( background_TTbar, backgroundWeight ); factory->AddBackgroundTree( background_Zjets, backgroundWeight ); factory->AddBackgroundTree( background_Wjets, backgroundWeight ); factory->AddBackgroundTree( background_TtW, backgroundWeight ); factory->AddBackgroundTree( background_TbartW, backgroundWeight );*/ factory->AddVariable("tree_topMass", 'F'); factory->AddVariable("tree_deltaPhilb", 'F'); factory->AddVariable("tree_asym", 'F'); factory->AddVariable("tree_Zpt", 'F'); // to set weights. 
The variable must exist in the tree // for signal : factory->SetSignalWeightExpression ("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("weight1*weight2"); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=100:nEventsMin=100:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
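// testBDT trains a single BDT on four variables; the resulting weight file can be applied event by event with TMVA::Reader. A sketch, assuming TMVA's default weight-file location weights/doBDT_FCNC_tZ_BDT.weights.xml (derived from the factory job name and method name above) and Float_t branches:
void applyBDT_FCNC_tZ_sketch() {
   // Sketch only: the variable names match the training above; the weight-file
   // path is an assumption based on TMVA's default naming convention.
   TMVA::Reader *reader = new TMVA::Reader( "!Color:!Silent" );
   Float_t topMass, deltaPhilb, asym, Zpt;
   reader->AddVariable( "tree_topMass",    &topMass );
   reader->AddVariable( "tree_deltaPhilb", &deltaPhilb );
   reader->AddVariable( "tree_asym",       &asym );
   reader->AddVariable( "tree_Zpt",        &Zpt );
   reader->BookMVA( "BDT", "weights/doBDT_FCNC_tZ_BDT.weights.xml" );
   TFile *f = TFile::Open( "proof.root" );
   TTree *t = (TTree*)f->Get( "Ttree_FCNCkut" );
   t->SetBranchAddress( "tree_topMass",    &topMass );
   t->SetBranchAddress( "tree_deltaPhilb", &deltaPhilb );
   t->SetBranchAddress( "tree_asym",       &asym );
   t->SetBranchAddress( "tree_Zpt",        &Zpt );
   for (Long64_t i = 0; i < t->GetEntries(); ++i) {
      t->GetEntry(i);
      Double_t mva = reader->EvaluateMVA( "BDT" ); // larger = more signal-like
      // ... histogram or cut on mva here
   }
   delete reader;
}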
void TMVAMulticlass( TString myMethodList = "" ) { // This loads the library TMVA::Tools::Instance(); // to get access to the GUI and all tmva macros // // TString tmva_dir(TString(gRootDir) + "/tmva"); // if(gSystem->Getenv("TMVASYS")) // tmva_dir = TString(gSystem->Getenv("TMVASYS")); // gROOT->SetMacroPath(tmva_dir + "/test/:" + gROOT->GetMacroPath() ); // gROOT->ProcessLine(".L TMVAMultiClassGui.C"); //--------------------------------------------------------------- // Default MVA methods to be trained + tested std::map<std::string,int> Use; Use["MLP"] = 1; Use["BDTG"] = 1; Use["DNN"] = 0; Use["FDA_GA"] = 0; Use["PDEFoam"] = 0; //--------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAMulticlass" << std::endl; if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // Create a new root output file. TString outfileName = "TMVAMulticlass.root"; TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "TMVAMulticlass", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=multiclass" ); TMVA::DataLoader *dataloader=new TMVA::DataLoader("dataset"); dataloader->AddVariable( "var1", 'F' ); dataloader->AddVariable( "var2", "Variable 2", "", 'F' ); dataloader->AddVariable( "var3", "Variable 3", "units", 'F' ); dataloader->AddVariable( "var4", "Variable 4", "units", 'F' ); TFile *input(0); TString fname = "./tmva_example_multiple_background.root"; if (!gSystem->AccessPathName( fname )) { // first we try to find the file in the local directory std::cout << "--- TMVAMulticlass : Accessing " << fname << std::endl; input = TFile::Open( fname ); } else { std::cout << "Creating testdata...." 
<< std::endl; TString createDataMacro = TString(gROOT->GetTutorialsDir()) + "/tmva/createData.C"; gROOT->ProcessLine(TString::Format(".L %s",createDataMacro.Data())); gROOT->ProcessLine("create_MultipleBackground(2000)"); std::cout << " created tmva_example_multiple_background.root for tests of the multiclass features"<<std::endl; input = TFile::Open( fname ); } if (!input) { std::cout << "ERROR: could not open data file" << std::endl; exit(1); } TTree *signalTree = (TTree*)input->Get("TreeS"); TTree *background0 = (TTree*)input->Get("TreeB0"); TTree *background1 = (TTree*)input->Get("TreeB1"); TTree *background2 = (TTree*)input->Get("TreeB2"); gROOT->cd( outfileName+TString(":/") ); dataloader->AddTree (signalTree,"Signal"); dataloader->AddTree (background0,"bg0"); dataloader->AddTree (background1,"bg1"); dataloader->AddTree (background2,"bg2"); dataloader->PrepareTrainingAndTestTree( "", "SplitMode=Random:NormMode=NumEvents:!V" ); if (Use["BDTG"]) // gradient boosted decision trees factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.50:nCuts=20:MaxDepth=2"); if (Use["MLP"]) // neural network factory->BookMethod( dataloader, TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:NCycles=1000:HiddenLayers=N+5,5:TestRate=5:EstimatorType=MSE"); if (Use["FDA_GA"]) // functional discriminant with GA minimizer factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["PDEFoam"]) // PDE-Foam approach factory->BookMethod( dataloader, TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["DNN"]) { TString layoutString ("Layout=TANH|100,TANH|50,TANH|10,LINEAR"); TString training0 ("LearningRate=1e-1, Momentum=0.5, Repetitions=1, ConvergenceSteps=10," " BatchSize=256, TestRepetitions=10, Multithreading=True"); TString training1 ("LearningRate=1e-2, Momentum=0.0, Repetitions=1, ConvergenceSteps=10," " BatchSize=256, TestRepetitions=7, Multithreading=True"); TString trainingStrategyString ("TrainingStrategy="); trainingStrategyString += training0 + "|" + training1; TString nnOptions ("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=N:" "WeightInitialization=XAVIERUNIFORM:Architecture=STANDARD"); nnOptions.Append (":"); nnOptions.Append (layoutString); nnOptions.Append (":"); nnOptions.Append (trainingStrategyString); factory->BookMethod(dataloader, TMVA::Types::kDNN, "DNN", nnOptions ); } // Train MVAs using the set of training events factory->TrainAllMethods(); // Evaluate all MVAs using the set of test events factory->TestAllMethods(); // Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAMulticlass is done!" << std::endl; delete factory; delete dataloader; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAMultiClassGui( outfileName ); }
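// With AnalysisType=multiclass, TMVA::Reader returns one response per class via EvaluateMulticlass; the entries should correspond to the classes in the order they were registered above (Signal, bg0, bg1, bg2). A sketch, assuming the DataLoader name "dataset" prefixes the default weight path:
void applyMulticlassSketch() {
   // Sketch: apply the multiclass BDTG trained above to one (dummy) event.
   TMVA::Reader *reader = new TMVA::Reader( "!Color:!Silent" );
   Float_t var1, var2, var3, var4;
   reader->AddVariable( "var1", &var1 );
   reader->AddVariable( "var2", &var2 );
   reader->AddVariable( "var3", &var3 );
   reader->AddVariable( "var4", &var4 );
   reader->BookMVA( "BDTG", "dataset/weights/TMVAMulticlass_BDTG.weights.xml" ); // assumed path
   var1 = var2 = var3 = var4 = 0.; // fill from TTree branches in a real loop
   const std::vector<Float_t> &resp = reader->EvaluateMulticlass( "BDTG" );
   // resp[0]..resp[3] correspond to Signal, bg0, bg1, bg2
   std::cout << "Signal response: " << resp[0] << std::endl;
   delete reader;
}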
void TMVAClassification( TString myMethodList = "" , TString myModel = "") { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // to get access to the GUI and all tmva macros TString tmva_dir(TString(gRootDir) + "/tmva"); if(gSystem->Getenv("TMVASYS")) tmva_dir = TString(gSystem->Getenv("TMVASYS")); gROOT->SetMacroPath(tmva_dir + "/test/:" + gROOT->GetMacroPath() ); gROOT->ProcessLine(".L TMVAGui.C"); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Cut optimisation Use["Cuts"] = 1; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // // --- 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 0; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // --- Multidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 0; Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting Use["KNN"] = 0; // k-nearest neighbour method // // --- Linear Discriminant Analysis Use["LD"] = 0; // Linear Discriminant identical to Fisher Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 0; // minimisation of user-defined function using Genetic Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 0; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 0; // Recommended ANN with BFGS training method and Bayesian regulator Use["CFMlpANN"] = 0; // Deprecated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 0; // uses Adaptive Boost Use["BDTG"] = 0; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of Fisher discriminant for node splitting // // --- Friedman's RuleFit method, i.e., an optimised series of cuts ("rules") Use["RuleFit"] = 0; // --------------------------------------------------------------- // Default model to be trained + tested std::map<std::string,int> Model; // --- Cut optimisation Model[ "MM" ] = 0; // Mass mechanism Model[ "RHC_L" ] = 0; // Right Handed Current Model[ "RHC_E" ] = 0; // Right Handed Current Model[ "M1" ] = 0; // Majoron Model[ "M2" ] = 0; // Majoron Model[ "M3" ] = 0; // Majoron Model[ "M7" ] = 0; //
Majoron std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } if(myModel != "") { std::string regModel(myModel); if( Model.find(regModel) == Model.end() ){ std::cout << "Model \"" << myModel << "\" not known under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Model.begin(); it != Model.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Model[regModel] = 1; } else { std::cout << "No signal model has been specified. You must choose one among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Model.begin(); it != Model.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName; outfileName.Form( "TMVA_%s.root", myModel.Data() ); //TString outfileDir( "/Users/alberto/Software/SuperNEMO/work/nemo3/plot/plot_FINAL_TECHNOTE_20150921/TMVA/" ); TString outfileDir( "/Users/alberto/Software/SuperNEMO/work/nemo3/plot/plot_UPDATE_TECHNOTE_20160429/TMVA/" ); TFile* outputFile = TFile::Open( outfileDir + outfileName , "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!"
(not) in // front of the "Silent" argument in the option string TString weightBaseName; weightBaseName.Form("TMVAClassification_%s", myModel.Data()); TMVA::Factory *factory = new TMVA::Factory( weightBaseName , outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] //factory->AddVariable( "myvar1 := var1+var2", 'F' ); //factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' ); //factory->AddVariable( "var3", "Variable 3", "units", 'F' ); //factory->AddVariable( "var4", "Variable 4", "units", 'F' ); factory->AddVariable( "min_el_en" , 'F' ); factory->AddVariable( "max_el_en" , 'F' ); factory->AddVariable( "el_en_asym := (max_el_en-min_el_en)/(min_el_en+max_el_en)" , 'F' ); factory->AddVariable( "el_en_sum := min_el_en+max_el_en" , 'F' ); factory->AddVariable( "cos_theta" , 'F' ); factory->AddVariable( "prob_int" , 'F' ); factory->AddVariable( "min_el_track_len" , 'F' ); factory->AddVariable( "max_el_track_len" , 'F' ); //factory->AddVariable( "min_el_curv := min_el_track_r*min_el_sign" , 'F' ); //factory->AddVariable( "max_el_curv := max_el_track_r*max_el_sign" , 'F' ); //factory->AddVariable( "max_vertex_s" , 'F' ); //factory->AddVariable( "max_vertex_z" , 'F' ); //factory->AddVariable( "min_vertex_s" , 'F' ); //factory->AddVariable( "min_vertex_z" , 'F' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. 
This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables //factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' ); //factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' ); // Read training and test data // (it is also possible to use ASCII format as input -> see TMVA Users Guide) //TString fdir = "/sps/nemo/scratch/remoto/nemo3/plot/plot_FINAL_TECHNOTE_20150921/"; TString fdir = "/Users/alberto/Software/SuperNEMO/work/nemo3/plot/plot_UPDATE_TECHNOTE_20160429/"; TString fname = "TwoElectronIntTree.root"; TFile *input = TFile::Open( fdir + fname , "READ"); std::cout << "--- TMVAClassification : Using input file: " << input->GetName() << std::endl; TTree * sig_tree = 0; Double_t sig_weight = 1.; if ( Model[ "MM" ] ) sig_tree = (TTree*) input->Get( "Cd116_2b0n_m1_tree" ) ; if ( Model[ "RHC_L" ] ) sig_tree = (TTree*) input->Get( "Cd116_2b0n_m2_tree" ) ; if ( Model[ "RHC_E" ] ) sig_tree = (TTree*) input->Get( "Cd116_2b0n_m18_tree" ) ; if ( Model[ "M1" ] ) sig_tree = (TTree*) input->Get( "Cd116_2b0n_m5_tree" ) ; if ( Model[ "M2" ] ) sig_tree = (TTree*) input->Get( "Cd116_2b0n_m15_tree" ) ; if ( Model[ "M3" ] ) sig_tree = (TTree*) input->Get( "Cd116_2b0n_m6_tree" ) ; if ( Model[ "M7" ] ) sig_tree = (TTree*) input->Get( "Cd116_2b0n_m7_tree" ) ; factory->AddSignalTree( sig_tree , sig_weight ); //Double_t Cd116_2b0n_m1_weight = 1.; //TTree * Cd116_2b0n_m1_tree = (TTree*) input->Get("Cd116_2b0n_m1_tree" ) ; //factory->AddSignalTree( Cd116_2b0n_m1_tree , Cd116_2b0n_m1_weight ); TTree * Cd116_Tl208_tree = (TTree*) input->Get("Cd116_Tl208_tree" ) ; Double_t Cd116_Tl208_weight = 6.52838 ; if( Cd116_Tl208_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Cd116_Tl208_tree , Cd116_Tl208_weight ); }; TTree * Cd116_Ac228_tree = (TTree*) input->Get("Cd116_Ac228_tree" ) ; Double_t Cd116_Ac228_weight = 7.62351 ; if( Cd116_Ac228_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Cd116_Ac228_tree , Cd116_Ac228_weight ); }; TTree * Cd116_Bi212_tree = (TTree*) input->Get("Cd116_Bi212_tree" ) ; Double_t Cd116_Bi212_weight = 3.00708 ; if( Cd116_Bi212_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Cd116_Bi212_tree , Cd116_Bi212_weight ); }; TTree * Cd116_Bi214_tree = (TTree*) input->Get("Cd116_Bi214_tree" ) ; Double_t Cd116_Bi214_weight = 18.1504 ; if( Cd116_Bi214_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Cd116_Bi214_tree , Cd116_Bi214_weight ); }; TTree * Cd116_Pb214_tree = (TTree*) input->Get("Cd116_Pb214_VT_tree" ) ; Double_t Cd116_Pb214_weight = 0.186417 ; if( Cd116_Pb214_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Cd116_Pb214_tree , Cd116_Pb214_weight ); }; TTree * Mylar_Bi214_tree = (TTree*) input->Get("Mylar_Bi214_tree" ) ; Double_t Mylar_Bi214_weight = 11.1346 ; if( Mylar_Bi214_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Mylar_Bi214_tree , Mylar_Bi214_weight ); }; TTree * Mylar_Pb214_tree = (TTree*) input->Get("Mylar_Pb214_tree" ) ; Double_t Mylar_Pb214_weight = 0.496238 ; if( Mylar_Pb214_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Mylar_Pb214_tree , Mylar_Pb214_weight ); }; TTree * Cd116_K40_tree = (TTree*) input->Get("Cd116_K40_tree" ) ; Double_t Cd116_K40_weight = 8.9841+25.8272 ; if( Cd116_K40_tree -> GetEntriesFast() > 0. 
) {factory->AddBackgroundTree( Cd116_K40_tree , Cd116_K40_weight ); }; TTree * Cd116_Pa234m_tree = (TTree*) input->Get("Cd116_Pa234m_tree" ) ; Double_t Cd116_Pa234m_weight = 27.9307+72.4667 ; if( Cd116_Pa234m_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Cd116_Pa234m_tree , Cd116_Pa234m_weight ); }; TTree * SFoil_Bi210_tree = (TTree*) input->Get("SFoil_Bi210_tree" ) ; Double_t SFoil_Bi210_weight = 0+23.2438 ; if( SFoil_Bi210_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SFoil_Bi210_tree , SFoil_Bi210_weight ); }; TTree * SWire_Bi210_tree = (TTree*) input->Get("SWire_Bi210_tree" ) ; Double_t SWire_Bi210_weight = 0.136147+0.624187 ; if( SWire_Bi210_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SWire_Bi210_tree , SWire_Bi210_weight ); }; TTree * SScin_Bi210_tree = (TTree*) input->Get("SScin_Bi210_tree" ) ; Double_t SScin_Bi210_weight = 1.75641 ; if( SScin_Bi210_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SScin_Bi210_tree , SScin_Bi210_weight ); }; TTree * SScin_Bi214_tree = (TTree*) input->Get("SScin_Bi214_tree" ) ; Double_t SScin_Bi214_weight = 0.0510754 ; if( SScin_Bi214_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SScin_Bi214_tree , SScin_Bi214_weight ); }; TTree * SScin_Pb214_tree = (TTree*) input->Get("SScin_Pb214_tree" ) ; Double_t SScin_Pb214_weight = 0 ; if( SScin_Pb214_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SScin_Pb214_tree , SScin_Pb214_weight ); }; TTree * SWire_Tl208_tree = (TTree*) input->Get("SWire_Tl208_tree" ) ; Double_t SWire_Tl208_weight = 0.217623+1.07641 ; if( SWire_Tl208_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SWire_Tl208_tree , SWire_Tl208_weight ); }; TTree * SWire_Bi214_tree = (TTree*) input->Get("SWire_Bi214_tree" ) ; Double_t SWire_Bi214_weight = 21.4188+17.8236 ; if( SWire_Bi214_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SWire_Bi214_tree , SWire_Bi214_weight ); }; TTree * SFoil_Bi214_tree = (TTree*) input->Get("SFoil_Bi214_tree" ) ; Double_t SFoil_Bi214_weight = 5.83533+2.80427 ; if( SFoil_Bi214_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SFoil_Bi214_tree , SFoil_Bi214_weight ); }; TTree * SWire_Pb214_tree = (TTree*) input->Get("SWire_Pb214_tree" ) ; Double_t SWire_Pb214_weight = 0.458486+0.649167 ; if( SWire_Pb214_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SWire_Pb214_tree , SWire_Pb214_weight ); }; TTree * SFoil_Pb214_tree = (TTree*) input->Get("SFoil_Pb214_tree" ) ; Double_t SFoil_Pb214_weight = 0.218761+0.195287 ; if( SFoil_Pb214_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SFoil_Pb214_tree , SFoil_Pb214_weight ); }; TTree * FeShield_Bi214_tree = (TTree*) input->Get("FeShield_Bi214_tree" ) ; Double_t FeShield_Bi214_weight = 50.7021 ; if( FeShield_Bi214_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( FeShield_Bi214_tree , FeShield_Bi214_weight ); }; TTree * FeShield_Tl208_tree = (TTree*) input->Get("FeShield_Tl208_tree" ) ; Double_t FeShield_Tl208_weight = 0.859465 ; if( FeShield_Tl208_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( FeShield_Tl208_tree , FeShield_Tl208_weight ); }; TTree * FeShield_Ac228_tree = (TTree*) input->Get("FeShield_Ac228_tree" ) ; Double_t FeShield_Ac228_weight = 0.126868 ; if( FeShield_Ac228_tree -> GetEntriesFast() > 0.
) {factory->AddBackgroundTree( FeShield_Ac228_tree , FeShield_Ac228_weight ); }; TTree * CuTower_Co60_tree = (TTree*) input->Get("CuTower_Co60_tree" ) ; Double_t CuTower_Co60_weight = 3.9407 ; if( CuTower_Co60_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( CuTower_Co60_tree , CuTower_Co60_weight ); }; TTree * Air_Bi214_P1_tree = (TTree*) input->Get("Air_Bi214_tree" ) ; Double_t Air_Bi214_P1_weight = 4.19744 ; if( Air_Bi214_P1_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Air_Bi214_P1_tree , Air_Bi214_P1_weight ); }; TTree * Air_Tl208_P1_tree = (TTree*) input->Get("Air_Tl208_tree" ) ; Double_t Air_Tl208_P1_weight = 0 ; if( Air_Tl208_P1_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Air_Tl208_P1_tree , Air_Tl208_P1_weight ); }; TTree * PMT_Bi214_tree = (TTree*) input->Get("PMT_Bi214_tree" ) ; Double_t PMT_Bi214_weight = 27.9661 ; if( PMT_Bi214_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( PMT_Bi214_tree , PMT_Bi214_weight ); }; TTree * PMT_Tl208_tree = (TTree*) input->Get("PMT_Tl208_tree" ) ; Double_t PMT_Tl208_weight = 22.923 ; if( PMT_Tl208_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( PMT_Tl208_tree , PMT_Tl208_weight ); }; TTree * PMT_Ac228_tree = (TTree*) input->Get("PMT_Ac228_tree" ) ; Double_t PMT_Ac228_weight = 3.60712 ; if( PMT_Ac228_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( PMT_Ac228_tree , PMT_Ac228_weight ); }; TTree * PMT_K40_tree = (TTree*) input->Get("PMT_K40_tree" ) ; Double_t PMT_K40_weight = 16.813 ; if( PMT_K40_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( PMT_K40_tree , PMT_K40_weight ); }; TTree * ScintInn_K40_tree = (TTree*) input->Get("ScintInn_K40_tree" ) ; Double_t ScintInn_K40_weight = 0.333988 ; if( ScintInn_K40_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( ScintInn_K40_tree , ScintInn_K40_weight ); }; TTree * ScintOut_K40_tree = (TTree*) input->Get("ScintOut_K40_tree" ) ; Double_t ScintOut_K40_weight = 0.601178 ; if( ScintOut_K40_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( ScintOut_K40_tree , ScintOut_K40_weight ); }; TTree * ScintPet_K40_tree = (TTree*) input->Get("ScintPet_K40_tree" ) ; Double_t ScintPet_K40_weight = 1.00195 ; if( ScintPet_K40_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( ScintPet_K40_tree , ScintPet_K40_weight ); }; TTree * MuMetal_Pa234m_tree = (TTree*) input->Get("MuMetal_Pa234m_tree" ) ; Double_t MuMetal_Pa234m_weight = 0.739038 ; if( MuMetal_Pa234m_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( MuMetal_Pa234m_tree , MuMetal_Pa234m_weight ); }; TTree * Cd116_2b2n_m14_tree = (TTree*) input->Get("Cd116_2b2n_m14_tree" ) ; Double_t Cd116_2b2n_m14_weight = 4977.55 ; if( Cd116_2b2n_m14_tree -> GetEntriesFast() > 0. 
) {factory->AddBackgroundTree( Cd116_2b2n_m14_tree , Cd116_2b2n_m14_weight ); }; // --- end of tree registration // Set individual event weights (the variables must exist in the original TTree) // for signal : factory->SetSignalWeightExpression ("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("weight1*weight2"); factory->SetBackgroundWeightExpression( "weight" ); // Apply additional cuts on the signal and background samples (can be different) // Apply cut on charge //TCut mycuts = "min_el_sign < 0 && max_el_sign < 0."; //TCut mycutb = "min_el_sign < 0 && max_el_sign < 0."; // Apply cut on vertex //TCut mycuts = "((max_vertex_x - min_vertex_x)**2 + (max_vertex_y - min_vertex_y)**2 <= 4**2)&&((max_vertex_z-min_vertex_z)**2<8**2)"; //TCut mycutb = "((max_vertex_x - min_vertex_x)**2 + (max_vertex_y - min_vertex_y)**2 <= 4**2)&&((max_vertex_z-min_vertex_z)**2<8**2)"; TCut mycuts = ""; TCut mycutb = ""; // Tell the factory how to use the training and testing events // // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood ("naive Bayes estimator") if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density
estimator to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", 
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... 
// Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" ); if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=-1" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier usage, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // ---- STILL EXPERIMENTAL and only implemented for BDT's ! // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","FitGA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileDir + outfileName ); }
int main( int argc, char** argv ) {//main std::string folder; if (argc > 1) { folder = argv[1]; } else { folder = "output_tmva/nunu/MET130/"; } bool useQCD = true; bool useOthers = false; bool useOthersAsSignal = true; //List of input signal files std::vector<std::string> sigfiles; //sigfiles.push_back("MC_VBF_HToZZTo4Nu_M-120"); sigfiles.push_back("MC_Powheg-Htoinv-mH125"); if (useOthersAsSignal) { sigfiles.push_back("MC_TTJets"); //powheg samples //sigfiles.push_back("MC_TT-v1"); //sigfiles.push_back("MC_TT-v2"); // sigfiles.push_back("MC_T-tW"); sigfiles.push_back("MC_Tbar-tW"); sigfiles.push_back("MC_SingleT-s-powheg-tauola"); sigfiles.push_back("MC_SingleTBar-s-powheg-tauola"); sigfiles.push_back("MC_SingleT-t-powheg-tauola"); sigfiles.push_back("MC_SingleTBar-t-powheg-tauola"); sigfiles.push_back("MC_WW-pythia6-tauola"); sigfiles.push_back("MC_WZ-pythia6-tauola"); sigfiles.push_back("MC_ZZ-pythia6-tauola"); sigfiles.push_back("MC_W1JetsToLNu_enu"); sigfiles.push_back("MC_W2JetsToLNu_enu"); sigfiles.push_back("MC_W3JetsToLNu_enu"); sigfiles.push_back("MC_W4JetsToLNu_enu"); sigfiles.push_back("MC_WJetsToLNu-v1_enu"); sigfiles.push_back("MC_WJetsToLNu-v2_enu"); sigfiles.push_back("MC_W1JetsToLNu_munu"); sigfiles.push_back("MC_W2JetsToLNu_munu"); sigfiles.push_back("MC_W3JetsToLNu_munu"); sigfiles.push_back("MC_W4JetsToLNu_munu"); sigfiles.push_back("MC_WJetsToLNu-v1_munu"); sigfiles.push_back("MC_WJetsToLNu-v2_munu"); sigfiles.push_back("MC_W1JetsToLNu_taunu"); sigfiles.push_back("MC_W2JetsToLNu_taunu"); sigfiles.push_back("MC_W3JetsToLNu_taunu"); sigfiles.push_back("MC_W4JetsToLNu_taunu"); sigfiles.push_back("MC_WJetsToLNu-v1_taunu"); sigfiles.push_back("MC_WJetsToLNu-v2_taunu"); sigfiles.push_back("MC_DYJetsToLL"); sigfiles.push_back("MC_DY1JetsToLL"); sigfiles.push_back("MC_DY2JetsToLL"); sigfiles.push_back("MC_DY3JetsToLL"); sigfiles.push_back("MC_DY4JetsToLL"); sigfiles.push_back("MC_ZJetsToNuNu_100_HT_200"); sigfiles.push_back("MC_ZJetsToNuNu_200_HT_400"); sigfiles.push_back("MC_ZJetsToNuNu_400_HT_inf"); sigfiles.push_back("MC_ZJetsToNuNu_50_HT_100"); sigfiles.push_back("MC_GJets-HT-200To400-madgraph"); sigfiles.push_back("MC_GJets-HT-400ToInf-madgraph"); sigfiles.push_back("MC_WGamma"); sigfiles.push_back("MC_EWK-Z2j"); sigfiles.push_back("MC_EWK-Z2jiglep"); sigfiles.push_back("MC_EWK-W2jminus_enu"); sigfiles.push_back("MC_EWK-W2jplus_enu"); sigfiles.push_back("MC_EWK-W2jminus_munu"); sigfiles.push_back("MC_EWK-W2jplus_munu"); sigfiles.push_back("MC_EWK-W2jminus_taunu"); sigfiles.push_back("MC_EWK-W2jplus_taunu"); } //List of input files std::vector<std::string> bkgfiles; if (useQCD){ bkgfiles.push_back("MC_QCD-Pt-30to50-pythia6"); bkgfiles.push_back("MC_QCD-Pt-50to80-pythia6"); bkgfiles.push_back("MC_QCD-Pt-80to120-pythia6"); bkgfiles.push_back("MC_QCD-Pt-120to170-pythia6"); bkgfiles.push_back("MC_QCD-Pt-170to300-pythia6"); bkgfiles.push_back("MC_QCD-Pt-300to470-pythia6"); bkgfiles.push_back("MC_QCD-Pt-470to600-pythia6"); bkgfiles.push_back("MC_QCD-Pt-600to800-pythia6"); bkgfiles.push_back("MC_QCD-Pt-800to1000-pythia6"); bkgfiles.push_back("MC_QCD-Pt-1000to1400-pythia6"); bkgfiles.push_back("MC_QCD-Pt-1400to1800-pythia6"); bkgfiles.push_back("MC_QCD-Pt-1800-pythia6"); } if (useOthers) { bkgfiles.push_back("MC_TTJets"); //powheg samples //bkgfiles.push_back("MC_TT-v1"); //bkgfiles.push_back("MC_TT-v2"); // bkgfiles.push_back("MC_T-tW"); bkgfiles.push_back("MC_Tbar-tW"); bkgfiles.push_back("MC_SingleT-s-powheg-tauola"); bkgfiles.push_back("MC_SingleTBar-s-powheg-tauola"); 
bkgfiles.push_back("MC_SingleT-t-powheg-tauola"); bkgfiles.push_back("MC_SingleTBar-t-powheg-tauola"); bkgfiles.push_back("MC_WW-pythia6-tauola"); bkgfiles.push_back("MC_WZ-pythia6-tauola"); bkgfiles.push_back("MC_ZZ-pythia6-tauola"); bkgfiles.push_back("MC_W1JetsToLNu_enu"); bkgfiles.push_back("MC_W2JetsToLNu_enu"); bkgfiles.push_back("MC_W3JetsToLNu_enu"); bkgfiles.push_back("MC_W4JetsToLNu_enu"); bkgfiles.push_back("MC_WJetsToLNu-v1_enu"); bkgfiles.push_back("MC_WJetsToLNu-v2_enu"); bkgfiles.push_back("MC_W1JetsToLNu_munu"); bkgfiles.push_back("MC_W2JetsToLNu_munu"); bkgfiles.push_back("MC_W3JetsToLNu_munu"); bkgfiles.push_back("MC_W4JetsToLNu_munu"); bkgfiles.push_back("MC_WJetsToLNu-v1_munu"); bkgfiles.push_back("MC_WJetsToLNu-v2_munu"); bkgfiles.push_back("MC_W1JetsToLNu_taunu"); bkgfiles.push_back("MC_W2JetsToLNu_taunu"); bkgfiles.push_back("MC_W3JetsToLNu_taunu"); bkgfiles.push_back("MC_W4JetsToLNu_taunu"); bkgfiles.push_back("MC_WJetsToLNu-v1_taunu"); bkgfiles.push_back("MC_WJetsToLNu-v2_taunu"); bkgfiles.push_back("MC_DYJetsToLL"); bkgfiles.push_back("MC_DY1JetsToLL"); bkgfiles.push_back("MC_DY2JetsToLL"); bkgfiles.push_back("MC_DY3JetsToLL"); bkgfiles.push_back("MC_DY4JetsToLL"); bkgfiles.push_back("MC_ZJetsToNuNu_100_HT_200"); bkgfiles.push_back("MC_ZJetsToNuNu_200_HT_400"); bkgfiles.push_back("MC_ZJetsToNuNu_400_HT_inf"); bkgfiles.push_back("MC_ZJetsToNuNu_50_HT_100"); bkgfiles.push_back("MC_GJets-HT-200To400-madgraph"); bkgfiles.push_back("MC_GJets-HT-400ToInf-madgraph"); bkgfiles.push_back("MC_WGamma"); bkgfiles.push_back("MC_EWK-Z2j"); bkgfiles.push_back("MC_EWK-Z2jiglep"); bkgfiles.push_back("MC_EWK-W2jminus_enu"); bkgfiles.push_back("MC_EWK-W2jplus_enu"); bkgfiles.push_back("MC_EWK-W2jminus_munu"); bkgfiles.push_back("MC_EWK-W2jplus_munu"); bkgfiles.push_back("MC_EWK-W2jminus_taunu"); bkgfiles.push_back("MC_EWK-W2jplus_taunu"); } // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TFile *output_tmva = TFile::Open((folder+"/TMVA_QCDrej.root").c_str(),"RECREATE"); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" 
(not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", output_tmva, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); //fill the variables with event weight from the trees //const unsigned nVars = 4; factory->AddSpectator("jet1_pt","Jet 1 p_{T}", "GeV", 'F'); factory->AddSpectator("jet2_pt","Jet 2 p_{T}", "GeV", 'F'); factory->AddSpectator("jet1_eta","Jet 1 #eta", "", 'F'); factory->AddVariable("jet2_eta","Jet 2 #eta", "", 'F');// ** factory->AddSpectator("jet1_phi","Jet 1 #phi", "", 'F'); factory->AddSpectator("jet2_phi","Jet 2 #phi", "", 'F'); factory->AddSpectator("dijet_M","M_{jj}", " GeV", 'F'); factory->AddSpectator("dijet_deta","#Delta#eta_{jj}", "", 'F'); factory->AddSpectator("dijet_sumeta","#eta_{j1}+#eta_{j2}", "", 'F'); factory->AddSpectator("dijet_dphi","#Delta#phi_{jj}", "", 'F'); factory->AddSpectator("met","MET", "GeV", 'F');// ** factory->AddSpectator("met_phi","MET #phi", "", 'F'); factory->AddVariable("met_significance","MET significance", "", 'F');// ** factory->AddSpectator("sumet","#Sum E_{T}", "GeV", 'F'); factory->AddSpectator("ht","H_{T}", "GeV", 'F'); factory->AddVariable("mht","MH_{T}", "GeV", 'F');// ** factory->AddSpectator("sqrt_ht","#sqrt{H_{T}}", "GeV^{0.5}", 'F'); factory->AddSpectator("unclustered_et","Unclustered E_{T}", "GeV", 'F'); factory->AddSpectator("unclustered_phi","Unclustered #phi", "GeV", 'F'); factory->AddSpectator("jet1met_dphi","#Delta#phi(MET,jet1)", "", 'F'); factory->AddVariable("jet2met_dphi","#Delta#phi(MET,jet2)", "", 'F');// ** factory->AddVariable("jetmet_mindphi","minimum #Delta#phi(MET,jet)", "", 'F');// ** factory->AddVariable("jetunclet_mindphi","minimum #Delta#phi(unclustered,jet)", "", 'F');// ** factory->AddVariable("metunclet_dphi","#Delta#phi(MET,unclustered)", "", 'F');// ** factory->AddVariable("dijetmet_scalarSum_pt", "p_{T}^{jet1}+p_{T}^{jet2}+MET", "GeV", 'F');// ** factory->AddSpectator("dijetmet_vectorialSum_pt","p_{T}(#vec{j1}+#vec{j2}+#vec{MET})", "GeV", 'F'); factory->AddVariable("dijetmet_ptfraction","p_{T}^{dijet}/(p_{T}^{dijet}+MET)", "", 'F');// ** //factory->AddVariable("jet1met_scalarprod := (jet1_pt*cos(jet1_phi)*met_x+jet1_pt*sin(jet1_phi)*met_y)/met", "#vec{p_{T}^{jet1}}.#vec{MET}/MET", "GeV" , 'F'); //factory->AddVariable("jet2met_scalarprod := (jet2_pt*cos(jet2_phi)*met_x+jet2_pt*sin(jet2_phi)*met_y)/met", "#vec{p_{T}^{jet2}}.#vec{MET}/MET", "GeV" , 'F'); factory->AddVariable("jet1met_scalarprod", "#vec{p_{T}^{jet1}}.#vec{MET}/MET", "GeV" , 'F');// ** factory->AddVariable("jet2met_scalarprod", "#vec{p_{T}^{jet2}}.#vec{MET}/MET", "GeV" , 'F');// ** factory->AddVariable("jet1met_scalarprod_frac := jet1met_scalarprod/met", "#vec{p_{T}^{jet1}}.#vec{MET}/MET^{2}", "" , 'F');// ** factory->AddVariable("jet2met_scalarprod_frac := jet2met_scalarprod/met", "#vec{p_{T}^{jet2}}.#vec{MET}/MET^{2}", "" , 'F');// ** factory->AddSpectator("n_jets_cjv_30","CJV jets (30 GeV)", "" , 'I'); factory->AddSpectator("n_jets_cjv_20EB_30EE","CJV jets (|#eta|<2.4 and 20 GeV, or 30 GeV)", "" , 'I'); //test with only VBF variables used in cut-based analysis //factory->AddVariable("dijet_M","M_{jj}", " GeV", 'F'); //factory->AddVariable("dijet_deta","#Delta#eta_{jj}", "", 'F'); //factory->AddVariable("dijet_dphi","#Delta#phi_{jj}", "", 'F'); //factory->AddVariable("met","MET", "GeV", 'F'); //factory->AddVariable("n_jets_cjv_30","CJV jets (30 GeV)", "" , 'I'); //get input files //signal //TFile *signalfile = 
TFile::Open((folder+"/"+"MC_VBF_HToZZTo4Nu_M-120.root").c_str()); //TTree *signal = (TTree*)signalfile->Get("TmvaInputTree"); //Double_t signalWeight = 1.0; //factory->AddSignalTree(signal,signalWeight); //Set individual event weights (the variables must exist in the original TTree) //factory->SetSignalWeightExpression("total_weight"); //background std::map<std::string, TFile *> tfiles; for (unsigned i = 0; i < bkgfiles.size(); ++i) { std::string filename = (bkgfiles[i]+".root"); TFile * tmp = new TFile((folder+"/"+filename).c_str()); if (!tmp || tmp->IsZombie()) { // new TFile never returns null, so check IsZombie() to catch unreadable files std::cerr << "Warning, file " << filename << " could not be opened." << std::endl; } else { tfiles[bkgfiles[i]] = tmp; } } TTree *background[bkgfiles.size()]; //signal std::map<std::string, TFile *> sfiles; for (unsigned i = 0; i < sigfiles.size(); ++i) { std::string filename = (sigfiles[i]+".root"); TFile * tmp = new TFile((folder+"/"+filename).c_str()); if (!tmp || tmp->IsZombie()) { std::cerr << "Warning, file " << filename << " could not be opened." << std::endl; } else { sfiles[sigfiles[i]] = tmp; } } TTree *signal[sigfiles.size()]; for (unsigned i = 0; i < bkgfiles.size(); ++i) { std::string f = bkgfiles[i]; if (tfiles[f]){ background[i] = (TTree*)tfiles[f]->Get("TmvaInputTree"); //if (f.find("QCD-Pt")!=f.npos){ //} Double_t backgroundWeight = 1.0; factory->AddBackgroundTree(background[i],backgroundWeight); factory->SetBackgroundWeightExpression("total_weight"); }//if file exist else { std::cout << " Cannot find background file " << f << std::endl; } }//loop on files for (unsigned i = 0; i < sigfiles.size(); ++i) { std::string f = sigfiles[i]; if (sfiles[f]){ signal[i] = (TTree*)sfiles[f]->Get("TmvaInputTree"); //if (f.find("QCD-Pt")!=f.npos){ //} Double_t signalWeight = 1.0; factory->AddSignalTree(signal[i],signalWeight); factory->SetSignalWeightExpression("total_weight"); }//if file exist else { std::cout << " Cannot find signal file " << f << std::endl; } }//loop on files // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = "";//dijet_deta>3.8 && dijet_M > 1100 && met > 100 && met_significance>5"; TCut mycutb = "";//dijet_deta>3.8 && dijet_M > 1100 && met > 100 && met_significance>5"; factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // Likelihood ("naive Bayes estimator") //factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", //"H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Linear discriminant (same as Fisher discriminant) //factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons //factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=60:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); // Boosted Decision Trees // Gradient Boost //factory->BookMethod( TMVA::Types::kBDT, "BDTG", //"!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" ); //factory->BookMethod( TMVA::Types::kBDT, "BDTG", // "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:nCuts=20:MaxDepth=2" ); // Adaptive Boost //factory->BookMethod(
TMVA::Types::kBDT, "BDT1000", // "!H:!V:NTrees=1000:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" ); factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=1000:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.2:SeparationType=GiniIndex:nCuts=20" ); // Bagging //factory->BookMethod( TMVA::Types::kBDT, "BDTB", // "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" ); // Decorrelation + Adaptive Boost //factory->BookMethod( TMVA::Types::kBDT, "BDTD", // "!H:!V:NTrees=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" ); // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables //factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", // "!H:!V:NTrees=50:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" ); // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output output_tmva->Close(); std::cout << "==> Wrote root file: " << output_tmva->GetName() << std::endl << "==> TMVAClassification is done!" << std::endl << std::endl << "==> To view the results, launch the GUI: \"root -l ./TMVAGui.C\"" << std::endl << std::endl; // Clean up delete factory; return 0; }//main
void classifyBDT(TString inputVariables = "trainingVars.txt", TString signalName = "/mnt/hscratch/dabercro/skims2/BDT_Signal.root", TString backName = "/mnt/hscratch/dabercro/skims2/BDT_Background.root") { TMVA::Tools::Instance(); std::cout << "==> Start TMVAClassification" << std::endl; // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName( "TMVA/TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationCategory", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;N" ); // A very simple MVA (feel free to uncomment and comment what you like) => as a rule of thumb 10-20 variables is where people start to get worried about total number std::ifstream configFile; configFile.open(inputVariables.Data()); TString tempFormula; configFile >> tempFormula; // Is the name of the BDT while(configFile >> tempFormula){ // looping on the stream state avoids re-adding the last token at EOF if(tempFormula != ""){ factory->AddVariable(tempFormula,'F'); } } TString lVars; // TCut lCut = "jet1qg2<2.&&jet1pt>250.&&jet1pullAngle>-5.";// < 10 && jet1mass_m2 > 60 && jet1mass_m2 < 120"; // TCut lCut = "passZ > 3 && fjet1pt > 250 && fjet1MassPruned < 120 && fatjetid < 2"; TCut lCut = "abs(fjet1PartonId)!=24&&abs(fjet1PartonId)!=23"; // std::string lEventCut = "event % 2 == 1"; // lCut += lEventCut.c_str(); // TCut lSCut = "passT > 0 && fjet1pt > 250 && fjet1MassPruned < 120 && abs(fjet1PartonId) == 24&& fatjetid < 2"; TCut lSCut = "abs(fjet1PartonId)==24||abs(fjet1PartonId)==23"; // lSCut += lEventCut.c_str(); TCut cleanCut = "fjet1QGtagSub2 > -10 && fjet1PullAngle > -4 && abs(fjet1pt/fjet1MassTrimmed)<200 && abs(fjet1pt/fjet1MassPruned)<200"; TFile *lSAInput = TFile::Open(signalName); TTree *lSASignal = (TTree*)lSAInput ->Get("DMSTree"); TFile *lSBInput = TFile::Open(backName); TTree *lSBSignal = (TTree*)lSBInput ->Get("DMSTree"); Double_t lSWeight = 1.0; Double_t lBWeight = 1.0; gROOT->cd( outfileName+TString(":/") ); factory->AddSignalTree ( lSASignal, lSWeight ); gROOT->cd( outfileName+TString(":/") ); factory->AddBackgroundTree( lSBSignal, lBWeight ); factory->SetWeightExpression("weight"); std::stringstream pSignal,pBackground; pSignal << "nTrain_Signal="<< lSASignal->GetEntries() << ":nTrain_Background=" << lSBSignal->GetEntries(); // factory->PrepareTrainingAndTestTree( lSCut, lCut,(pSignal.str()+":SplitMode=Block:NormMode=NumEvents:!V").c_str() ); factory->PrepareTrainingAndTestTree(lSCut&&cleanCut,lCut&&cleanCut,"nTrain_Signal=0:nTrain_Background=0:SplitMode=Alternate:NormMode=NumEvents:!V"); std::string lName = "alpha_VBF"; TString lBDTDef = "!H:!V:NTrees=400:BoostType=Grad:Shrinkage=0.1:UseBaggedGrad=F:nCuts=2000:NNodesMax=10000:MaxDepth=5:UseYesNoLeaf=F:nEventsMin=200"; // TString lBDTDef = "!H:!V:NTrees=400:BoostType=Grad:Shrinkage=0.1:UseBaggedGrad=F:nCuts=2000:MaxDepth=5:UseYesNoLeaf=F:MinNodeSize=0.086:NegWeightTreatment=IgnoreNegWeightsInTraining"; factory->BookMethod(TMVA::Types::kBDT,"BDT_simple_alpha",lBDTDef); factory->TrainAllMethods(); factory->TestAllMethods(); factory->EvaluateAllMethods(); outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; //if (!gROOT->IsBatch()) TMVAGui( outfileName ); //TString lBDTDef = "!H:!V:NTrees=100:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad=F:nCuts=2000:NNodesMax=10000:MaxDepth=3:SeparationType=GiniIndex"; }
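// classifyBDT reads its variable list from a plain-text file: the first whitespace-separated token is taken as the BDT's name and skipped, and every following token is passed to AddVariable as a formula. A hypothetical trainingVars.txt consistent with that parsing (branch names borrowed from the cuts above; adapt them to the actual tree):
// trainingVars.txt (hypothetical example) -- first token is the BDT name,
// the rest are variable expressions, one per whitespace-separated token:
//   alpha_VBF
//   fjet1pt
//   fjet1MassPruned
//   fjet1MassTrimmed
//   fjet1QGtagSub2
//   fjet1PullAngle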
void TMVAMulticlass(){ TString outfileName = "TMVAMulticlass.root"; TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=multiclass" ); factory->AddVariable( "var0", 'F' ); factory->AddVariable( "var1", 'F' ); TFile *input(0); TString fname = "./data.root"; if (!gSystem->AccessPathName( fname )) { // first we try to find data.root in the local directory std::cout << "--- TMVAMulticlass : Accessing " << fname << std::endl; input = TFile::Open( fname ); } else { gROOT->LoadMacro( "./createData.C"); create_multiclassdata(20000); cout << " created data.root for tests of the multiclass features"<<endl; input = TFile::Open( fname ); } if (!input) { std::cout << "ERROR: could not open data file" << std::endl; exit(1); } TTree *tree = (TTree*)input->Get("TreeR"); gROOT->cd( outfileName+TString(":/") ); factory->AddTree ( tree, "Signal1", 1. , "cls==0" ); factory->AddTree ( tree, "Signal2", 1. , "cls==1" ); factory->AddTree ( tree, "Background", 1., "cls==2" ); factory->PrepareTrainingAndTestTree( "", "SplitMode=Random:NormMode=NumEvents:!V" ); factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5"); factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:VarTransform=N:NCycles=100:HiddenLayers=N+5,3:TestRate=5"); // testing vartransforms factory->BookMethod( TMVA::Types::kMLP, "MLP2", "!H:!V:NeuronType=tanh:NCycles=100:HiddenLayers=N+5,3:TestRate=5"); factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x0*x1:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
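// Application sketch for the multiclass BDT trained above. The Reader is set up
// with the same two variables; EvaluateMulticlass returns one output per class,
// in the order the trees were registered (Signal1, Signal2, Background). The
// weight-file path assumes TMVA's default weights/<JobName>_<MethodName>.weights.xml
// layout for the job name "TMVAClassification" used above.
void applyMulticlassSketch() {
   TMVA::Reader *reader = new TMVA::Reader( "!Color:!Silent" );
   Float_t var0 = 0, var1 = 0;
   reader->AddVariable( "var0", &var0 );
   reader->AddVariable( "var1", &var1 );
   reader->BookMVA( "BDT", "weights/TMVAClassification_BDT.weights.xml" );
   var0 = 0.5; var1 = -0.5; // in practice: fill from the event under test
   const std::vector<Float_t>& response = reader->EvaluateMulticlass( "BDT" );
   std::cout << "Signal1=" << response[0] << " Signal2=" << response[1]
             << " Background=" << response[2] << std::endl;
   delete reader;
}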
void ZTMVAClassification( TString myMethodList = "" ) { //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Cut optimisation Use["Cuts"] = 0; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // // --- 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 0; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // --- Multidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 0; Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting Use["KNN"] = 0; // k-nearest neighbour method // // --- Linear Discriminant Analysis Use["LD"] = 0; // Linear Discriminant identical to Fisher Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 0; // minimisation of user-defined function using Genetic Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 1; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 0; // Recommended ANN with BFGS training method and Bayesian regulator Use["CFMlpANN"] = 0; // Deprecated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 1; // uses Adaptive Boost Use["BDTG"] = 1; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 1; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of Fisher discriminant for node splitting // // --- Friedman's RuleFit method, i.e., an optimised series of cuts ("rules") Use["RuleFit"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. 
TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); //factory->AddVariable( "maxpioneta", "maxpioneta", "", 'F' ); //factory->AddVariable( "minpioneta", "minpioneta", "", 'F' ); factory->AddVariable( "nTT", "nTT", "", 'F' ); // factory->AddVariable( "pidpimin", "pidpimin", "", 'F' ); // factory->AddVariable( "pidpimax", "pidpimax", "", 'F' ); factory->AddVariable( "normxpt", "normxpt", "", 'F' ); factory->AddVariable( "eta", "eta", "", 'F' ); //factory->AddVariable( "phi", "phi", "", 'F' ); // factory->AddVariable( "normptsum", "normptsum", "", 'F' ); //factory->AddVariable( "ptAsym", "ptAsym", "", 'F' ); //factory->AddVariable( "dphimax", "dphimax", "", 'F' ); //factory->AddVariable( "dphimin", "dphimin", "", 'F' ); //factory->AddVariable( "drmax", "drmax", "", 'F' ); // factory->AddVariable( "drmin", "drmin", "", 'F' ); // factory->AddVariable( "normpionp", "normpionp", "", 'F' ); factory->AddVariable( "normminpionpt", "normminpionpt", "", 'F' ); //factory->AddVariable( "normminpionp", "normminpionp", "", 'F' ); factory->AddVariable( "normmaxpionpt", "normmaxpionpt", "", 'F' ); // factory->AddVariable( "normptj", "normptj", "", 'F' ); //factory->AddVariable( "jmasspull", "jmasspull", "", 'F' ); //factory->AddVariable( "vchi2dof", "vchi2dof", "", 'F' ); // factory->AddVariable("maxchi2","maxchi2","", 'F'); // factory->AddVariable("normr","normr","", 'F'); // factory->AddVariable("normq","normq","", 'F'); //factory->AddVariable("normminm","normminm","", 'F'); factory->AddVariable("logipmax","logipmax","", 'F'); factory->AddVariable("logipmin","logipmin","", 'F'); factory->AddVariable("logfd","logfd","", 'F'); factory->AddVariable("logvd","logvd","", 'F'); //factory->AddVariable("pointAngle","pointingAngle",'F'); factory->AddVariable("logvpi","","", 'F'); // note: the empty-string argument is the unit; without it, 'F' would be swallowed as the unit/title //factory->AddVariable("logmaxprob","",'F'); //factory->AddVariable("logminprob","",'F'); factory->AddSpectator( "mReFit", "mReFit", "", 'D' ); // factory->AddSpectator( "Qdecay", "Qdecay", "",'F' ); // factory->AddSpectator( "m23", "m23", "",'F' ); // TFile * input_Background = new TFile("../back.root"); TFile * input_Signal = new TFile("../cmx12.root"); TFile * input_Background = new TFile("../background12.root"); std::cout << "--- TMVAClassification : Using input file for signal : " << input_Signal->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input file for background : " << input_Background->GetName() << std::endl; // --- Register the training and test trees TTree *signal = (TTree*)input_Signal->Get("psiCand"); TTree *background = (TTree*)input_Background->Get("psiCand"); // global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; // You can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = "QDecay < 300&&fdchi2 > 300"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = "QDecay < 300&&fdchi2 > 300"; // for example: TCut mycutb = "abs(var1)<0.5"; factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // ---- Book MVA methods // // 
Please look up the various method configuration options in the corresponding cxx files, e.g.: // src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood ("naive Bayes estimator") if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density estimator to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", 
"!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", 
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]){ // factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); //factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=Norm:NCycles=600:HiddenLayers=N+5:TestRate=5" ); // factory->BookMethod( TMVA::Types::kMLP, "MLPCE", "H:!V:NeuronType=sigmoid:VarTransform=Norm:NCycles=600:HiddenLayers=N+5:TestRate=5:EstimatorType=CE" ); factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=sigmoid:VarTransform=Norm:NCycles=600:HiddenLayers=9:TestRate=5:EstimatorType=CE" ); // factory->BookMethod( TMVA::Types::kMLP, "MLPCE83", "H:!V:NeuronType=tanh:VarTransform=Norm:NCycles=600:HiddenLayers=8,3:TestRate=5:EstimatorType=CE" ); } if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... 
// Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) { // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5" ); // factory->BookMethod( TMVA::Types::kBDT, "BDTGI", // "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5:SeparationType=GiniIndexWithLaplace" ); // factory->BookMethod( TMVA::Types::kBDT, "BDTG6", // "!H:!V:NTrees=600:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.4:nCuts=20:NNodesMax=6" ); //factory->BookMethod( TMVA::Types::kBDT, "BDTG2", // "!H:!V:NTrees=800:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.4:nCuts=20:NNodesMax=6" ); factory->BookMethod( TMVA::Types::kBDT, "BDTG3", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.4:nCuts=20:NNodesMax=6" ); // factory->BookMethod( TMVA::Types::kBDT, "BDTG4", // "!H:!V:NTrees=1200:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.4:nCuts=20:NNodesMax=6" ); // factory->BookMethod( TMVA::Types::kBDT, "BDTG5", // "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.4:nCuts=20:NNodesMax=5" ); } if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", "!H:!V:NTrees=50:nEventsMin=150:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier usage, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","GA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // 
-------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros // if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
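// Note: ZTMVAClassification above accepts a myMethodList argument but never
// parses it -- the hard-coded Use map alone decides which methods get booked.
// If command-line method selection is wanted, the standard TMVA selection loop
// (used verbatim in the macros that follow) could be pasted in right after the
// Use map is filled:
//
//   if (myMethodList != "") {
//      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;
//      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
//      for (UInt_t i=0; i<mlist.size(); i++) {
//         std::string regMethod(mlist[i]);
//         if (Use.find(regMethod) == Use.end()) {
//            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
//            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
//            std::cout << std::endl;
//            return;
//         }
//         Use[regMethod] = 1;
//      }
//   }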
void TMVAClassification( TString myMethodList = "" ) { // TString curDynamicPath( gSystem->GetDynamicPath() ); // gSystem->SetDynamicPath( "/usr/local/bin/root/bin:" + curDynamicPath ); // TString curIncludePath(gSystem->GetIncludePath()); // gSystem->SetIncludePath( " -I /usr/local/bin/root/include " + curIncludePath ); // // load TMVA shared library created in local release: for MAC OSX // if (TString(gSystem->GetBuildArch()).Contains("macosx") ) gSystem->Load( "libTMVA.so" ); // gSystem->Load( "libTMVA" ); // TMVA::Tools::Instance(); // // welcome the user // TMVA::gTools().TMVAWelcomeMessage(); // TMVAGlob::SetTMVAStyle(); // // this loads the library // TMVA::Tools::Instance(); //--------------------------------------------------------------- // default MVA methods to be trained + tested std::map<std::string,int> Use; Use["Cuts"] = 1; // Use["Likelihood"] = 1; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // Create a new root output file. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory will // then run the performance analysis for you. // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" 
(not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] // factory->AddVariable( "myvar1 := var1+var2", 'F' ); // factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' ); // factory->AddVariable( "var3", "Variable 3", "units", 'F' ); // factory->AddVariable( "var4", "Variable 4", "units", 'F' ); factory->AddVariable("deltaEta := deta", 'F'); factory->AddVariable("deltaPhi := dphi", 'F'); factory->AddVariable("sigmaIetaIeta := sieie", 'F'); factory->AddVariable("HoverE := hoe", 'F'); factory->AddVariable("trackIso := trackiso", 'F'); factory->AddVariable("ecalIso := ecaliso", 'F'); factory->AddVariable("hcalIso := hcaliso", 'F'); //factory->AddVariable("nMissingHits := misshits", 'I'); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables factory->AddSpectator( "et", 'F' ); factory->AddSpectator( "eta", 'F' ); factory->AddSpectator( "phi", 'F' ); // read training and test data TFile *input = TFile::Open( "SigElectrons.root" ); TFile *inputB = TFile::Open( "BkgElectrons.root" ); std::cout << "--- TMVAClassification : Using input file: " << input->GetName() << std::endl; TTree *signal = (TTree*)input->Get("ntuple"); TTree *background = (TTree*)inputB->Get("ntuple"); factory->AddSignalTree ( signal, 1.0 ); factory->AddBackgroundTree( background, 1.0 ); // This would set individual event weights (the variables defined in the // expression need to exist in the original TTree) // for signal : factory->SetSignalWeightExpression("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("weight1*weight2"); //factory->SetBackgroundWeightExpression("weight"); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; TCut mycutb = ""; // tell the factory to use all remaining events in the trees after training for testing: factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // If no numbers of events are given, half of the events in the tree are used for training, and // the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); // ---- Book MVA methods // // please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the 
third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); // Likelihood if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // gROOT->ProcessLine(".x /usr/local/bin/root/tmva/test/correlations.C"); gROOT->ProcessLine(".x /usr/local/bin/root/tmva/test/variables.C"); }
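// Application sketch for the electron-ID training above. When a variable was
// added with a "label := expression" string, the Reader must be given the same
// full string, and spectators declared at training time must be registered as
// well. For the Cuts method, the optional second argument of EvaluateMVA is the
// desired signal efficiency; the return value is the pass/fail decision of the
// cut set tuned for that efficiency. The weight-file path below assumes TMVA's
// default weights/TMVAClassification_<MethodName>.weights.xml layout.
void applyElectronIDSketch() {
   TMVA::Reader *reader = new TMVA::Reader( "!Color:!Silent" );
   Float_t deta = 0, dphi = 0, sieie = 0, hoe = 0, trackiso = 0, ecaliso = 0, hcaliso = 0;
   reader->AddVariable( "deltaEta := deta",       &deta );
   reader->AddVariable( "deltaPhi := dphi",       &dphi );
   reader->AddVariable( "sigmaIetaIeta := sieie", &sieie );
   reader->AddVariable( "HoverE := hoe",          &hoe );
   reader->AddVariable( "trackIso := trackiso",   &trackiso );
   reader->AddVariable( "ecalIso := ecaliso",     &ecaliso );
   reader->AddVariable( "hcalIso := hcaliso",     &hcaliso );
   Float_t et = 0, eta = 0, phi = 0;
   reader->AddSpectator( "et",  &et );
   reader->AddSpectator( "eta", &eta );
   reader->AddSpectator( "phi", &phi );
   reader->BookMVA( "Cuts", "weights/TMVAClassification_Cuts.weights.xml" );
   // ... fill the input variables from the candidate under test, then:
   Bool_t passed90 = reader->EvaluateMVA( "Cuts", 0.9 ) > 0.5; // cuts tuned for 90% signal efficiency
   std::cout << "passes 90%-efficiency cuts: " << passed90 << std::endl;
   delete reader;
}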
//require mumucl>0.6 //opening angle >10 //coplanarity >90 //pang<90 void TMVAClassification_cc1pcoh_bdt_ver6noveract( TString myMethodList = "" ) { //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // to get access to the GUI and all TMVA macros TString thisdir = gSystem->DirName(gInterpreter->GetCurrentMacroName()); gROOT->SetMacroPath(thisdir + ":" + gROOT->GetMacroPath()); gROOT->ProcessLine(".L TMVAGui.C"); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Cut optimisation Use["Cuts"] = 1; Use["CutsD"] = 1; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // // --- 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 1; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 1; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // --- Multidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 1; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 1; Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting Use["KNN"] = 1; // k-nearest neighbour method // // --- Linear Discriminant Analysis Use["LD"] = 1; // Linear Discriminant identical to Fisher Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 1; // minimisation of user-defined function using Genetic Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 0; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 1; // Recommended ANN with BFGS training method and Bayesian regulator Use["CFMlpANN"] = 0; // Deprecated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN // // --- Support Vector Machine Use["SVM"] = 1; // // --- Boosted Decision Trees Use["BDT"] = 1; // uses Adaptive Boost Use["BDTG"] = 0; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of Fisher discriminant for node splitting // // --- Friedman's RuleFit method, i.e., an optimised series of cuts ("rules") Use["RuleFit"] = 1; // --------------------------------------------------------------- // Choose method std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // --------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. 
TString outfileName( "TMVA_cc1pcoh_bdt_ver6noveract.root" );//newchange TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification_ver6noveract", outputFile,//newchange "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // Add variable //sprintf(select, "Ntrack==2&&mumucl>0.6&&pmucl>0.25&&pang<90&&muang_t<15 && veract*7.66339869e-2<34"); //factory->AddVariable( "Ntrack", 'F' ); factory->AddVariable( "mumucl", 'F' ); factory->AddVariable( "pmucl", 'F' ); factory->AddVariable( "pang_t", 'F' );//use pang instead of pang_t factory->AddVariable( "muang_t", 'F' ); //factory->AddVariable( "veract", 'F' ); factory->AddVariable( "ppe", 'F'); factory->AddVariable( "mupe", 'F'); factory->AddVariable( "range", 'F'); factory->AddVariable( "coplanarity", 'F'); factory->AddVariable( "opening", 'F');//newadd // Add spectator factory->AddSpectator( "fileIndex", 'I' ); factory->AddSpectator( "nuE", 'F' ); factory->AddSpectator( "inttype", 'I' ); factory->AddSpectator( "norm", 'F' ); factory->AddSpectator( "totcrsne", 'F' ); factory->AddSpectator( "veract", 'F' ); factory->AddSpectator( "pang", 'F' ); factory->AddSpectator( "mupdg", 'I' ); factory->AddSpectator( "ppdg", 'I' ); // --------------------------------------------------------------- // --- Get weight TString fratioStr="/home/kikawa/macros/nd34_tuned_11bv3.1_250ka.root"; // --------------------------------------------------------------- // --- Add sample TString fsignalStr="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmva/pm_merged_ccqe_tot.root"; TString fbarStr="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmva/pmbar_merged_ccqe.root"; TString fbkgStr="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmva/wall_merged_ccqe_tot.root"; TString fbkg2Str="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmva/ingrid_merged_nd3_ccqe_tot.root"; /*TString fsignalStr="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmvafix/pm_merged_ccqe_tot.root"; TString fbarStr="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmvafix/pmbar_merged_ccqe.root"; TString fbkgStr="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmvafix/wall_merged_ccqe_tot.root"; TString fbkg2Str="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmvafix/ingrid_merged_nd3_ccqe_tot.root";*/ TFile *pfileSignal = new TFile(fsignalStr); TFile *pfileBar = new TFile(fbarStr); TFile *pfileBkg = new TFile(fbkgStr); TFile *pfileBkg2 = new TFile(fbkg2Str); TFile *pfileRatio = new TFile(fratioStr); TTree *ptree_sig = (TTree*)pfileSignal->Get("tree"); TTree *ptree_bar = (TTree*)pfileBar->Get("tree"); TTree *ptree_bkg = (TTree*)pfileBkg->Get("tree"); TTree *ptree_bkg2 = (TTree*)pfileBkg2->Get("tree"); // POT normalization const int nmcFile = 3950; const int nbarFile = 986; const int nbkgFile = 55546;//(31085+24461); const int nbkg2File = 7882;//(3941+3941); // global event weights per tree (see below for setting event-wise weights) // adding for signal sample // using this as standard and add other later Double_t signalWeight_sig = 1.0; Double_t backgroundWeight_sig = 1.0; factory->AddSignalTree ( ptree_sig, signalWeight_sig ); factory->AddBackgroundTree( ptree_sig, backgroundWeight_sig ); // Add Numubar sample //Double_t signalWeight_bar = nmcFile/float(nbarFile); Double_t backgroundWeight_bar = nmcFile/float(nbarFile); //factory->AddSignalTree ( ptree_bar, signalWeight_bar ); factory->AddBackgroundTree( ptree_bar, backgroundWeight_bar ); // Add wall background //Double_t signalWeight_bkg = nmcFile/float(nbkgFile); Double_t 
backgroundWeight_bkg = nmcFile/float(nbkgFile); //factory->AddSignalTree ( ptree_bkg, signalWeight_bkg ); factory->AddBackgroundTree( ptree_bkg, backgroundWeight_bkg ); // Add INGRID background //Double_t signalWeight_bkg2 = nmcFile/float(nbkg2File); Double_t backgroundWeight_bkg2 = nmcFile/float(nbkg2File); //factory->AddSignalTree ( ptree_bkg2, signalWeight_bkg2 ); factory->AddBackgroundTree( ptree_bkg2, backgroundWeight_bkg2 ); //factory->SetSignalWeightExpression ("norm*totcrsne*2.8647e-13"); //factory->SetBackgroundWeightExpression( "norm*totcrsne*2.8647e-13" ); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = "Ntrack==2 && abs(inttype)==16 && fileIndex==1 && pang<90 && mumucl>0.6 && opening>10 && coplanarity>90 && pmucl>0.2"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = "Ntrack==2 && (abs(inttype)!=16 || fileIndex>1) && pang<90 && mumucl>0.6 && opening>10 && coplanarity>90 && pmucl>0.2"; // for example: TCut mycutb = "abs(var1)<0.5"; // Tell the factory how to use the training and testing events // // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood ("naive Bayes estimator") if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", 
"!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density estimator to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" ); // Function discrimination analysis (FDA) -- test of various fitters - 
the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... 
// Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" ); /*if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );*/ if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=850:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" ); if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier usage, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // ---- STILL EXPERIMENTAL and only implemented for BDT's ! // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","FitGA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros //if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
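// Application sketch for the cc1pcoh BDT trained above: book the weight file and
// histogram the response over an ntuple. Branch types are assumed to be float
// (adjust SetBranchAddress if the tree stores doubles), the default file name is
// only a placeholder, and the path assumes TMVA's default
// weights/<JobName>_<MethodName>.weights.xml layout for the job name
// "TMVAClassification_ver6noveract" used above.
void applyCc1pcohSketch( TString fileName = "pm_merged_ccqe_tot.root" ) {
   TMVA::Reader *reader = new TMVA::Reader( "!Color:!Silent" );
   Float_t mumucl = 0, pmucl = 0, pang_t = 0, muang_t = 0, ppe = 0, mupe = 0, range = 0, coplanarity = 0, opening = 0;
   reader->AddVariable( "mumucl", &mumucl );   reader->AddVariable( "pmucl", &pmucl );
   reader->AddVariable( "pang_t", &pang_t );   reader->AddVariable( "muang_t", &muang_t );
   reader->AddVariable( "ppe", &ppe );         reader->AddVariable( "mupe", &mupe );
   reader->AddVariable( "range", &range );     reader->AddVariable( "coplanarity", &coplanarity );
   reader->AddVariable( "opening", &opening );
   // spectators must be registered exactly as at training time
   Int_t fileIndex = 0, inttype = 0, mupdg = 0, ppdg = 0;
   Float_t nuE = 0, norm = 0, totcrsne = 0, veract = 0, pang = 0;
   reader->AddSpectator( "fileIndex", &fileIndex ); reader->AddSpectator( "nuE", &nuE );
   reader->AddSpectator( "inttype", &inttype );     reader->AddSpectator( "norm", &norm );
   reader->AddSpectator( "totcrsne", &totcrsne );   reader->AddSpectator( "veract", &veract );
   reader->AddSpectator( "pang", &pang );           reader->AddSpectator( "mupdg", &mupdg );
   reader->AddSpectator( "ppdg", &ppdg );
   reader->BookMVA( "BDT", "weights/TMVAClassification_ver6noveract_BDT.weights.xml" );
   TFile *f = TFile::Open( fileName );
   TTree *t = (TTree*)f->Get( "tree" );
   t->SetBranchAddress( "mumucl", &mumucl );   t->SetBranchAddress( "pmucl", &pmucl );
   t->SetBranchAddress( "pang_t", &pang_t );   t->SetBranchAddress( "muang_t", &muang_t );
   t->SetBranchAddress( "ppe", &ppe );         t->SetBranchAddress( "mupe", &mupe );
   t->SetBranchAddress( "range", &range );     t->SetBranchAddress( "coplanarity", &coplanarity );
   t->SetBranchAddress( "opening", &opening );
   TH1F *hBDT = new TH1F( "hBDT", "BDT response;BDT output;events", 50, -1, 1 );
   for (Long64_t i = 0; i < t->GetEntries(); ++i) { t->GetEntry(i); hBDT->Fill( reader->EvaluateMVA( "BDT" ) ); }
   hBDT->Draw();
   delete reader;
}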
void TMVA_stop( TString signal_name = "T2tt", int train_region = 1, float x_parameter = 0.25) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVA_stop.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVA_stop.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) //----------------------------------------------------- // define event selection (store in TCut sel) //----------------------------------------------------- TCut njets4("mini_njets>=4"); TCut met100("mini_met>=100"); TCut mt120("mini_mt>=120"); TCut nb1("mini_nb>=1"); TCut isotrk("mini_passisotrk==1"); TCut lep_pt30("mini_nlep>=1 && mini_lep1pt>30.0"); TCut sig("mini_sig==1"); TCut sel0 = njets4 + met100 + mt120 + nb1 + isotrk + lep_pt30 + sig; cout << "Using selection : " << sel0.GetTitle() << endl; cout << "Doing signal point : " << train_region << endl; //----------------------------------------------------- // choose which variables to include in MVA training //----------------------------------------------------- std::map<std::string,int> mvaVar; mvaVar[ "met" ] = 1; mvaVar[ "lep1pt" ] = 0; mvaVar[ "mt2w" ] = 1; mvaVar[ "htratiom" ] = 1; mvaVar[ "chi2" ] = 1; mvaVar[ "dphimjmin" ] = 1; mvaVar[ "pt_b" ] = 0; mvaVar[ "nb" ] = 0; mvaVar[ "pt_J1" ] = 0; mvaVar[ "pt_J2" ] = 0; mvaVar[ "rand" ] = 0; mvaVar[ "mt" ] = 0; mvaVar[ "mt2bl" ] = 0; mvaVar[ "mt2b" ] = 0; mvaVar[ "lep1eta" ] = 0; mvaVar[ "thrjetlm" ] = 0; mvaVar[ "apljetlm" ] = 0; mvaVar[ "sphjetlm" ] = 0; mvaVar[ "cirjetlm" ] = 0; mvaVar[ "chi2min" ] = 0; mvaVar[ "chi2min_mt2b" ] = 0; mvaVar[ "chi2min_mt2bl" ] = 0; mvaVar[ "chi2min_mt2w" ] = 0; mvaVar[ "mt2bmin" ] = 0; mvaVar[ "mt2blmin" ] = 0; mvaVar[ "mt2wmin_chi2" ] = 0; mvaVar[ "mt2bmin_chi2" ] = 0; mvaVar[ "mt2blmin_chi2" ] = 0; mvaVar[ "mt2wmin_chi2prob" ] = 0; mvaVar[ "mt2bmin_chi2prob" ] = 0; mvaVar[ "mt2blmin_chi2prob" ] = 0; mvaVar[ "htratiol" ] = 0; mvaVar[ "dphimj1" ] = 0; mvaVar[ "dphimj2" ] = 0; mvaVar[ "metsig" ] = 0; //--------------------------------- //choose bkg samples to include //--------------------------------- cout << "Background trees: " << endl; int n_backgrounds = 8; TString backgrounds[] = {"ttdl_powheg", "ttsl_powheg", "w1to4jets", "tW_lep", "triboson", "diboson", "ttV", "DY1to4Jtot" }; TString bkgPath = "/nfs-3/userdata/stop/Train/V00-02-18__V00-03-00_4jetsMET100_bkg/"; TChain* chBackground = new TChain("t"); for (int i = 0; i < n_backgrounds; i++) { TString backgroundChain = bkgPath + "/" + backgrounds[i] + ".root"; cout << " " << backgroundChain << endl; chBackground ->Add(backgroundChain ); } //--------------------------------- //choose signal sample to include //--------------------------------- cout << "Signal trees: " << endl; TString s_train_region = ""; s_train_region += train_region; TString s_x_parameter = ""; s_x_parameter = Form("%.2f",x_parameter); TString signalPath = "/nfs-3/userdata/stop/Train/"; TString signalVersion = "V00-02-18__V00-03-00_4jetsMET100_"; TChain *chSignal = new TChain("t"); TString base_name = signalPath + "/" + signalVersion + signal_name + "/" + signal_name + "_" + s_train_region; if (signal_name == 
"T2bw") base_name = base_name + "_" + s_x_parameter; TString signalChain = base_name + ".root" ; cout << " " << signalChain << endl; chSignal->Add(signalChain); //----------------------------------------------------- // choose backgrounds to include for multiple outputs //----------------------------------------------------- // bool doMultipleOutputs = false; // TChain *chww = new TChain("Events"); // chww->Add(Form("%s/WWTo2L2Nu_PU_testFinal_baby.root",babyPath)); // chww->Add(Form("%s/GluGluToWWTo4L_PU_testFinal_baby.root",babyPath)); // TChain *chwjets = new TChain("Events"); // chwjets->Add(Form("%s/WJetsToLNu_PU_testFinal_baby.root",babyPath)); // TChain *chtt = new TChain("Events"); // chtt->Add(Form("%s/TTJets_PU_testFinal_baby.root",babyPath)); // std::map<std::string,int> includeBkg; // includeBkg["ww"] = 1; // includeBkg["wjets"] = 0; // includeBkg["tt"] = 0; //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Cut optimisation Use["Cuts"] = 0; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // // --- 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 0; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // --- Mutidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 0; Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting Use["KNN"] = 0; // k-nearest neighbour method // // --- Linear Discriminant Analysis Use["LD"] = 0; // Linear Discriminant identical to Fisher Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 0; // minimisation of user-defined function using Genetics Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 0; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 0; // Recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 1; // uses Adaptive Boost Use["BDT1"] = 0; // uses Adaptive Boost Use["BDTG"] = 0; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost // // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules") Use["RuleFit"] = 0; // // --- multi-output MVA's Use["multi_BDTG"] = 0; Use["multi_MLP"] = 0; Use["multi_FDA_GA"] = 0; // // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. 
TString outfileName = "TMVA_" + signal_name + "_" + s_train_region; if (signal_name == "T2bw") outfileName = outfileName +"_" + s_x_parameter; outfileName += ".root"; TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TString classification_name = "classification_" + signal_name + "_" + s_train_region; if (signal_name == "T2bw") classification_name = classification_name +"_" + s_x_parameter; /* TString multioutfileName( "TMVA_HWW_multi.root" ); TFile* multioutputFile; if( doMultipleOutputs ) multioutputFile = TFile::Open( multioutfileName, "RECREATE" ); */ // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( classification_name, outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); /* TMVA::Factory *multifactory; if( doMultipleOutputs ) multifactory= new TMVA::Factory( "TMVAMulticlass", multioutputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=multiclass" ); */ // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] //factory->AddVariable( "myvar1 := var1+var2", 'F' ); //factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' ); //factory->AddVariable( "var3", "Variable 3", "units", 'F' ); //factory->AddVariable( "var4", "Variable 4", "units", 'F' ); //-------------------------------------------------------- // choose which variables to include in training //-------------------------------------------------------- if( mvaVar[ "met" ] == 1 ) factory->AddVariable( "mini_met" , "E_{T}^{miss}" , "GeV", 'F' ); if( mvaVar[ "mt" ] == 1 ) factory->AddVariable( "mini_mt" , "M_{T}" , "GeV", 'F' ); if( mvaVar[ "mt2w" ] == 1 ) factory->AddVariable( "mini_mt2w" , "MT2W" , "GeV", 'F' ); if( mvaVar[ "mt2bl" ] == 1 ) factory->AddVariable( "mini_mt2bl" , "MT2bl" , "GeV", 'F' ); if( mvaVar[ "mt2b" ] == 1 ) factory->AddVariable( "mini_mt2b" , "MT2b" , "GeV", 'F' ); if( mvaVar[ "chi2" ] == 1 ) factory->AddVariable( "mini_chi2" , "chi2" , "" , 'F' ); if( mvaVar[ "lep1pt" ] == 1 ) factory->AddVariable( "mini_lep1pt" , "lepton pt" , "" , 'F' ); if( mvaVar[ "lep1eta" ] == 1 ) factory->AddVariable( "mini_lep1eta" , "lepton eta" , "" , 'F' ); if( mvaVar[ "thrjetlm" ] == 1 ) factory->AddVariable( "mini_thrjetlm" , "thrust" , "" , 'F' ); if( mvaVar[ "apljetlm" ] == 1 ) factory->AddVariable( "mini_apljetlm" , "aplanarity" , "" , 'F' ); if( mvaVar[ "sphjetlm" ] == 1 ) factory->AddVariable( "mini_sphjetlm" , "sphericity" , "" , 'F' ); if( mvaVar[ "cirjetlm" ] == 1 ) factory->AddVariable( "mini_cirjetlm" , "circularity" , "" , 'F' ); if( mvaVar[ "chi2min" ] == 1 ) factory->AddVariable( "mini_min(chi2min,100)" , "#chi^{2}_{min}" , "" , 'F' ); if( 
mvaVar[ "chi2minprob" ] == 1 ) factory->AddVariable( "mini_chi2minprob" , "Prob(#chi^{2}_{min})" , "" , 'F' ); if( mvaVar[ "chi2min_mt2b" ] == 1 ) factory->AddVariable( "mini_chi2min_mt2b" , "MT2b(#chi^{2}_{min})" , "" , 'F' ); if( mvaVar[ "chi2min_mt2bl" ] == 1 ) factory->AddVariable( "mini_chi2min_mt2bl" , "MT2bl(#chi^{2}_{min})" , "" , 'F' ); if( mvaVar[ "chi2min_mt2w" ] == 1 ) factory->AddVariable( "mini_chi2min_mt2w" , "MT2W(#chi^{2}_{min})" , "" , 'F' ); if( mvaVar[ "mt2bmin" ] == 1 ) factory->AddVariable( "mini_mt2bmin" , "MT2b_{min}" , "" , 'F' ); if( mvaVar[ "mt2blmin" ] == 1 ) factory->AddVariable( "mini_mt2blmin" , "MT2bl_{min}" , "" , 'F' ); if( mvaVar[ "mt2wmin" ] == 1 ) factory->AddVariable( "mini_mt2wmin" , "MT2W_{min}" , "" , 'F' ); if( mvaVar[ "mt2bmin_chi2" ] == 1 ) factory->AddVariable( "min(mt2bmin_chi2,100)" , "#chi^{2}(MT2b_{min})" , "" , 'F' ); if( mvaVar[ "mt2blmin_chi2" ] == 1 ) factory->AddVariable( "min(mt2blmin_chi2,100)" , "#chi^{2}(MT2bl_{min})" , "" , 'F' ); if( mvaVar[ "mt2wmin_chi2" ] == 1 ) factory->AddVariable( "min(mt2wmin_chi2,100)" , "#chi^{2}(MT2W_{min})" , "" , 'F' ); if( mvaVar[ "mt2bmin_chi2prob" ] == 1 ) factory->AddVariable( "mt2bmin_chi2prob" , "Prob(#chi^{2}(MT2b_{min}))" , "" , 'F' ); if( mvaVar[ "mt2blmin_chi2prob" ] == 1 ) factory->AddVariable( "mt2blmin_chi2prob" , "Prob(#chi^{2}(MT2bl_{min}))" , "" , 'F' ); if( mvaVar[ "mt2wmin_chi2prob" ] == 1 ) factory->AddVariable( "mt2wmin_chi2prob" , "Prob(#chi^{2}(MT2W_{min}))" , "" , 'F' ); if( mvaVar[ "htratiol" ] == 1 ) factory->AddVariable( "mini_htssl/(mini_htosl+mini_htssl)" , "H_{T}^{SSL}/H_{T}" , "" , 'F' ); if( mvaVar[ "htratiom" ] == 1 ) factory->AddVariable( "mini_htssm/(mini_htosm+mini_htssm)" , "H_{T}^{SSM}/H_{T}" , "" , 'F' ); if( mvaVar[ "dphimj1" ] == 1 ) factory->AddVariable( "mini_dphimj1" , "#Delta#phi(j1,E_{T}^{miss})", "" , 'F' ); if( mvaVar[ "dphimj2" ] == 1 ) factory->AddVariable( "mini_dphimj2" , "#Delta#phi(j2,E_{T}^{miss})", "" , 'F' ); if( mvaVar[ "dphimjmin" ] == 1 ) factory->AddVariable( "mini_dphimjmin" , "min(#Delta#phi(j_{1,2},E_{T}^{miss}))", "" , 'F' ); if( mvaVar[ "rand" ] == 1 ) factory->AddVariable( "mini_rand" , "random(0,1)" , "" , 'F' ); if( mvaVar[ "metsig" ] == 1 ) factory->AddVariable( "met/sqrt(htosl+htssl)" , "E_{T}^{miss}/#sqrt{H_{T}}" , "#sqrt{GeV}" , 'F' ) ; if( mvaVar[ "pt_b" ] == 1 ) factory->AddVariable( "mini_pt_b" , "P_T(b) GeV" , 'F' ); if( mvaVar[ "nb" ] == 1 ) factory->AddVariable( "mini_nb" , "P_T(b) GeV" , 'F' ); if( mvaVar[ "pt_J1" ] == 1 ) factory->AddVariable( "pt_J1" , "P_T(J1) GeV" , 'F' ); if( mvaVar[ "pt_J2" ] == 1 ) factory->AddVariable( "pt_J2" , "P_T(J2) GeV" , 'F' ); /* if( doMultipleOutputs ){ if (mvaVar["lephard_pt"]) multifactory->AddVariable( "lephard_pt", "1st lepton pt", "GeV", 'F' ); if (mvaVar["lepsoft_pt"]) multifactory->AddVariable( "lepsoft_pt", "2nd lepton pt", "GeV", 'F' ); if (mvaVar["dil_dphi"]) multifactory->AddVariable( "dil_dphi", "dphi(ll)", "", 'F' ); if (mvaVar["dil_mass"]) multifactory->AddVariable( "dil_mass", "M(ll)", "GeV", 'F' ); if (mvaVar["event_type"]) multifactory->AddVariable( "event_type", "Dil Flavor Type", "", 'F' ); if (mvaVar["met_projpt"]) multifactory->AddVariable( "met_projpt", "Proj. 
MET", "GeV", 'F' ); if (mvaVar["met_pt"]) multifactory->AddVariable( "met_pt", "MET", "GeV", 'F' ); if (mvaVar["mt_lephardmet"]) multifactory->AddVariable( "mt_lephardmet", "MT(lep1,MET)", "GeV", 'F' ); if (mvaVar["mt_lepsoftmet"]) multifactory->AddVariable( "mt_lepsoftmet", "MT(lep2,MET)", "GeV", 'F' ); if (mvaVar["mthiggs"]) multifactory->AddVariable( "mthiggs", "MT(Higgs)", "GeV", 'F' ); if (mvaVar["dphi_lephardmet"]) multifactory->AddVariable( "dphi_lephardmet", "dphi(lep1,MET)", "GeV", 'F' ); if (mvaVar["dphi_lepsoftmet"]) multifactory->AddVariable( "dphi_lepsoftmet", "dphi(lep2,MET)", "GeV", 'F' ); if (mvaVar["lepsoft_fbrem"]) multifactory->AddVariable( "lepsoft_fbrem", "2nd lepton f_{brem}", "", 'F' ); if (mvaVar["lepsoft_eOverPIn"]) multifactory->AddVariable( "lepsoft_eOverPIn", "2nd lepton E/p", "", 'F' ); if (mvaVar["lepsoft_qdphi"]) multifactory->AddVariable( "lepsoft_q * lepsoft_dPhiIn", "2nd lepton q#times#Delta#phi", "", 'F' ); } */ // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables //factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' ); //factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' ); // TTree* signalTrainingTree = (TTree*) chSignalTrain; // TTree* signalTestTree = (TTree*) chSignalTest; // // TTree* bkgTrainingTree = (TTree*) chBkgTrain; // TTree* bkgTestTree = (TTree*) chBkgTest; // std::cout << "--- TMVAClassification : Using bkg input files: -------------------" << std::endl; // // TObjArray *listOfBkgFiles = chbackground->GetListOfFiles(); // TIter bkgFileIter(listOfBkgFiles); // TChainElement* currentBkgFile = 0; // // while((currentBkgFile = (TChainElement*)bkgFileIter.Next())) { // std::cout << currentBkgFile->GetTitle() << std::endl; // } // // std::cout << "--- TMVAClassification : Using sig input files: -------------------" << std::endl; // // TObjArray *listOfSigFiles = chsignal->GetListOfFiles(); // TIter sigFileIter(listOfSigFiles); // TChainElement* currentSigFile = 0; // // while((currentSigFile = (TChainElement*)sigFileIter.Next())) { // std::cout << currentSigFile->GetTitle() << std::endl; // } // global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; // You can add an arbitrary number of signal or background trees // factory->AddSignalTree ( chSignal, signalWeight ); // factory->AddBackgroundTree( chBackground, backgroundWeight ); factory->AddTree(chSignal, "Signal", signalWeight, sel0+"mini_rand < 0.5", "train"); factory->AddTree(chSignal, "Signal", signalWeight, sel0+"mini_rand >= 0.5", "test"); factory->AddTree(chBackground, "Background", backgroundWeight, sel0+"mini_rand < 0.5", "train"); factory->AddTree(chBackground, "Background", backgroundWeight, sel0+"mini_rand >= 0.5", "test"); // To give different trees for training and testing, do as follows: //factory->AddSignalTree( signalTrainingTree, signalWeight, "Training" ); //factory->AddSignalTree( signalTestTree, signalWeight, "Test" ); //factory->AddBackgroundTree( bkgTrainingTree, backgroundWeight, "Training" ); //factory->AddBackgroundTree( bkgTestTree, backgroundWeight, "Test" ); // Use the following code instead of the above two or four lines to add signal and background // training and test events "by hand" // NOTE that in this case one should not give 
expressions (such as "var1+var2") in the input // variable definition, but simply compute the expression before adding the event // // // --- begin ---------------------------------------------------------- // std::vector<Double_t> vars( 4 ); // vector has size of number of input variables // Float_t treevars[4], weight; // // // Signal // for (UInt_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (UInt_t i=0; i<signal->GetEntries(); i++) { // signal->GetEntry(i); // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < signal->GetEntries()/2.0) factory->AddSignalTrainingEvent( vars, signalWeight ); // else factory->AddSignalTestEvent ( vars, signalWeight ); // } // // // Background (has event weights) // background->SetBranchAddress( "weight", &weight ); // for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (UInt_t i=0; i<background->GetEntries(); i++) { // background->GetEntry(i); // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight*weight ); // else factory->AddBackgroundTestEvent ( vars, backgroundWeight*weight ); // } // // --- end ------------------------------------------------------------ // // --- end of tree registration // Set individual event weights (the variables must exist in the original TTree) factory->SetSignalWeightExpression ("mini_weight"); factory->SetBackgroundWeightExpression("mini_weight"); /* if( doMultipleOutputs ){ multifactory->AddTree(signal,"Signal"); multifactory->SetSignalWeightExpression ("event_scale1fb"); multifactory->SetBackgroundWeightExpression("event_scale1fb"); multifactory->SetWeightExpression("event_scale1fb"); if( includeBkg["ww"] ){ TTree* ww = (TTree*) chww; multifactory->AddTree(ww,"WW"); cout << "Added WW to multi-MVA" << endl; } if( includeBkg["wjets"] ){ TTree* wjets = (TTree*) chwjets; multifactory->AddTree(wjets,"WJets"); cout << "Added W+jets to multi-MVA" << endl; } if( includeBkg["tt"] ){ TTree* tt = (TTree*) chtt; multifactory->AddTree(tt,"tt"); cout << "Added ttbar multi-MVA" << endl; } } */ // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = sel0; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = sel0; // for example: TCut mycutb = "abs(var1)<0.5"; // Tell the factory how to use the training and testing events // // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); //Use random splitting // factory->PrepareTrainingAndTestTree( mycuts, mycutb, // "nTrain_Signal=100000:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); factory->PrepareTrainingAndTestTree( "", "", "nTrain_Signal=0:nTrain_Background=0:NormMode=None:!V" ); // if( doMultipleOutputs ){ // multifactory->PrepareTrainingAndTestTree( mycuts, 
mycutb, // "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // } //Use alternate splitting //(this is preferable since its easier to track which events were used for training, but the job crashes! need to fix this...) //factory->PrepareTrainingAndTestTree( mycuts, mycutb, // "nTrain_Signal=0:nTrain_Background=0:SplitMode=Alternate:NormMode=NumEvents:!V" ); // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood ("naive Bayes estimator") if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density estimator to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", 
"!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", 
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); // factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:VarTransform=N:NCycles=1000:HiddenLayers=N+N:TestRate=5:!UseRegulator:LearningRate=0.2:DecayRate=0.001:BPMode=batch:BatchSize=500"); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDT1"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT1", "!H:!V:NTrees=200:nEventsMin=300:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=4:PruneMethod=NoPruning" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // if( doMultipleOutputs ){ // if (Use["multi_BDTG"]) // gradient boosted decision trees // multifactory->BookMethod( TMVA::Types::kBDT, "BDTG", 
"!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.50:nCuts=20:NNodesMax=8"); // if (Use["multi_MLP"]) // neural network // multifactory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:NCycles=1000:HiddenLayers=N+5,5:TestRate=5:EstimatorType=MSE"); // if (Use["multi_FDA_GA"]) // functional discriminant with GA minimizer // multifactory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); // } // For an example of the category classifier usage, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","GA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // if( doMultipleOutputs ){ // // Train nulti-MVAs using the set of training events // multifactory->TrainAllMethods(); // // ---- Evaluate all multi-MVAs using the set of test events // multifactory->TestAllMethods(); // // ----- Evaluate and compare performance of all configured multi-MVAs // multifactory->EvaluateAllMethods(); // } // -------------------------------------------------------------- // Save the output outputFile->Close(); //if( doMultipleOutputs ) multioutputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void Classification() { TMVA::Tools::Instance(); TMVA::PyMethodBase::PyInitialize(); TString outfileName("TMVA.root"); TFile *outputFile = TFile::Open(outfileName, "RECREATE"); TMVA::Factory *factory = new TMVA::Factory("TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"); factory->AddVariable("myvar1 := var1+var2", 'F'); factory->AddVariable("myvar2 := var1-var2", "Expression 2", "", 'F'); factory->AddVariable("var3", "Variable 3", "units", 'F'); factory->AddVariable("var4", "Variable 4", "units", 'F'); factory->AddSpectator("spec1 := var1*2", "Spectator 1", "units", 'F'); factory->AddSpectator("spec2 := var1*3", "Spectator 2", "units", 'F'); TString fname = "./tmva_class_example.root"; if (gSystem->AccessPathName(fname)) // file does not exist in local directory gSystem->Exec("curl -O http://root.cern.ch/files/tmva_class_example.root"); TFile *input = TFile::Open(fname); std::cout << "--- TMVAClassification : Using input file: " << input->GetName() << std::endl; // --- Register the training and test trees TTree *tsignal = (TTree *)input->Get("TreeS"); TTree *tbackground = (TTree *)input->Get("TreeB"); // global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; // You can add an arbitrary number of signal or background trees factory->AddSignalTree(tsignal, signalWeight); factory->AddBackgroundTree(tbackground, backgroundWeight); // Set individual event weights (the variables must exist in the original TTree) factory->SetBackgroundWeightExpression("weight"); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; // Tell the factory how to use the training and testing events factory->PrepareTrainingAndTestTree(mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=0:nTest_Background=0:SplitMode=Random:NormMode=NumEvents:!V"); /////////////////// //Booking // /////////////////// // Boosted Decision Trees //PyMVA methods factory->BookMethod(TMVA::Types::kPyRandomForest, "PyRandomForest", "!V:NEstimators=150:Criterion=gini:MaxFeatures=auto:MaxDepth=3:MinSamplesLeaf=1:MinWeightFractionLeaf=0:Bootstrap=kTRUE"); factory->BookMethod(TMVA::Types::kPyAdaBoost, "PyAdaBoost", "!V:BaseEstimator=None:NEstimators=100:LearningRate=1:Algorithm=SAMME.R:RandomState=None"); factory->BookMethod(TMVA::Types::kPyGTB, "PyGTB", "!V:NEstimators=150:Loss=deviance:LearningRate=0.1:Subsample=1:MaxDepth=6:MaxFeatures='auto'"); // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; }
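// --------------------------------------------------------------------------
// Application sketch for the PyMVA methods booked above (an assumption-laden
// outline, not part of the original macro): as in training, the Python
// back-end must be initialised before any Py* weight file can be booked.
// The weight-file path assumes TMVA's default "weights/" directory.
// --------------------------------------------------------------------------
void ApplyPyMVA()
{
   TMVA::Tools::Instance();
   TMVA::PyMethodBase::PyInitialize(); // required before booking Py* methods
   TMVA::Reader reader("!Color:!Silent");
   Float_t myvar1, myvar2, var3, var4, spec1, spec2;
   reader.AddVariable("myvar1 := var1+var2", &myvar1); // same expressions and
   reader.AddVariable("myvar2 := var1-var2", &myvar2); // order as in training
   reader.AddVariable("var3", &var3);
   reader.AddVariable("var4", &var4);
   reader.AddSpectator("spec1 := var1*2", &spec1);
   reader.AddSpectator("spec2 := var1*3", &spec2);
   reader.BookMVA("PyRandomForest", "weights/TMVAClassification_PyRandomForest.weights.xml");
   // ... set branch addresses, loop over the tree and call
   // reader.EvaluateMVA("PyRandomForest") per event ...
}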
void TMVAClassificationCategory( bool UseOffsetMethod = true ) { //--------------------------------------------------------------- // Example for usage of different event categories with classifiers std::cout << std::endl << "==> Start TMVAClassificationCategory" << std::endl; bool batchMode = false; // Create a new root output file. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object (see TMVAClassification.C for more information) std::string factoryOptions( "!V:!Silent:Transformations=I;D;P;G,D" ); if (batchMode) factoryOptions += ":!Color:!DrawProgressBar"; TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationCategory", outputFile, factoryOptions ); // Define the input variables used for the MVA training factory->AddVariable( "var1", 'F' ); factory->AddVariable( "var2", 'F' ); factory->AddVariable( "var3", 'F' ); factory->AddVariable( "var4", 'F' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables factory->AddSpectator( "eta" ); // Load the signal and background event samples from ROOT trees TFile *input(0); TString fname( "" ); if (UseOffsetMethod) fname = "data/toy_sigbkg_categ_offset.root"; else fname = "data/toy_sigbkg_categ_varoff.root"; if (!gSystem->AccessPathName( fname )) { // first try to find the data file in the local directory std::cout << "--- TMVAClassificationCategory: Accessing " << fname << std::endl; input = TFile::Open( fname ); } if (!input) { std::cout << "ERROR: could not open data file: " << fname << std::endl; exit(1); } TTree *signal = (TTree*)input->Get("TreeS"); TTree *background = (TTree*)input->Get("TreeB"); /// Global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; /// You can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; // Tell the factory how to use the training and testing events factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // ---- Book MVA methods // Fisher discriminant factory->BookMethod( TMVA::Types::kFisher, "Fisher", "!H:!V:Fisher" ); // Likelihood factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // --- Categorised classifier TMVA::MethodCategory* mcat = 0; // The variable sets TString theCat1Vars = "var1:var2:var3:var4"; TString theCat2Vars = (UseOffsetMethod ? 
"var1:var2:var3:var4" : "var1:var2:var3"); // Fisher with categories TMVA::MethodBase* fiCat = factory->BookMethod( TMVA::Types::kCategory, "FisherCat","" ); mcat = dynamic_cast<TMVA::MethodCategory*>(fiCat); mcat->AddMethod( "abs(eta)<=1.3", theCat1Vars, TMVA::Types::kFisher, "Category_Fisher_1","!H:!V:Fisher" ); mcat->AddMethod( "abs(eta)>1.3", theCat2Vars, TMVA::Types::kFisher, "Category_Fisher_2","!H:!V:Fisher" ); // Likelihood with categories TMVA::MethodBase* liCat = factory->BookMethod( TMVA::Types::kCategory, "LikelihoodCat","" ); mcat = dynamic_cast<TMVA::MethodCategory*>(liCat); mcat->AddMethod( "abs(eta)<=1.3",theCat1Vars, TMVA::Types::kLikelihood, "Category_Likelihood_1","!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); mcat->AddMethod( "abs(eta)>1.3", theCat2Vars, TMVA::Types::kLikelihood, "Category_Likelihood_2","!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassificationCategory is done!" << std::endl; // Clean up delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
//void TMVAClassification( TString myMethodList = "" ) void Example_Eric( TString myMethodList = "" ) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based on the plugin mechanism) // this loads the library TMVA::Tools::Instance(); Bool_t ReadDataFromAsciiIFormat = kFALSE; // steers the data-loading branch below (default here: read the ROOT trees) //--------------------------------------------------------------- // default MVA methods to be trained + tested std::map<std::string,int> Use; Use["Cuts"] =0; Use["CutsD"] =0; Use["CutsPCA"] =0; Use["CutsGA"] =0; Use["CutsSA"] =0; // --- Use["Likelihood"] =0; Use["LikelihoodD"] =0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] =1; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] =0; Use["LikelihoodMIX"] =0; // --- Use["PDERS"] =0; Use["PDERSD"] =0; Use["PDERSPCA"] =0; Use["PDERSkNN"] =0; // deprecated until further notice Use["PDEFoam"] =0; // -- Use["KNN"] =0; // --- Use["HMatrix"] =0; Use["Fisher"] =0; Use["FisherG"] =0; Use["BoostedFisher"] =0; Use["LD"] =0; // --- Use["FDA_GA"] =0; Use["FDA_SA"] =0; Use["FDA_MC"] =0; Use["FDA_MT"] =0; Use["FDA_GAMT"] =0; Use["FDA_MCMT"] =0; // --- Use["MLP"] = 1; // this is the recommended ANN Use["MLPBFGS"] = 0; // recommended ANN with optional training method Use["CFMlpANN"] =0; // *** missing Use["TMlpANN"] =0; // --- Use["SVM"] =1; // --- Use["BDT"] =1; Use["BDTD"] =0; Use["BDTG"] =0; Use["BDTB"] =0; // --- Use["RuleFit"] =1; // --- Use["Plugin"] =0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // Create a new root output file. TString outfileName( "TMVA_Eric2.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory will // then run the performance analysis for you. // // The first argument is the base of the name of all the // weightfiles in the directory weights/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" 
(not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] // factory->AddVariable( "myvar1 := var1+var2", 'F' ); // factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' ); // factory->AddVariable( "var3", "Variable 3", "units", 'F' ); // factory->AddVariable( "var4", "Variable 4", "units", 'F' ); factory->AddVariable( "Mqq := Mqq", 'F' ); factory->AddVariable( "Pt_qq := Pt_qq", 'F' ); factory->AddVariable( "Eta_qq := Eta_qq", 'F' ); factory->AddVariable( "Charge_qq := Charge_qq", 'F' ); factory->AddVariable( "DPhi_ll := DPhi_ll", 'F' ); factory->AddVariable( "DPt_ll := DPt_ll", 'F' ); //factory->AddVariable( "MinDPhi_lMET := MinDPhi_lMET", 'F' ); //factory->AddVariable( "Aplanarity := aplanarity", 'F' ); //factory->AddVariable( "chargeEta := chargeEta", 'F' ); //factory->AddVariable( "MET := Met", 'F' ); //factory->AddVariable( "MtauJet := MtauJet", 'F' ); //factory->AddVariable( "HT := Ht", 'F' ); //factory->AddVariable( "Chi2 := kinFitChi2", 'F' ); //factory->AddVariable( "DeltaPhiTauMET := DeltaPhiTauMet", 'F' ); //factory->AddVariable( "Mt := Mt", 'F' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables // factory->AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' ); // factory->AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' ); // read training and test data if (ReadDataFromAsciiIFormat) { // load the signal and background event samples from ascii files // format in file must be: // var1/F:var2/F:var3/F:var4/F // 0.04551 0.59923 0.32400 -0.19170 // ... 
TString datFileS = "tmva_example_sig.dat"; TString datFileB = "tmva_example_bkg.dat"; factory->SetInputTrees( datFileS, datFileB ); } else { //TFile* f0 = new TFile("/opt/sbg/data/data1/cms/lebihan/clean_january_2012_2/CMSSW_4_2_8_patch7/src/MiniTreeAnalysis/NTupleAnalysis/macros/TopTauJets/TMVA_sig_newLumi.root"); //TFile* f1 = new TFile("/opt/sbg/data/data1/cms/lebihan/clean_january_2012_2/CMSSW_4_2_8_patch7/src/MiniTreeAnalysis/NTupleAnalysis/macros/TopTauJets/TMVA_bkg_newLumi.root"); TFile* f0 = TFile::Open("/opt/sbg/data/data1/cms/echabert/ttbarMET/ProdAlexMars13/CMSSW_5_3_2_patch4/src/NTuple/NTupleAnalysis/macros/TTbarMET/backup_outputProof10-04-13_16-00-57/proof_ttW.root"); TFile* f1 = TFile::Open("/opt/sbg/data/data1/cms/echabert/ttbarMET/ProdAlexMars13/CMSSW_5_3_2_patch4/src/NTuple/NTupleAnalysis/macros/TTbarMET/backup_outputProof10-04-13_16-00-57/proof_tt-dilepton.root"); TTree *signal = (TTree*)f0->Get("theTree2"); TTree *background = (TTree*)f1->Get("theTree2"); cout<<"trees: "<<signal<<" "<<background<<endl; //Double_t backgroundWeight = 1.0; //Double_t signalWeight = 1.0; Double_t signalWeight = 0.30*20/185338; Double_t backgroundWeight = 222.*0.1*20/9982625; // ====== register trees ==================================================== // // the following method is the prefered one: // you can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); // factory->AddSignalTree ( signal ); //factory->AddBackgroundTree( background ); // To give different trees for training and testing, do as follows: // factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ); // factory->AddSignalTree( signalTestTree, signalTestWeight, "Test" ); // Use the following code instead of the above two or four lines to add signal and background // training and test events "by hand" // NOTE that in this case one should not give expressions (such as "var1+var2") in the input // variable definition, but simply compute the expression before adding the event // // // --- begin ---------------------------------------------------------- // std::vector<Double_t> vars( 4 ); // vector has size of number of input variables // Float_t treevars[4]; // for (Int_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (Int_t i=0; i<signal->GetEntries(); i++) { // signal->GetEntry(i); // for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < signal->GetEntries()/2) factory->AddSignalTrainingEvent( vars, signalWeight ); // else factory->AddSignalTestEvent ( vars, signalWeight ); // } // // for (Int_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (Int_t i=0; i<background->GetEntries(); i++) { // background->GetEntry(i); // for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight ); // else factory->AddBackgroundTestEvent ( vars, backgroundWeight ); // } // // --- end ------------------------------------------------------------ // // ====== end of register trees 
============================================== } // This would set individual event weights (the variables defined in the // expression need to exist in the original TTree) // for signal : factory->SetSignalWeightExpression("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("weight1*weight2"); //factory->SetBackgroundWeightExpression("weight_BTAG"); //factory->SetSignalWeightExpression("weight*weight_BTAG"); // Apply additional cuts on the signal and background samples (can be different) // TCut mycuts = "MHt >=0 && MMTauJet >=0 && MM3 >= 0"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; // TCut mycutb = "MHt >=0 && MMTauJet >=0 && MM3 >= 0"; // for example: TCut mycutb = "abs(var1)<0.5"; //TCut mycuts = "Met>=20 "; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; //TCut mycutb = "Met>=20 "; // for example: TCut mycutb = "abs(var1)<0.5"; TCut mycuts; TCut mycutb; // tell the factory to use all remaining events in the trees after training for testing: factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=3000:nTrain_Background=5000:SplitMode=Random:NormMode=NumEvents:!V" ); // If no numbers of events are given, half of the events in the tree are used for training, and // the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); // ---- Book MVA methods // // please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // test the decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // test the new kernel density estimator if 
(Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // test the mixed splines and kernel density estimator (depending on which variable) if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSkNN"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSkNN", "!H:!V:VolumeRangeMode=kNN:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:CutNmin=T:Nmin=100:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" ); // Fisher discriminant if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=60:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2"); // Linear discriminant (same as Fisher) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", 
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS" ); // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... 
// Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // -------------------------------------------------------------------------------------------------- // As an example how to use the ROOT plugin mechanism, book BDT via // plugin mechanism if (Use["Plugin"]) { // // first the plugin has to be defined, which can happen either through the following line in the local or global .rootrc: // // # plugin handler plugin name(regexp) class to be instantiated library constructor format // Plugin.TMVA@@MethodBase: ^BDT TMVA::MethodBDT TMVA.1 "MethodBDT(TString,TString,DataSet&,TString)" // // or by telling the global plugin manager directly gPluginMgr->AddHandler("TMVA@@MethodBase", "BDT", "TMVA::MethodBDT", "TMVA.1", "MethodBDT(TString,TString,DataSet&,TString)"); factory->BookMethod( TMVA::Types::kPlugins, "BDT", "!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=50" ); } // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
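// --------------------------------------------------------------------------
// The hard-coded event weights in Example_Eric above have the familiar form
// sigma x luminosity / N_generated. A small helper makes the intent explicit
// (a sketch; the numbers are the ones already in the macro and their units
// are assumed to be mutually consistent).
// --------------------------------------------------------------------------
Double_t SampleWeight(Double_t xsec, Double_t lumi, Double_t nGenerated)
{
   // per-event weight that normalises a sample to the target luminosity
   return xsec * lumi / nGenerated;
}
// e.g. Double_t signalWeight     = SampleWeight(0.30     , 20., 185338. );
//      Double_t backgroundWeight = SampleWeight(222.*0.1 , 20., 9982625.);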
void mvaDonut(TString Type = "Dl", int iChannel = 1, TString Sample = "Sig") { TString Channels[] = {"D0","Ds0","Dp","Dsp"}; TString fname = "mva"; if(Sample=="Dss") fname += Sample; fname += Type; fname += Channels[iChannel-1]; TString outfileName = fname; outfileName += ".root"; TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); int isDss = 0; if(Sample=="Dss") isDss=1; TMVA::Factory *factory = new TMVA::Factory( fname, outputFile, Form("!V:!Silent:%sColor", gROOT->IsBatch()?"!":"") ); TChain c("ntp1"); c.Add("~/releases/ntuplePID50/workdir/AWG82/ntuples/small/Add_R24MVA_RunAll.root"); TString sigCuts[] = {"(MCType==1||MCType==3||MCType==5)", "(MCType==2||MCType==4||MCType==6)", "(MCType==7||MCType==9||MCType==11)", "(MCType==8||MCType==10||MCType==12)", "MCType>12"}; TString bkgCuts[2][2] = {{"MCType>6", "(MCType>0&&MCType<7||MCType>12)"}, {"MCType>0&&MCType<13","MCType>0&&MCType<13"}}; TString sigStr = "candLepTru==1&&"; if(isDss) sigStr += "pmisspi0"; else sigStr += "candPMiss"; sigStr += ">0.2&&candType=="; sigStr += iChannel; sigStr += "&&"; if(isDss) sigStr += sigCuts[4]; else sigStr += sigCuts[iChannel-1]; TString bkgStr = "candType=="; bkgStr += iChannel; bkgStr += "&&"; if(isDss) bkgStr += "pmisspi0"; else bkgStr += "candPMiss"; bkgStr += ">0.2&&"; if(Type=="Dl") bkgStr += bkgCuts[isDss][(iChannel-1)/2]; else bkgStr += "MCType==0"; TCut sigCut = "1", bkgCut = "1", mycuts = "", mycutb = ""; sigCut += sigStr; bkgCut += bkgStr; // --- Base --- // int nSig = 9, nDpi0 = 10; // TString sigVari[] = {"candEExtra","candMES","candDmass","candDeltam","candTagChargedMult","candBTagDeltam", // "candBTagDmass","candDeltaE","candCosT"}; // TString Dpi0Vari[] = {"mpi0","candDmass","dmpi0","eextrapi0","ppi0","e1pi0","candCosT","candDeltam", // "candMES","candDeltaE"}; // --- NoDmNoMp0 --- // int nSig = 8, nDpi0 = 9; // TString sigVari[] = {"candEExtra","candMES","candDmass","candDeltam","candTagChargedMult", // "candBTagDmass","candDeltaE","candCosT"}; // TString Dpi0Vari[] = {"candDmass","dmpi0","eextrapi0","ppi0","e1pi0","candCosT","candDeltam", // "candMES","candDeltaE"}; // sigCuts[4] = "MCType>12&&mpi0>.125&&mpi0<.145"; // --- NoMes --- // int nSig = 8, nDpi0 = 9; // TString sigVari[] = {"candEExtra","candDmass","candDeltam","candTagChargedMult","candBTagDeltam", // "candBTagDmass","candDeltaE","candCosT"}; // TString Dpi0Vari[] = {"mpi0","candDmass","dmpi0","eextrapi0","ppi0","e1pi0","candCosT","candDeltam", // "candDeltaE"}; // --- NoMulYesDm --- int nSig = 8, nDpi0 = 11; TString sigVari[] = {"candEExtra","candMES","candDmass","candDeltam","candBTagDeltam", "candBTagDmass","candDeltaE","candCosT"}; TString Dpi0Vari[] = {"mpi0","candDmass","dmpi0","eextrapi0","ppi0","e1pi0","candCosT","candDeltam", "candMES","candDeltaE","candBTagDeltam"}; factory->SetInputTrees(&c, sigCut, bkgCut); if(isDss==0){ for(int vari = 0; vari < nSig; vari++){ if(sigVari[vari]=="candDeltam" && iChannel%2==1) continue; char variChar = 'F'; if(sigVari[vari]=="candTagChargedMult") variChar = 'I'; factory->AddVariable(sigVari[vari], variChar); } } else { for(int vari = 0; vari < nDpi0; vari++){ if(Dpi0Vari[vari]=="candDeltam" && iChannel%2==1) continue; factory->AddVariable(Dpi0Vari[vari], 'F'); } } factory->PrepareTrainingAndTestTree( mycuts, mycutb, "NSigTest=100:NBkgTest=100:SplitMode=Random:NormMode=NumEvents:!V" ); factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=500:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=2.5" ); 
factory->TrainAllMethods(); factory->TestAllMethods(); factory->EvaluateAllMethods(); // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; delete factory; // Launch the GUI for the root macros //if (!gROOT->IsBatch()) TMVAGui( outfileName ); gROOT->ProcessLine(".q"); }
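// --------------------------------------------------------------------------
// A note on the cut strings assembled in mvaDonut above: TCut's += operator
// ANDs the right-hand side onto the existing cut, which is why sigCut and
// bkgCut can start from the trivial cut "1". A minimal illustration with
// hypothetical values:
// --------------------------------------------------------------------------
void TCutDemo()
{
   TCut cut = "1";
   cut += "candType==1";   // cut is now (1) AND (candType==1)
   cut += "candPMiss>0.2"; // and now additionally AND (candPMiss>0.2)
   std::cout << cut.GetTitle() << std::endl; // prints the combined expression
}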
//------------------------------------------------------------------------------
// MVATrain
//------------------------------------------------------------------------------
void MVATrain(TString signal)
{
  TFile* outputfile = TFile::Open(trainingdir + signal + ".root", "recreate");

  // Factory
  //----------------------------------------------------------------------------
  TMVA::Factory* factory = new TMVA::Factory(signal, outputfile,
      "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification");

  // Get the trees
  //----------------------------------------------------------------------------
  _mctree.clear();

  AddProcess("signal",     signal);
  AddProcess("background", "HZJ_HToWW_M125");
  AddProcess("background", "ggZH_HToWW_M125");

  // AddProcess("background", "14_HZ");
  // AddProcess("background", "10_HWW");
  // AddProcess("background", "06_WW");
  // AddProcess("background", "02_WZTo3LNu");
  // AddProcess("background", "03_ZZ");
  // AddProcess("background", "11_Wg");
  // AddProcess("background", "07_ZJets");
  // AddProcess("background", "09_TTV");
  // AddProcess("background", "05_ST");
  // AddProcess("background", "00_Fakes");

  Double_t weight = 1.0;

  factory->AddSignalTree(_signaltree, weight);

  for (UInt_t i=0; i<_mctree.size(); i++) factory->AddBackgroundTree(_mctree[i], weight);

  factory->SetWeightExpression("eventW");

  // Add variables
  //----------------------------------------------------------------------------
  // Be careful with the order: it must be respected at the reading step
  // (see the MVARead sketch below).
  // factory->AddVariable("<var1>+<var2>", "pretty title", "unit", 'F');

  // factory->AddVariable("channel",        "", "", 'F');
  factory->AddVariable("metPfType1",     "", "", 'F');
  factory->AddVariable("m2l",            "", "", 'F');
  // factory->AddVariable("njet",           "", "", 'F');
  // factory->AddVariable("nbjet20cmvav2l", "", "", 'F');
  factory->AddVariable("lep1pt",         "", "", 'F');
  factory->AddVariable("lep2pt",         "", "", 'F');
  // factory->AddVariable("jet1pt",         "", "", 'F');
  factory->AddVariable("jet2pt",         "", "", 'F');
  factory->AddVariable("mtw1",           "", "", 'F');
  factory->AddVariable("dphill",         "", "", 'F');
  factory->AddVariable("dphilep1jet1",   "", "", 'F');
  // factory->AddVariable("dphilep1jet2",   "", "", 'F');
  // factory->AddVariable("dphilmet1",      "", "", 'F');
  // factory->AddVariable("dphilep2jet1",   "", "", 'F');
  // factory->AddVariable("dphilep2jet2",   "", "", 'F');
  // factory->AddVariable("dphilmet2",      "", "", 'F');
  // factory->AddVariable("dphijj",         "", "", 'F');
  // factory->AddVariable("dphijet1met",    "", "", 'F');
  // factory->AddVariable("dphijet2met",    "", "", 'F');
  factory->AddVariable("dphillmet",      "", "", 'F');

  // Preselection cuts and preparation
  //----------------------------------------------------------------------------
  factory->PrepareTrainingAndTestTree("",
      ":nTrain_Signal=0:nTest_Signal=0:nTrain_Background=0:nTest_Background=0:SplitMode=Alternate:MixMode=Random:!V");

  // Book MVA
  //----------------------------------------------------------------------------
  factory->BookMethod(TMVA::Types::kMLP, "MLP",
      "H:!V:NeuronType=sigmoid:VarTransform=N:NCycles=600:HiddenLayers=25,10:TestRate=5:!UseRegulator");

  // Train, test and evaluate MVA
  //----------------------------------------------------------------------------
  factory->TrainAllMethods();     // Train using the set of training events
  factory->TestAllMethods();      // Evaluate using the set of test events
  factory->EvaluateAllMethods();  // Evaluate and compare performance

  // Save the output
  //----------------------------------------------------------------------------
  outputfile->Close();

  delete factory;
}
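//------------------------------------------------------------------------------
// MVARead (sketch)
//------------------------------------------------------------------------------
// Hedged sketch of the reading step that the comment in MVATrain warns about:
// the TMVA::Reader must receive AddVariable calls in exactly the same order as
// the factory did at training time. The weight-file path follows TMVA's default
// "weights/<jobname>_<method>.weights.xml" convention; the function name, the
// float buffers and the argument list are illustrative assumptions, not part
// of the original macros.
Float_t MVARead(TString signal,
                Float_t metPfType1, Float_t m2l, Float_t lep1pt, Float_t lep2pt,
                Float_t jet2pt, Float_t mtw1, Float_t dphill,
                Float_t dphilep1jet1, Float_t dphillmet)
{
  static TMVA::Reader* reader = 0;
  static Float_t v[9];

  if (!reader) {
    reader = new TMVA::Reader("!Color:!Silent");

    // Same order as the AddVariable calls in MVATrain
    reader->AddVariable("metPfType1",   &v[0]);
    reader->AddVariable("m2l",          &v[1]);
    reader->AddVariable("lep1pt",       &v[2]);
    reader->AddVariable("lep2pt",       &v[3]);
    reader->AddVariable("jet2pt",       &v[4]);
    reader->AddVariable("mtw1",         &v[5]);
    reader->AddVariable("dphill",       &v[6]);
    reader->AddVariable("dphilep1jet1", &v[7]);
    reader->AddVariable("dphillmet",    &v[8]);

    reader->BookMVA("MLP", "weights/" + signal + "_MLP.weights.xml");
  }

  v[0] = metPfType1; v[1] = m2l;  v[2] = lep1pt; v[3] = lep2pt;
  v[4] = jet2pt;     v[5] = mtw1; v[6] = dphill; v[7] = dphilep1jet1;
  v[8] = dphillmet;

  return reader->EvaluateMVA("MLP");
}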
// Required headers for a standalone build; the project-specific headers for
// MVariable, MultiVariable, MClassifier and LoopTimer are assumed to live
// alongside this file.
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

#include "TCut.h"
#include "TFile.h"
#include "TTree.h"
#include "TMVA/Factory.h"
#include "TMVA/Reader.h"
#include "TMVA/Tools.h"
#include "TMVA/Types.h"

int main()
{
  TMVA::Tools::Instance();

  std::cout << "==> Start TMVAClassification" << std::endl;

  TFile* OutputFile = TFile::Open("Outputfile.root", "RECREATE");

  TMVA::Factory* factory = new TMVA::Factory("TMVAClassification", OutputFile,
      "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification");

  // Build the list of input variables: two plain variables and two derived
  // ones (the sum and the difference of var1 and var2). F and none are the
  // project's type and operation enums.
  std::vector<VMVariable*> Variables;

  MVariable* Var3 = new MVariable("var3", F, none);
  MVariable* Var4 = new MVariable("var4", F, none);
  Variables.push_back(Var3);
  Variables.push_back(Var4);

  MVariable* Var1 = new MVariable("var1", F, none);
  MVariable* Var2 = new MVariable("var2", F, none);

  MultiVariable* MyVar1 = new MultiVariable("Var1+Var2", sum);
  MyVar1->AddVariable(Var1);
  MyVar1->AddVariable(Var2);
  Variables.push_back(MyVar1);

  MultiVariable* MyVar2 = new MultiVariable("Minus", subtract);
  MyVar2->AddVariable(Var1);
  MyVar2->AddVariable(Var2);
  Variables.push_back(MyVar2);

  // Read the training and test data.
  std::string InputName = "./tmva_class_exampleD.root";
  TFile* input = TFile::Open(InputName.c_str());
  TTree* signal     = (TTree*)input->Get("TreeS");
  TTree* background = (TTree*)input->Get("TreeB");

  Double_t signalWeight     = 1.0;
  Double_t backgroundWeight = 1.0;

  factory->AddSignalTree    (signal,     signalWeight);
  factory->AddBackgroundTree(background, backgroundWeight);

  for (auto v : Variables) factory->AddVariable(v->GetFactoryName(), v->GetType());

  factory->SetBackgroundWeightExpression("weight");

  TCut mycuts = "";
  TCut mycutb = "";

  factory->PrepareTrainingAndTestTree(mycuts, mycutb,
      "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V");

  // Book the classifiers through the MClassifier wrapper.
  std::vector<MClassifier*> Classifiers;
  Classifiers.push_back(new MClassifier(TMVA::Types::kBDT, "BDT",
      "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20"));

  for (auto C : Classifiers) {
    if (!(C->AddMethodToFactory(factory))) {
      std::cout << "Booking classifier failed" << std::endl;
      return 1;
    }
  }

  factory->TrainAllMethods();
  factory->TestAllMethods();
  factory->EvaluateAllMethods();

  OutputFile->Close();
  delete factory;

  // Application step: read the weights back and evaluate the classifiers.
  TMVA::Reader* reader = new TMVA::Reader("!Color:!Silent");

  for (auto v : Variables) reader->AddVariable(v->GetFactoryName(), v->GetReaderAddress());

  for (auto C : Classifiers) {
    if (!(C->AddMethodToReader(reader, "./weights/", "TMVAClassification"))) {
      std::cout << "Failed adding classifier to reader" << std::endl;
      return 1;
    }
  }

  TFile* Input = TFile::Open(InputName.c_str());
  TTree* TreeToEvaluate = (TTree*)Input->Get("TreeS");

  TFile* AppliedFile = new TFile("AppliedFile.root", "RECREATE");
  TTree* AppliedTree = TreeToEvaluate->CloneTree(0);

  for (auto C : Classifiers) {
    if (!(C->MakeBranch(AppliedTree))) return 1;
  }

  for (auto Var : Variables) {
    if (!(Var->SetBA(TreeToEvaluate))) {
      std::cout << "Problem setting branch addresses" << std::endl;
      return 1;
    }
  }

  Long64_t N = TreeToEvaluate->GetEntries();
  LoopTimer LT(0.05);

  int vetoedeventcounter = 0;
  Long64_t iStart = 0;
  Long64_t iEnd   = N;

  for (Long64_t i = iStart; i < iEnd; ++i) {
    LT.DeclareLoopStart(iEnd - iStart);
    TreeToEvaluate->GetEntry(i);

    // Veto the event if any variable operation fails.
    bool useevent = true;
    for (auto Var : Variables) useevent = Var->DoOperation() && useevent;

    if (!useevent) {
      vetoedeventcounter++;
      continue;
    }

    for (auto C : Classifiers) {
      if (!(C->Apply(reader))) return 1;
    }

    AppliedTree->Fill();
  }

  AppliedTree->Write();
  AppliedFile->Close();

  std::cout << "==> Wrote AppliedFile.root (" << vetoedeventcounter
            << " events vetoed)" << std::endl;

  // Compare the applied file from here with the applied file from the TMVA tests.
  TFile* ReadAppliedFile = TFile::Open("AppliedFile.root");
  TTree* AppliedTreeRead = (TTree*)ReadAppliedFile->Get("TreeS");
  if (!AppliedTreeRead) std::cout << "Null pointer to tree" << std::endl;

  double BDTResponse;
  AppliedTreeRead->SetBranchAddress("BDT_response", &BDTResponse);

  TFile* ReadTMVATestFile = TFile::Open("/home/tw/root-v5-34/tmva/test/TreeFile.root");
  if (!ReadTMVATestFile) std::cout << "File open failed" << std::endl;

  TTree* TMVATestTree = (TTree*)ReadTMVATestFile->Get("AppliedTree");
  if (!TMVATestTree) std::cout << "Null pointer to tree" << std::endl;

  double TestBDTResponse;
  TMVATestTree->SetBranchAddress("BDT_response", &TestBDTResponse);

  Long64_t ATRN = AppliedTreeRead->GetEntries();
  Long64_t TTTN = TMVATestTree->GetEntries();

  std::cout << "Entries in my tree   = " << ATRN << std::endl;
  std::cout << "Entries in TMVA tree = " << TTTN << std::endl;
  if (ATRN != TTTN) std::cout << "SOMETHING WRONG: ENTRY COUNTS NOT EQUAL" << std::endl;

  // Collect the responses from both trees and compare them after sorting, so
  // the comparison does not depend on the event order.
  std::vector<double> ATRValues;
  std::vector<double> TTTValues;

  for (Long64_t i = 0; i < ATRN; ++i) {
    TMVATestTree->GetEntry(i);
    AppliedTreeRead->GetEntry(i);
    ATRValues.push_back(BDTResponse);
    TTTValues.push_back(TestBDTResponse);
    // std::cout << " MYTree = " << BDTResponse << " TMVATREE = " << TestBDTResponse << std::endl;
  }

  std::sort(ATRValues.begin(), ATRValues.end());
  std::sort(TTTValues.begin(), TTTValues.end());

  for (Long64_t i = 0; i < TTTN; ++i) {
    std::cout << " MY Value = " << ATRValues.at(i) << "  TTT Value = " << TTTValues.at(i) << std::endl;
  }
}
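//------------------------------------------------------------------------------
// CompareResponses (sketch)
//------------------------------------------------------------------------------
// Hedged sketch: the side-by-side printout at the end of main() still has to
// be checked by eye. A small helper like this one could flag disagreements
// automatically; the function name and the 1e-6 tolerance are illustrative
// assumptions, not taken from the original code.
#include <cmath>  // for std::fabs

bool CompareResponses(const std::vector<double>& a, const std::vector<double>& b,
                      double tolerance = 1e-6)
{
  if (a.size() != b.size()) return false;  // entry counts must match
  for (size_t i = 0; i < a.size(); ++i) {
    // Flag any pair of sorted responses that differs beyond the tolerance.
    if (std::fabs(a[i] - b[i]) > tolerance) return false;
  }
  return true;
}

// Usage inside main(), after the two vectors are sorted:
//   if (!CompareResponses(ATRValues, TTTValues))
//     std::cout << "RESPONSES DISAGREE BEYOND TOLERANCE" << std::endl;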