void TMVAClassificationCategory() { //--------------------------------------------------------------- std::cout << std::endl << "==> Start TMVAClassificationCategory" << std::endl; bool batchMode(false); // Create a new root output file. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory will // then run the performance analysis for you. // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string std::string factoryOptions( "!V:!Silent:Transformations=I;D;P;G,D" ); if (batchMode) factoryOptions += ":!Color:!DrawProgressBar"; TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationCategory", outputFile, factoryOptions ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] factory->AddVariable( "var1", 'F' ); factory->AddVariable( "var2", 'F' ); factory->AddVariable( "var3", 'F' ); factory->AddVariable( "var4", 'F' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables factory->AddSpectator( "eta" ); // load the signal and background event samples from ROOT trees TFile *input(0); TString fname( "" ); if (UseOffsetMethod) fname = "../execs/data/toy_sigbkg_categ_offset.root"; else fname = "../execs/data/toy_sigbkg_categ_varoff.root"; if (!gSystem->AccessPathName( fname )) { // first we try to find tmva_example.root in the local directory std::cout << "--- TMVAClassificationCategory: Accessing " << fname << std::endl; input = TFile::Open( fname ); } if (!input) { std::cout << "ERROR: could not open data file: " << fname << std::endl; exit(1); } TTree *signal = (TTree*)input->Get("TreeS"); TTree *background = (TTree*)input->Get("TreeB"); /// global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; /// you can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; // tell the factory to use all remaining events in the trees after training for testing: factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // Fisher discriminant factory->BookMethod( TMVA::Types::kFisher, "Fisher", "!H:!V:Fisher" ); // Likelihood factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Categorised classifier TMVA::MethodCategory* mcat = 0; // the variable sets TString theCat1Vars = "var1:var2:var3:var4"; TString theCat2Vars = (UseOffsetMethod ? "var1:var2:var3:var4" : "var1:var2:var3"); // the Fisher TMVA::MethodBase* fiCat = factory->BookMethod( TMVA::Types::kCategory, "FisherCat","" ); mcat = dynamic_cast<TMVA::MethodCategory*>(fiCat); mcat->AddMethod("abs(eta)<=1.3",theCat1Vars, TMVA::Types::kFisher,"Category_Fisher_1","!H:!V:Fisher"); mcat->AddMethod("abs(eta)>1.3", theCat2Vars, TMVA::Types::kFisher,"Category_Fisher_2","!H:!V:Fisher"); // the Likelihood TMVA::MethodBase* liCat = factory->BookMethod( TMVA::Types::kCategory, "LikelihoodCat","" ); mcat = dynamic_cast<TMVA::MethodCategory*>(liCat); mcat->AddMethod("abs(eta)<=1.3",theCat1Vars, TMVA::Types::kLikelihood,"Category_Likelihood_1","!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50"); mcat->AddMethod("abs(eta)>1.3", theCat2Vars, TMVA::Types::kLikelihood,"Category_Likelihood_2","!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50"); // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassificationCategory is done!" << std::endl; // Clean up delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void TMVAClassification( TString fname = "./tmva_class_example.root") { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // to get access to the GUI and all tmva macros TString tmva_dir(TString(gRootDir) + "/tmva"); if(gSystem->Getenv("TMVASYS")) tmva_dir = TString(gSystem->Getenv("TMVASYS")); gROOT->SetMacroPath(tmva_dir + "/test/:" + gROOT->GetMacroPath() ); gROOT->ProcessLine(".L TMVAGui.C"); // Default MVA methods to be trained + tested std::map<std::string,int> Use; Use["KNN"] = 1; // k-nearest neighbour method // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] factory->AddVariable( "pt_eH", 'D' ); factory->AddVariable( "max(pt_jet_eH,pt_eH)", 'D' ); factory->AddVariable( "njets", 'I' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables /// factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' ); /// factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' ); // Read training and test data // (it is also possible to use ASCII format as input -> see TMVA Users Guide) TFile *input(0); if (gSystem->AccessPathName( fname )){ // file does not exist in local directory gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root"); fname = "./tmva_class_example.root"; }else{ input= TFile::Open( fname ); } if (!input) { std::cout << "ERROR: could not open data file " << fname << std::endl; exit(1); } std::cout << "--- TMVAClassification : Using input file: " << input->GetName() << std::endl; // --- Register the training and test trees TTree *inputTree = (TTree*)input->Get("FakeTreeSig"); TTree *background = (TTree*)input->Get("FakeTreeBG"); // global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; // cuts for signal and background //~ TCut signalCut = "selected==1 && id_iso_eleH==1"; //~ TCut backgroundCut = "selected==1 && id_iso_eleH==0"; //~ //~ std::cout << " THe signal cut is " << signalCut.GetTitle() << " bg cut is " << backgroundCut.GetTitle() << std::endl; Int_t num_pass = inputTree->GetEntries(); Int_t num_fail = background->GetEntries(); std::cout << num_pass << " " << num_fail << std::endl; // You can add an arbitrary number of signal or background trees factory->AddSignalTree ( inputTree, 1.0 ); factory->AddBackgroundTree( background, 1.0 ); factory->SetWeightExpression( "weight" ); //factory->SetInputTrees( inputTree, signalCut, backgroundCut ); // To give different trees for training and testing, do as follows: // factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ); // factory->AddSignalTree( signalTestTree, signalTestWeight, "Test" ); // Use the following code instead of the above two or four lines to add signal and background // training and test events "by hand" // NOTE that in this case one should not give expressions (such as "var1+var2") in the input // variable definition, but simply compute the expression before adding the event // // // --- begin ---------------------------------------------------------- // std::vector<Double_t> vars( 4 ); // vector has size of number of input variables // Float_t treevars[4], weight; // // // Signal // for (UInt_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (UInt_t i=0; i<signal->GetEntries(); i++) { // signal->GetEntry(i); // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < signal->GetEntries()/2.0) factory->AddSignalTrainingEvent( vars, signalWeight ); // else factory->AddSignalTestEvent ( vars, signalWeight ); // } // // // Background (has event weights) // background->SetBranchAddress( "weight", &weight ); // for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (UInt_t i=0; i<background->GetEntries(); i++) { // background->GetEntry(i); // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight*weight ); // else factory->AddBackgroundTestEvent ( vars, backgroundWeight*weight ); // } // --- end ------------------------------------------------------------ // // --- end of tree registration // Set individual event weights (the variables must exist in the original TTree) // for signal : factory->SetSignalWeightExpression ("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("weight1*weight2"); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = "selected==1"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = "selected==1"; // for example: TCut mycutb = "abs(var1)<0.5"; // Tell the factory how to use the training and testing events // // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); std::stringstream indexes; indexes.str(""); indexes << "nTrain_Signal=" << num_pass << ":nTrain_Background=" << num_fail << ":SplitMode=Random:NormMode=None:!V"; std::string input_opt=indexes.str(); std::cout << "Options are " << input_opt << std::endl; factory->PrepareTrainingAndTestTree( mycuts, mycutb, input_opt); //"nTrain_Signal="+num_pass+":nTrain_Background="+num_fail+":SplitMode=Random:NormMode=None:!V" ); // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=50:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // For an example of the category classifier usage, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","GA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events //factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs // factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void TMVAClassification( TString eventsToTrain = "0", const TString & region = "barrel", const TString index = "", TString myMethodList = "BDT") { std::cout << "running classification for " << region << " for " << myMethodList << std::endl; if( region != "barrel" && region != "endcaps" ) { std::cout << "Error, region can only be barrel or endcaps. Selected region was: " << region << std::endl; exit(1); } if( index != "" && index != "0" && index != "1" && index != "2" ) { std::cout << "Error, index can only be \"\", \"0\", \"1\" or \"2\". Selected index was: " << index << std::endl; exit(1); } // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Cut optimisation Use["Cuts"] = 0; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // // --- 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 0; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // --- Mutidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 0; Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting Use["KNN"] = 0; // k-nearest neighbour method // // --- Linear Discriminant Analysis Use["LD"] = 0; // Linear Discriminant identical to Fisher Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 0; // minimisation of user-defined function using Genetics Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 0; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 0; // Recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 1; // uses Adaptive Boost Use["BDTG"] = 0; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of fisher discriminant for node splitting // // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules") Use["RuleFit"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; std::exit(2); } Use[regMethod] = 1; } } // Input and output file names TString fnameTrainS = "BsMC12_barrel_preselection"; TString fnameTrainB = "Barrel_preselection"; TString fnameTestS = "BsMC12_barrel_preselection"; TString fnameTestB = "Barrel_preselection"; TString outputFileName = "TMVA_barrel"; TString weightDirName = "barrel"; if( region == "endcaps" ) { fnameTrainS = "BsMC12_endcaps_preselection"; fnameTrainB = "Endcaps_preselection"; fnameTestS = "BsMC12_endcaps_preselection"; fnameTestB = "Endcaps_preselection"; outputFileName = "TMVA_endcaps"; weightDirName = "endcaps"; } if( index != "" ) { fnameTrainS += "_"+index; fnameTrainB += "_"+index; TString indexTest = ""; // The test index is the train index +1 (2+1 -> 0) if( index == "0" ) indexTest = "1"; else if( index == "1" ) indexTest = "2"; else if( index == "2" ) indexTest = "0"; fnameTestS += "_"+indexTest; fnameTestB += "_"+indexTest; outputFileName += "_"+index; weightDirName += index; } fnameTrainS = rootDir + fnameTrainS + ".root"; fnameTrainB = rootDir + fnameTrainB + ".root"; fnameTestS = rootDir + fnameTestS + ".root"; fnameTestB = rootDir + fnameTestB + ".root"; outputFileName = rootDir + outputFileName + ".root"; weightDirName = weightsDir + weightDirName + "Weights"; // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName(outputFileName); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // (TMVA::gConfig().GetIONames()).fWeightFileDir = outputFileName; (TMVA::gConfig().GetIONames()).fWeightFileDir = weightDirName; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] bool useNewMuonID = false; factory->AddVariable( "fls3d", "fls3d", "", 'F' ); factory->AddVariable( "alpha", "alpha", "", 'F' ); factory->AddVariable( "pvips", "pvips", "", 'F' ); factory->AddVariable( "iso", "iso", "", 'F' ); factory->AddVariable( "m1iso", "m1iso", "", 'F' ); factory->AddVariable( "m2iso", "m2iso", "", 'F' ); factory->AddVariable( "chi2dof", "chi2/dof", "", 'F' ); if( region == "barrel" ) { factory->AddVariable( "eta", "eta", "", 'F' ); factory->AddVariable( "maxdoca", "maxdoca", "cm", 'F' ); } else { factory->AddVariable( "pt", "pt", "GeV/c", 'F' ); factory->AddVariable( "pvip", "pvip", "cm", 'F' ); } factory->AddVariable( "docatrk", "docatrk", "cm", 'F' ); // factory->AddVariable( "pt", "pt", "GeV/c", 'F' ); // factory->AddVariable( "closetrk", "closetrk", "", 'I' ); // factory->AddVariable( "y", "y", "", 'F' ); // factory->AddVariable( "l3d", "l3d", "cm", 'F' ); // factory->AddVariable( "cosAlphaXY", "cosAlphaXY", "", 'F' ); // factory->AddVariable( "mu1_dxy", "mu1_dxy", "cm", 'F' ); // factory->AddVariable( "mu2_dxy", "mu2_dxy", "cm", 'F' ); if( useNewMuonID ) { // New Muon-id factory->AddVariable( "mu1_MVAMuonID", "mu1_MVAMuonID", "", 'F'); factory->AddVariable( "mu2_MVAMuonID", "mu2_MVAMuonID", "", 'F'); } // Extra variables // factory->AddVariable( "mu1_pt", "mu1_pt", "GeV/c", 'F' ); // factory->AddVariable( "mu2_pt", "mu2_pt", "GeV/c", 'F' ); // factory->AddVariable( "pvw8", "pvw8", "", 'F' ); // factory->AddVariable( "cosAlpha3D", "cosAlpha3D", "", 'F' ); // factory->AddVariable( "countTksOfPV", "countTksOfPV", "", 'I' ); // factory->AddVariable( "ctauErrPV", "ctauErrPV", "", 'F' ); // factory->AddVariable( "ctauPV", "ctauPV", "", 'F' ); // factory->AddVariable( "dcaxy", "dcaxy", "", 'F' ); // factory->AddVariable( "mu1_glbTrackProb", "mu1_glbTrackProb", "", 'F' ); // factory->AddVariable( "mu1_nChi2", "mu1_nChi2", "", 'F' ); // factory->AddVariable( "mu1_nMuSegs", "mu1_nMuSegs", "", 'F' ); // factory->AddVariable( "mu1_nMuSegsCln", "mu1_nMuSegsCln", "", 'F' ); // factory->AddVariable( "mu1_nPixHits", "mu1_nPixHits", "", 'F' ); // factory->AddVariable( "mu1_nTrHits", "mu1_nTrHits", "", 'F' ); // factory->AddVariable( "mu1_segComp", "mu1_segComp", "", 'F' ); // factory->AddVariable( "mu1_trkEHitsOut", "mu1_trkEHitsOut", "", 'F' ); // factory->AddVariable( "mu1_trkVHits", "mu1_trkVHits", "", 'F' ); // factory->AddVariable( "mu1_validFrac", "mu1_validFrac", "", 'F' ); // factory->AddVariable( "mu1_chi2LocMom", "mu1_chi2LocMom", "", 'F' ); // factory->AddVariable( "mu1_chi2LocPos", "mu1_chi2LocPos", "", 'F' ); // factory->AddVariable( "mu2_glbTrackProb", "mu2_glbTrackProb", "", 'F' ); // factory->AddVariable( "mu2_nChi2", "mu2_nChi2", "", 'F' ); // factory->AddVariable( "mu2_nMuSegs", "mu2_nMuSegs", "", 'F' ); // factory->AddVariable( "mu2_nMuSegsCln", "mu2_nMuSegsCln", "", 'F' ); // factory->AddVariable( "mu2_nPixHits", "mu2_nPixHits", "", 'F' ); // factory->AddVariable( "mu2_nTrHits", "mu2_nTrHits", "", 'F' ); // factory->AddVariable( "mu2_segComp", "mu2_segComp", "", 'F' ); // factory->AddVariable( "mu2_trkEHitsOut", "mu2_trkEHitsOut", "", 'F' ); // factory->AddVariable( "mu2_trkVHits", "mu2_trkVHits", "", 'F' ); // factory->AddVariable( "mu2_validFrac", "mu2_validFrac", "", 'F' ); // factory->AddVariable( "mu2_chi2LocMom", "mu2_chi2LocMom", "", 'F' ); // factory->AddVariable( "mu2_chi2LocPos", "mu2_chi2LocPos", "", 'F' ); // factory->AddVariable( "l3d := ctauPV*pt/mass", "l3d", "cm", 'F' ); // factory->AddVariable( "l3dSig := ctauPV/ctauErrPV", "l3dSig", "", 'F' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables // factory->AddSpectator( "spec1 := mass*2", "Spectator 1", "units", 'F' ); // factory->AddSpectator( "spec2 := mass*3", "Spectator 2", "units", 'F' ); factory->AddSpectator( "mass", "mass", "GeV/c^{2}", 'F' ); // Read training and test data // (it is also possible to use ASCII format as input -> see TMVA Users Guide) if (gSystem->AccessPathName( fnameTrainS )) { // file does not exist in local directory std::cout << "Did not access " << fnameTrainS << " exiting." << std::endl; std::exit(4); } //gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root"); TFile *inputTrainS = TFile::Open( fnameTrainS ); TFile *inputTrainB = TFile::Open( fnameTrainB ); TFile *inputTestS = TFile::Open( fnameTestS ); TFile *inputTestB = TFile::Open( fnameTestB ); // --- Register the training and test trees TTree *signalTrainTree = (TTree*)inputTrainS->Get("probe_tree"); TTree *backgroundTrainTree = (TTree*)inputTrainB->Get("probe_tree"); TTree *signalTestTree = (TTree*)inputTestS->Get("probe_tree"); TTree *backgroundTestTree = (TTree*)inputTestB->Get("probe_tree"); // global event weights per tree (see below for setting event-wise weights) Double_t signalTrainWeight = 1.0; Double_t backgroundTrainWeight = 1.0; Double_t signalTestWeight = 1.0; Double_t backgroundTestWeight = 1.0; // Decide if using the split and mixing or the full trees if( fnameTrainS == fnameTestS ) { if( fnameTrainB != fnameTestB ) { std::cout << "This macro cannot handle cases where the same signal sample is used for training and testing, but different background samples are used."; exit(1); } std::cout << "--- TMVAClassification : Using input file: " << inputTrainS->GetName() << std::endl; std::cout << "--- and file: " << inputTrainB->GetName() << std::endl; // You can add an arbitrary number of signal or background trees factory->AddSignalTree ( signalTrainTree, signalTrainWeight ); factory->AddBackgroundTree( backgroundTrainTree, backgroundTrainWeight ); } else { if( fnameTrainB == fnameTestB ) { std::cout << "This macro cannot handle cases where the same background sample is used for training and testing, but different signal samples are used."; exit(1); } std::cout << "--- TMVAClassification : Using input file: " << inputTrainS->GetName() << std::endl; std::cout << "--- and file: " << inputTrainB->GetName() << " for training and" << std::endl; std::cout << "--- input file: " << inputTestS->GetName() << std::endl; std::cout << "--- and file: " << inputTestB->GetName() << " for testing." << std::endl; // To give different trees for training and testing, do as follows: factory->AddSignalTree( signalTrainTree, signalTrainWeight, "Training" ); factory->AddSignalTree( signalTestTree, signalTestWeight, "Test" ); factory->AddBackgroundTree( backgroundTrainTree, backgroundTrainWeight, "Training" ); factory->AddBackgroundTree( backgroundTestTree, backgroundTestWeight, "Test" ); } // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; TCut mycutb = ""; // Tell the factory how to use the training and testing events // // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal="+eventsToTrain+":nTrain_Background="+eventsToTrain+":SplitMode=Random:NormMode=NumEvents:!V" ); // factory->PrepareTrainingAndTestTree( mycuts, mycutb, // "nTrain_Signal=3000:nTrain_Background=3000:nTest_Signal=3000:nTest_Background=3000:SplitMode=Random:NormMode=NumEvents:!V" ); // factory->PrepareTrainingAndTestTree( mycuts, mycutb, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood ("naive Bayes estimator") if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density estimator to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) // factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+8:TestRate=5:!UseRegulator" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=800:nEventsMin=50:MaxDepth=2:BoostType=AdaBoost:AdaBoostBeta=1:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:NNodesMax=5" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", "!H:!V:NTrees=50:nEventsMin=150:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier usage, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","GA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events std::cout << "Training all methods" << std::endl; factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events std::cout << "Testing all methods" << std::endl; factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs std::cout << "Evaluating all methods" << std::endl; factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void CutsEmuTMVA( TString myMethodList = "" ) { double lumi = 4.928; int lnum = 125; std::cout << "Training " << lnum <<std::endl; // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l CutsEmuTMVA.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l CutsEmuTMVA.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Cut optimisation Use["Cuts"] = 1; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // // --- 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 0; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // --- Mutidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 0; Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting Use["KNN"] = 0; // k-nearest neighbour method // // --- Linear Discriminant Analysis Use["LD"] = 0; // Linear Discriminant identical to Fisher Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 0; // minimisation of user-defined function using Genetics Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 0; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 0; // Recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 0; // uses Adaptive Boost Use["BDTG"] = 0; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of fisher discriminant for node splitting // // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules") Use["RuleFit"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start CutsEmuTMVA" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "CutsEmuTMVA", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] factory->AddVariable( "delRemu" , "delRemu" , "" , 'I' ); //0 factory->AddVariable( "Emumass" , "Emumass" ,"", 'F' ); //1 factory->AddVariable( "ZmassSVD" , "ZmassSVD" , "", 'I' ); //2 factory->AddVariable( "Naj" , "Naj" , "" , 'I' ); //3 factory->AddVariable( "DitauMass" , "DitauMass" , "" , 'F' ); //4 factory->AddVariable( "pZeta65" , "pZeta65", "" , 'F' ); //5 factory->AddVariable( "DphiEMU := abs(Dphiemu)" , "DphiEMU" , "", 'F' ); //6 factory->AddVariable( "pZeta85" , "pZeta85" , "" , 'F' ); //7 // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables //factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' ); // Read training and test data // (it is also possible to use ASCII format as input -> see TMVA Users Guide) // (it is also possible to use ASCII format as input -> see TMVA Users Guide) TFile *inputBtt = TFile::Open("/home/hep/wilken/CandReaderForTaus/src/UserCode/wilken/FirstPlots/TTJets.root"); TFile *inputBzz = TFile::Open("/home/hep/wilken/CandReaderForTaus/src/UserCode/wilken/FirstPlots/ZZ.root"); TFile *inputtw = TFile::Open("/home/hep/wilken/CandReaderForTaus/src/UserCode/wilken/FirstPlots/T_tW.root"); TFile *inputtbw = TFile::Open("/home/hep/wilken/CandReaderForTaus/src/UserCode/wilken/FirstPlots/Tbar_tW.root"); TFile *inputww = TFile::Open("/home/hep/wilken/CandReaderForTaus/src/UserCode/wilken/FirstPlots/WW.root"); TFile *inputwz = TFile::Open("/home/hep/wilken/CandReaderForTaus/src/UserCode/wilken/FirstPlots/WZ.root"); TFile *inputwj = TFile::Open("/home/hep/wilken/CandReaderForTaus/src/UserCode/wilken/FirstPlots/WJets.root"); TFile *inputBzj = TFile::Open("/home/hep/wilken/CandReaderForTaus/src/UserCode/wilken/FirstPlots/DY_M50.root"); TFile *inputBzj2 = TFile::Open("/home/hep/wilken/CandReaderForTaus/src/UserCode/wilken/FirstPlots/DY_PtZ.root"); TFile *inputS = TFile::Open("/home/hep/wilken/CandReaderForTaus/src/UserCode/wilken/FirstPlots/Filtered125emu.root"); std::cout << "--- CutsEmuTMVA : Using input file: " << inputS->GetName() << std::endl; // --- Register the training and test trees TTree *signal = (TTree*) inputS->Get("TMVA_tree"); TTree *backgroundtt = (TTree*) inputBtt ->Get("TMVA_tree"); TTree *backgroundzz = (TTree*) inputBzz ->Get("TMVA_tree"); TTree *backgroundzj = (TTree*) inputBzj ->Get("TMVA_tree"); TTree *backgroundzj2 = (TTree*) inputBzj2 ->Get("TMVA_tree"); TTree *backgroundtw = (TTree*) inputtw ->Get("TMVA_tree"); TTree *backgroundtbw = (TTree*) inputtbw ->Get("TMVA_tree"); TTree *backgroundww = (TTree*) inputww ->Get("TMVA_tree"); TTree *backgroundwz = (TTree*) inputwz ->Get("TMVA_tree"); // TTree *backgroundwj = (TTree*) inputwj ->Get("TMVA_tree"); factory->AddSignalTree ( signal , lumi/(lumiZH125Filtered/2.0) ); // You can add an arbitrary number of signal or background trees if(backgroundtbw->GetEntries() > 0) factory->AddBackgroundTree(backgroundtbw, lumi/(lumiTtWb/2.0)); if(backgroundtw->GetEntries() > 0) factory->AddBackgroundTree(backgroundtw,lumi/(lumiTtW/2.0)); if(backgroundww->GetEntries() > 0) factory->AddBackgroundTree(backgroundww,lumi/(lumiWW/2.0)); if(backgroundwz->GetEntries() > 0) factory->AddBackgroundTree(backgroundwz,lumi/(lumiWZ/2.0)); // if(backgroundwj->GetEntries() > 0) factory->AddBackgroundTree(backgroundwj,lumi/(lumiWJ/2.0)); if(backgroundzz->GetEntries() > 0) factory->AddBackgroundTree(backgroundzz , lumi/(lumiZZ/2.0)); if(backgroundtt->GetEntries() > 0) factory->AddBackgroundTree(backgroundtt , lumi/(lumiTT/2.0)); if(backgroundzj->GetEntries() > 0) factory->AddBackgroundTree(backgroundzj,lumi/(lumiZJL/2.0)); if(backgroundzj2->GetEntries() > 0) factory->AddBackgroundTree(backgroundzj2,lumi/(lumiZJH/2.0)); // To give different trees for training and testing, do as follows: // factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ); // factory->AddSignalTree( signalTestTree, signalTestWeight, "Test" ); // Use the following code instead of the above two or four lines to add signal and background // training and test events "by hand" // NOTE that in this case one should not give expressions (such as "var1+var2") in the input // variable definition, but simply compute the expression before adding the event // // // --- begin ---------------------------------------------------------- // std::vector<Double_t> vars( 4 ); // vector has size of number of input variables // Float_t treevars[4], weight; // // // Signal // for (UInt_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (UInt_t i=0; i<signal->GetEntries(); i++) { // signal->GetEntry(i); // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < signal->GetEntries()/2.0) factory->AddSignalTrainingEvent( vars, signalWeight ); // else factory->AddSignalTestEvent ( vars, signalWeight ); // } // // // Background (has event weights) // background->SetBranchAddress( "weight", &weight ); // for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (UInt_t i=0; i<background->GetEntries(); i++) { // background->GetEntry(i); // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight*weight ); // else factory->AddBackgroundTestEvent ( vars, backgroundWeight*weight ); // } // --- end ------------------------------------------------------------ // // --- end of tree registration // Set individual event weights (the variables must exist in the original TTree) // for signal : factory->SetSignalWeightExpression ("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("weight1*weight2"); //factory->SetBackgroundWeightExpression( "weight" ); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = "jetCHF0>0.12&&Hmass<170&&Hmass>80&&Mte<95&&Mtmu<135"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; // Tell the factory how to use the training and testing events // // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); TCut mycuts,mycutb; bool origCuts = false; mycuts = ""; mycutb = ""; factory->PrepareTrainingAndTestTree( mycuts, mycutb,"nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=None:!V" ); // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=GA:EffSel:VarProp[1]=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood ("naive Bayes estimator") if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density estimator to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=100:nEventsMin=100:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.3:SeparationType=GiniIndex:nCuts=35:PruneMethod=NoPruning" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", "!H:!V:NTrees=50:nEventsMin=150:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier usage, see: CutsEmuTMVACategory // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","GA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> CutsEmuTMVA is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void TMVAClassificationCategory() { //--------------------------------------------------------------- // Example for usage of different event categories with classifiers std::cout << std::endl << "==> Start TMVAClassificationCategory" << std::endl; bool batchMode = false; // Create a new root output file. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object (see TMVAClassification.C for more information) std::string factoryOptions( "!V:!Silent:Transformations=I;D;P;G,D" ); if (batchMode) factoryOptions += ":!Color:!DrawProgressBar"; TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationCategory", outputFile, factoryOptions ); // Define the input variables used for the MVA training factory->AddVariable( "var1", 'F' ); factory->AddVariable( "var2", 'F' ); factory->AddVariable( "var3", 'F' ); factory->AddVariable( "var4", 'F' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables factory->AddSpectator( "eta" ); // Load the signal and background event samples from ROOT trees TFile *input(0); TString fname( "" ); if (UseOffsetMethod) fname = "data/toy_sigbkg_categ_offset.root"; else fname = "data/toy_sigbkg_categ_varoff.root"; if (!gSystem->AccessPathName( fname )) { // first we try to find tmva_example.root in the local directory std::cout << "--- TMVAClassificationCategory: Accessing " << fname << std::endl; input = TFile::Open( fname ); } if (!input) { std::cout << "ERROR: could not open data file: " << fname << std::endl; exit(1); } TTree *signal = (TTree*)input->Get("TreeS"); TTree *background = (TTree*)input->Get("TreeB"); /// Global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; /// You can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; // Tell the factory how to use the training and testing events factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // ---- Book MVA methods // Fisher discriminant factory->BookMethod( TMVA::Types::kFisher, "Fisher", "!H:!V:Fisher" ); // Likelihood factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // --- Categorised classifier TMVA::MethodCategory* mcat = 0; // The variable sets TString theCat1Vars = "var1:var2:var3:var4"; TString theCat2Vars = (UseOffsetMethod ? "var1:var2:var3:var4" : "var1:var2:var3"); // Fisher with categories TMVA::MethodBase* fiCat = factory->BookMethod( TMVA::Types::kCategory, "FisherCat","" ); mcat = dynamic_cast<TMVA::MethodCategory*>(fiCat); mcat->AddMethod( "abs(eta)<=1.3", theCat1Vars, TMVA::Types::kFisher, "Category_Fisher_1","!H:!V:Fisher" ); mcat->AddMethod( "abs(eta)>1.3", theCat2Vars, TMVA::Types::kFisher, "Category_Fisher_2","!H:!V:Fisher" ); // Likelihood with categories TMVA::MethodBase* liCat = factory->BookMethod( TMVA::Types::kCategory, "LikelihoodCat","" ); mcat = dynamic_cast<TMVA::MethodCategory*>(liCat); mcat->AddMethod( "abs(eta)<=1.3",theCat1Vars, TMVA::Types::kLikelihood, "Category_Likelihood_1","!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); mcat->AddMethod( "abs(eta)>1.3", theCat2Vars, TMVA::Types::kLikelihood, "Category_Likelihood_2","!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassificationCategory is done!" << std::endl; // Clean up delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void TMVAClassification( TString myMethodList = "" , TString myModel = "") { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // to get access to the GUI and all tmva macros TString tmva_dir(TString(gRootDir) + "/tmva"); if(gSystem->Getenv("TMVASYS")) tmva_dir = TString(gSystem->Getenv("TMVASYS")); gROOT->SetMacroPath(tmva_dir + "/test/:" + gROOT->GetMacroPath() ); gROOT->ProcessLine(".L TMVAGui.C"); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Cut optimisation Use["Cuts"] = 1; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // // --- 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 0; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // --- Mutidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 0; Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting Use["KNN"] = 0; // k-nearest neighbour method // // --- Linear Discriminant Analysis Use["LD"] = 0; // Linear Discriminant identical to Fisher Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 0; // minimisation of user-defined function using Genetics Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 0; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 0; // Recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 0; // uses Adaptive Boost Use["BDTG"] = 0; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of fisher discriminant for node splitting // // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules") Use["RuleFit"] = 0; // --------------------------------------------------------------- // Default model to be trained + tested std::map<std::string,int> Model; // --- Cut optimisation Model[ "MM" ] = 0; // Mass mechanism Model[ "RHC_L" ] = 0; // Right Handed Current Model[ "RHC_E" ] = 0; // Right Handed Current Model[ "M1" ] = 0; // Majoron Model[ "M2" ] = 0; // Majoron Model[ "M3" ] = 0; // Majoron Model[ "M7" ] = 0; // Majoron std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } if(myModel != "") { std::string regModel(myModel); if( Model.find(regModel) == Model.end() ){ std::cout << "Model \"" << myModel << "\" not known in under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Model.begin(); it != Model.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Model[regModel] = 1; } else { std::cout << "No signal model as been specified. You must choose one among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Model.begin(); it != Model.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName; outfileName.Form( "TMVA_%s.root", myModel.Data() ); //TString outfileDir( "/Users/alberto/Software/SuperNEMO/work/nemo3/plot/plot_FINAL_TECHNOTE_20150921/TMVA/" ); TString outfileDir( "/Users/alberto/Software/SuperNEMO/work/nemo3/plot/plot_UPDATE_TECHNOTE_20160429/TMVA/" ); TFile* outputFile = TFile::Open( outfileDir + outfileName , "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TString weightBaseName; weightBaseName.Form("TMVAClassification_%s", myModel.Data()); TMVA::Factory *factory = new TMVA::Factory( weightBaseName , outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] //factory->AddVariable( "myvar1 := var1+var2", 'F' ); //factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' ); //factory->AddVariable( "var3", "Variable 3", "units", 'F' ); //factory->AddVariable( "var4", "Variable 4", "units", 'F' ); factory->AddVariable( "min_el_en" , 'F' ); factory->AddVariable( "max_el_en" , 'F' ); factory->AddVariable( "el_en_asym := (max_el_en-min_el_en)/(min_el_en+max_el_en)" , 'F' ); factory->AddVariable( "el_en_sum := min_el_en+max_el_en" , 'F' ); factory->AddVariable( "cos_theta" , 'F' ); factory->AddVariable( "prob_int" , 'F' ); factory->AddVariable( "min_el_track_len" , 'F' ); factory->AddVariable( "max_el_track_len" , 'F' ); //factory->AddVariable( "min_el_curv := min_el_track_r*min_el_sign" , 'F' ); //factory->AddVariable( "max_el_curv := max_el_track_r*max_el_sign" , 'F' ); //factory->AddVariable( "max_vertex_s" , 'F' ); //factory->AddVariable( "max_vertex_z" , 'F' ); //factory->AddVariable( "min_vertex_s" , 'F' ); //factory->AddVariable( "min_vertex_z" , 'F' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables //factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' ); //factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' ); // Read training and test data // (it is also possible to use ASCII format as input -> see TMVA Users Guide) //TString fdir = "/sps/nemo/scratch/remoto/nemo3/plot/plot_FINAL_TECHNOTE_20150921/"; TString fdir = "/Users/alberto/Software/SuperNEMO/work/nemo3/plot/plot_UPDATE_TECHNOTE_20160429/"; TString fname = "TwoElectronIntTree.root"; TFile *input = TFile::Open( fdir + fname , "READ"); std::cout << "--- TMVAClassification : Using input file: " << input->GetName() << std::endl; TTree * sig_tree = 0; Double_t sig_weight = 1.; if ( Model[ "MM" ] ) sig_tree = (TTree*) input->Get( "Cd116_2b0n_m1_tree" ) ; if ( Model[ "RHC_L" ] ) sig_tree = (TTree*) input->Get( "Cd116_2b0n_m2_tree" ) ; if ( Model[ "RHC_E" ] ) sig_tree = (TTree*) input->Get( "Cd116_2b0n_m18_tree" ) ; if ( Model[ "M1" ] ) sig_tree = (TTree*) input->Get( "Cd116_2b0n_m5_tree" ) ; if ( Model[ "M2" ] ) sig_tree = (TTree*) input->Get( "Cd116_2b0n_m15_tree" ) ; if ( Model[ "M3" ] ) sig_tree = (TTree*) input->Get( "Cd116_2b0n_m6_tree" ) ; if ( Model[ "M7" ] ) sig_tree = (TTree*) input->Get( "Cd116_2b0n_m7_tree" ) ; factory->AddSignalTree( sig_tree , sig_weight ); //Double_t Cd116_2b0n_m1_weight = 1.; //TTree * Cd116_2b0n_m1_tree = (TTree*) input->Get("Cd116_2b0n_m1_tree" ) ; //factory->AddSignalTree( Cd116_2b0n_m1_tree , Cd116_2b0n_m1_weight ); TTree * Cd116_Tl208_tree = (TTree*) input->Get("Cd116_Tl208_tree" ) ; Double_t Cd116_Tl208_weight = 6.52838 ; if( Cd116_Tl208_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Cd116_Tl208_tree , Cd116_Tl208_weight ); }; TTree * Cd116_Ac228_tree = (TTree*) input->Get("Cd116_Ac228_tree" ) ; Double_t Cd116_Ac228_weight = 7.62351 ; if( Cd116_Ac228_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Cd116_Ac228_tree , Cd116_Ac228_weight ); }; TTree * Cd116_Bi212_tree = (TTree*) input->Get("Cd116_Bi212_tree" ) ; Double_t Cd116_Bi212_weight = 3.00708 ; if( Cd116_Bi212_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Cd116_Bi212_tree , Cd116_Bi212_weight ); }; TTree * Cd116_Bi214_tree = (TTree*) input->Get("Cd116_Bi214_tree" ) ; Double_t Cd116_Bi214_weight = 18.1504 ; if( Cd116_Bi214_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Cd116_Bi214_tree , Cd116_Bi214_weight ); }; TTree * Cd116_Pb214_tree = (TTree*) input->Get("Cd116_Pb214_VT_tree" ) ; Double_t Cd116_Pb214_weight = 0.186417 ; if( Cd116_Pb214_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Cd116_Pb214_tree , Cd116_Pb214_weight ); }; TTree * Mylar_Bi214_tree = (TTree*) input->Get("Mylar_Bi214_tree" ) ; Double_t Mylar_Bi214_weight = 11.1346 ; if( Mylar_Bi214_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Mylar_Bi214_tree , Mylar_Bi214_weight ); }; TTree * Mylar_Pb214_tree = (TTree*) input->Get("Mylar_Pb214_tree" ) ; Double_t Mylar_Pb214_weight = 0.496238 ; if( Mylar_Pb214_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Mylar_Pb214_tree , Mylar_Pb214_weight ); }; TTree * Cd116_K40_tree = (TTree*) input->Get("Cd116_K40_tree" ) ; Double_t Cd116_K40_weight = 8.9841+25.8272 ; if( Cd116_K40_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Cd116_K40_tree , Cd116_K40_weight ); }; TTree * Cd116_Pa234m_tree = (TTree*) input->Get("Cd116_Pa234m_tree" ) ; Double_t Cd116_Pa234m_weight = 27.9307+72.4667 ; if( Cd116_Pa234m_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Cd116_Pa234m_tree , Cd116_Pa234m_weight ); }; TTree * SFoil_Bi210_tree = (TTree*) input->Get("SFoil_Bi210_tree" ) ; Double_t SFoil_Bi210_weight = 0+23.2438 ; if( SFoil_Bi210_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SFoil_Bi210_tree , SFoil_Bi210_weight ); }; TTree * SWire_Bi210_tree = (TTree*) input->Get("SWire_Bi210_tree" ) ; Double_t SWire_Bi210_weight = 0.136147+0.624187 ; if( SWire_Bi210_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SWire_Bi210_tree , SWire_Bi210_weight ); }; TTree * SScin_Bi210_tree = (TTree*) input->Get("SScin_Bi210_tree" ) ; Double_t SScin_Bi210_weight = 1.75641 ; if( SScin_Bi210_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SScin_Bi210_tree , SScin_Bi210_weight ); }; TTree * SScin_Bi214_tree = (TTree*) input->Get("SScin_Bi214_tree" ) ; Double_t SScin_Bi214_weight = 0.0510754 ; if( SScin_Bi214_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SScin_Bi214_tree , SScin_Bi214_weight ); }; TTree * SScin_Pb214_tree = (TTree*) input->Get("SScin_Pb214_tree" ) ; Double_t SScin_Pb214_weight = 0 ; if( SScin_Pb214_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SScin_Pb214_tree , SScin_Pb214_weight ); }; TTree * SWire_Tl208_tree = (TTree*) input->Get("SWire_Tl208_tree" ) ; Double_t SWire_Tl208_weight = 0.217623+1.07641 ; if( SWire_Tl208_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SWire_Tl208_tree , SWire_Tl208_weight ); }; TTree * SWire_Bi214_P1_tree = (TTree*) input->Get("SWire_Bi214_tree" ) ; Double_t SWire_Bi214_weight = 21.4188+17.8236 ; if( SWire_Bi214_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SWire_Bi214_tree , SWire_Bi214_weight ); }; TTree * SFoil_Bi214_tree = (TTree*) input->Get("SFoil_Bi214_tree" ) ; Double_t SFoil_Bi214_weight = 5.83533+2.80427 ; if( SFoil_Bi214_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SFoil_Bi214_tree , SFoil_Bi214_weight ); }; TTree * SWire_Pb214_tree = (TTree*) input->Get("SWire_Pb214_tree" ) ; Double_t SWire_Pb214_weight = 0.458486+0.649167 ; if( SWire_Pb214_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SWire_Pb214_tree , SWire_Pb214_weight ); }; TTree * SFoil_Pb214_tree = (TTree*) input->Get("SFoil_Pb214_tree" ) ; Double_t SFoil_Pb214_weight = 0.218761+0.195287 ; if( SFoil_Pb214_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SFoil_Pb214_tree , SFoil_Pb214_weight ); }; TTree * FeShield_Bi214_tree = (TTree*) input->Get("FeShield_Bi214_tree" ) ; Double_t FeShield_Bi214_weight = 50.7021 ; if( FeShield_Bi214_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( FeShield_Bi214_tree , FeShield_Bi214_weight ); }; TTree * FeShield_Tl208_tree = (TTree*) input->Get("FeShield_Tl208_tree" ) ; Double_t FeShield_Tl208_weight = 0.859465 ; if( FeShield_Tl208_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( FeShield_Tl208_tree , FeShield_Tl208_weight ); }; TTree * FeShield_Ac228_tree = (TTree*) input->Get("FeShield_Ac228_tree" ) ; Double_t FeShield_Ac228_weight = 0.126868 ; if( FeShield_Ac228_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( FeShield_Ac228_tree , FeShield_Ac228_weight ); }; TTree * CuTower_Co60_tree = (TTree*) input->Get("CuTower_Co60_tree" ) ; Double_t CuTower_Co60_weight = 3.9407 ; if( CuTower_Co60_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( CuTower_Co60_tree , CuTower_Co60_weight ); }; TTree * Air_Bi214_P1_tree = (TTree*) input->Get("Air_Bi214_tree" ) ; Double_t Air_Bi214_P1_weight = 4.19744 ; if( Air_Bi214_P1_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Air_Bi214_P1_tree , Air_Bi214_P1_weight ); }; TTree * Air_Tl208_P1_tree = (TTree*) input->Get("Air_Tl208_tree" ) ; Double_t Air_Tl208_P1_weight = 0 ; if( Air_Tl208_P1_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Air_Tl208_P1_tree , Air_Tl208_P1_weight ); }; TTree * PMT_Bi214_tree = (TTree*) input->Get("PMT_Bi214_tree" ) ; Double_t PMT_Bi214_weight = 27.9661 ; if( PMT_Bi214_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( PMT_Bi214_tree , PMT_Bi214_weight ); }; TTree * PMT_Tl208_tree = (TTree*) input->Get("PMT_Tl208_tree" ) ; Double_t PMT_Tl208_weight = 22.923 ; if( PMT_Tl208_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( PMT_Tl208_tree , PMT_Tl208_weight ); }; TTree * PMT_Ac228_tree = (TTree*) input->Get("PMT_Ac228_tree" ) ; Double_t PMT_Ac228_weight = 3.60712 ; if( PMT_Ac228_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( PMT_Ac228_tree , PMT_Ac228_weight ); }; TTree * PMT_K40_tree = (TTree*) input->Get("PMT_K40_tree" ) ; Double_t PMT_K40_weight = 16.813 ; if( PMT_K40_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( PMT_K40_tree , PMT_K40_weight ); }; TTree * ScintInn_K40_tree = (TTree*) input->Get("ScintInn_K40_tree" ) ; Double_t ScintInn_K40_weight = 0.333988 ; if( ScintInn_K40_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( ScintInn_K40_tree , ScintInn_K40_weight ); }; TTree * ScintOut_K40_tree = (TTree*) input->Get("ScintOut_K40_tree" ) ; Double_t ScintOut_K40_weight = 0.601178 ; if( ScintOut_K40_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( ScintOut_K40_tree , ScintOut_K40_weight ); }; TTree * ScintPet_K40_tree = (TTree*) input->Get("ScintPet_K40_tree" ) ; Double_t ScintPet_K40_weight = 1.00195 ; if( ScintPet_K40_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( ScintPet_K40_tree , ScintPet_K40_weight ); }; TTree * MuMetal_Pa234m_tree = (TTree*) input->Get("MuMetal_Pa234m_tree" ) ; Double_t MuMetal_Pa234m_weight = 0.739038 ; if( MuMetal_Pa234m_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( MuMetal_Pa234m_tree , MuMetal_Pa234m_weight ); }; TTree * Cd116_2b2n_m14_tree = (TTree*) input->Get("Cd116_2b2n_m14_tree" ) ; Double_t Cd116_2b2n_m14_weight = 4977.55 ; if( Cd116_2b2n_m14_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Cd116_2b2n_m14_tree , Cd116_2b2n_m14_weight ); }; // --- end of tree registration // Set individual event weights (the variables must exist in the original TTree) // for signal : factory->SetSignalWeightExpression ("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("weight1*weight2"); factory->SetBackgroundWeightExpression( "weight" ); // Apply additional cuts on the signal and background samples (can be different) // Apply cut on charge //TCut mycuts = "min_el_sign < 0 && max_el_sign < 0."; //TCut mycutb = "min_el_sign < 0 && max_el_sign < 0."; // Apply cut on vertex //TCut mycuts = "((max_vertex_x - min_vertex_x)**2 + (max_vertex_y - min_vertex_y)**2 <= 4**2)&&((max_vertex_z-min_vertex_z)**2<8**2)"; //TCut mycutb = "((max_vertex_x - min_vertex_x)**2 + (max_vertex_y - min_vertex_y)**2 <= 4**2)&&((max_vertex_z-min_vertex_z)**2<8**2)"; TCut mycuts = ""; TCut mycutb = ""; // Tell the factory how to use the training and testing events // // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood ("naive Bayes estimator") if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density estimator to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" ); if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=-1" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier usage, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // ---- STILL EXPERIMENTAL and only implemented for BDT's ! // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","FitGA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileDir + outfileName ); }
void TMVAClassification( TString myMethodList = "" ) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) // this loads the library TMVA::Tools::Instance(); //--------------------------------------------------------------- // default MVA methods to be trained + tested std::map<std::string,int> Use; Use["Cuts"] = 1; Use["CutsD"] = 1; Use["CutsPCA"] = 1; Use["CutsGA"] = 1; Use["CutsSA"] = 1; // --- Use["Likelihood"] = 1; Use["LikelihoodD"] = 1; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 1; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 1; Use["LikelihoodMIX"] = 1; // --- Use["PDERS"] = 1; Use["PDERSD"] = 1; Use["PDERSPCA"] = 1; Use["PDERSkNN"] = 1; // depreciated until further notice Use["PDEFoam"] = 1; // -- Use["KNN"] = 1; // --- Use["HMatrix"] = 1; Use["Fisher"] = 1; Use["FisherG"] = 1; Use["BoostedFisher"] = 1; Use["LD"] = 1; // --- Use["FDA_GA"] = 1; Use["FDA_SA"] = 1; Use["FDA_MC"] = 1; Use["FDA_MT"] = 1; Use["FDA_GAMT"] = 1; Use["FDA_MCMT"] = 1; // --- Use["MLP"] = 1; // this is the recommended ANN Use["MLPBFGS"] = 1; // recommended ANN with optional training method Use["CFMlpANN"] = 1; // *** missing Use["TMlpANN"] = 1; // --- Use["SVM"] = 1; // --- Use["BDT"] = 1; Use["BDTD"] = 0; Use["BDTG"] = 1; Use["BDTB"] = 0; // --- Use["RuleFit"] = 1; // --- Use["Plugin"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // Create a new root output file. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory will // then run the performance analysis for you. // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] factory->AddVariable( "myvar1 := var1+var2", 'F' ); factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' ); factory->AddVariable( "var3", "Variable 3", "units", 'F' ); factory->AddVariable( "var4", "Variable 4", "units", 'F' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables factory->AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' ); factory->AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' ); // read training and test data if (ReadDataFromAsciiIFormat) { // load the signal and background event samples from ascii files // format in file must be: // var1/F:var2/F:var3/F:var4/F // 0.04551 0.59923 0.32400 -0.19170 // ... TString datFileS = "tmva_example_sig.dat"; TString datFileB = "tmva_example_bkg.dat"; factory->SetInputTrees( datFileS, datFileB ); } else { // load the signal and background event samples from ROOT trees TFile *input(0); TString fname = "../macros/tmva_example.root"; if (!gSystem->AccessPathName( fname )) { input = TFile::Open( fname ); // check if file in local directory exists } else { input = TFile::Open( "http://root.cern.ch/files/tmva_class_example.root" ); // if not: download from ROOT server } if (!input) { std::cout << "ERROR: could not open data file" << std::endl; exit(1); } std::cout << "--- TMVAClassification : Using input file: " << input->GetName() << std::endl; TTree *signal = (TTree*)input->Get("TreeS"); TTree *background = (TTree*)input->Get("TreeB"); // global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; // ====== register trees ==================================================== // // the following method is the prefered one: // you can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); // To give different trees for training and testing, do as follows: // factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ); // factory->AddSignalTree( signalTestTree, signalTestWeight, "Test" ); // Use the following code instead of the above two or four lines to add signal and background // training and test events "by hand" // NOTE that in this case one should not give expressions (such as "var1+var2") in the input // variable definition, but simply compute the expression before adding the event // // // --- begin ---------------------------------------------------------- // std::vector<Double_t> vars( 4 ); // vector has size of number of input variables // Float_t treevars[4]; // for (Int_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (Int_t i=0; i<signal->GetEntries(); i++) { // signal->GetEntry(i); // for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < signal->GetEntries()/2) factory->AddSignalTrainingEvent( vars, signalWeight ); // else factory->AddSignalTestEvent ( vars, signalWeight ); // } // // for (Int_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (Int_t i=0; i<background->GetEntries(); i++) { // background->GetEntry(i); // for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight ); // else factory->AddBackgroundTestEvent ( vars, backgroundWeight ); // } // // --- end ------------------------------------------------------------ // // ====== end of register trees ============================================== } // This would set individual event weights (the variables defined in the // expression need to exist in the original TTree) // for signal : factory->SetSignalWeightExpression("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("weight1*weight2"); factory->SetBackgroundWeightExpression("weight"); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; // tell the factory to use all remaining events in the trees after training for testing: factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // If no numbers of events are given, half of the events in the tree are used for training, and // the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); // ---- Book MVA methods // // please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // test the decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // test the new kernel density estimator if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // test the mixed splines and kernel density estimator (depending on which variable) if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSkNN"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSkNN", "!H:!V:VolumeRangeMode=kNN:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:CutNmin=T:Nmin=100:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" ); // Fisher discriminant if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=60:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2"); // Linear discriminant (same as Fisher) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS" ); // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // -------------------------------------------------------------------------------------------------- // As an example how to use the ROOT plugin mechanism, book BDT via // plugin mechanism if (Use["Plugin"]) { // // first the plugin has to be defined, which can happen either through the following line in the local or global .rootrc: // // # plugin handler plugin name(regexp) class to be instanciated library constructor format // Plugin.TMVA@@MethodBase: ^BDT TMVA::MethodBDT TMVA.1 "MethodBDT(TString,TString,DataSet&,TString)" // // or by telling the global plugin manager directly gPluginMgr->AddHandler("TMVA@@MethodBase", "BDT", "TMVA::MethodBDT", "TMVA.1", "MethodBDT(TString,TString,DataSet&,TString)"); factory->BookMethod( TMVA::Types::kPlugins, "BDT", "!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=50" ); } // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethodsForClassification(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void testBDT(){ //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); /* TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "testBDT", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // global event weights per tree (see below for setting event-wise weights) //Double_t signalWeight = 0.003582; //Double_t backgroundWeight = 0.0269; Double_t signalWeight = 1; Double_t backgroundWeight = 1; TFile *input_sig = TFile::Open( "signal_exclusif.root" ); TFile *input_wz = TFile::Open( "bruit_w_z.root" ); TTree *signal = (TTree*)input_sig->Get("tree"); TTree *background = (TTree*)input_wz->Get("tree"); // You can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); factory->AddVariable("PT_z" , 'F'); factory->AddVariable("ASYM" , 'F'); factory->AddVariable("PHI_lw_b", 'F'); factory->AddVariable("M_top", 'F'); */ TString outfileName( "bdtTMVA_FCNC_tZ.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "doBDT_FCNC_tZ", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // global event weights per tree (see below for setting event-wise weights) //Double_t signalWeight = 0.003582; //Double_t backgroundWeight = 0.0269; Double_t signalWeight = 1; Double_t backgroundWeight = 1; TFile *input_sig = TFile::Open( "proof.root" ); TFile *input_wz = TFile::Open( "proof.root" ); TTree *signal = (TTree*)input_sig->Get("Ttree_FCNCkut"); TTree *background_WZ = (TTree*)input_wz->Get("Ttree_WZ"); /*TTree *background_ZZ = (TTree*)input_wz->Get("Ttree_ZZ"); TTree *background_WW = (TTree*)input_wz->Get("Ttree_WW"); TTree *background_TTbar = (TTree*)input_wz->Get("Ttree_TTbar"); TTree *background_Zjets = (TTree*)input_wz->Get("Ttree_Zjets"); TTree *background_Wjets = (TTree*)input_wz->Get("Ttree_Wjets"); TTree *background_TtW = (TTree*)input_wz->Get("Ttree_TtW"); TTree *background_TbartW = (TTree*)input_wz->Get("Ttree_TbartW");*/ // You can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background_WZ, backgroundWeight ); /*factory->AddBackgroundTree( background_ZZ, backgroundWeight ); factory->AddBackgroundTree( background_WW, backgroundWeight ); factory->AddBackgroundTree( background_TTbar, backgroundWeight ); factory->AddBackgroundTree( background_Zjets, backgroundWeight ); factory->AddBackgroundTree( background_Wjets, backgroundWeight ); factory->AddBackgroundTree( background_TtW, backgroundWeight ); factory->AddBackgroundTree( background_TbartW, backgroundWeight );*/ factory->AddVariable("tree_topMass", 'F'); factory->AddVariable("tree_deltaPhilb", 'F'); factory->AddVariable("tree_asym", 'F'); factory->AddVariable("tree_Zpt", 'F'); // to set weights. The variable must exist in the tree // for signal : factory->SetSignalWeightExpression ("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("weight1*weight2"); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=100:nEventsMin=100:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void Training_BDT(TString channel, bool isZHvsOther, TString cat){ TString name; if (isZHvsOther) name="ZHvsOther"; else name="TTvsDY"; if (channel=="Mu") name=name+"_Mu"; else name=name+"_El"; if (cat=="2j")name=name+"_2j"; else if(cat=="2j") name=name+"_3j"; else name=name+"_noCat"; TMVA::Tools::Instance(); TFile* outputFile = TFile::Open( "outputtrainning"+name+".root", "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "BDT_NN_trainning"+name, outputFile,"!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );; //TFile *input = TFile::Open( "study_histo.root" ); TFile *inputTT = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/TTFullLept_Summer12_final_skimed_llbbX_withWeights_V3.root" ); TFile *inputDYM10 = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/DYjets_M10to50_Summer12_final_skimed_llbbX_withWeights_V3.root" ); TFile *inputDY = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/DYjets_Summer12_final_skimed_llbbX_withWeights_V3.root" ); TFile *inputZZ = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/ZZ_Summer12_final_skimed_llbbX_withWeights_V3.root" ); TFile *inputZH = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/ZH125_Summer12_final_skimed_llbbX_withWeights_V3.root" ); TFile *inputWW = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/WW_Summer12_final_skimed_llbbX_withWeights_V3.root" ); TFile *inputWZ = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/WZ_Summer12_final_skimed_llbbX_withWeights_V3.root" ); TFile *inputWt = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/Wt_Summer12_final_skimed_llbbX_withWeights_V3.root" ); TFile *inputWtbar = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/Wtbar_Summer12_final_skimed_llbbX_withWeights_V3.root" ); TFile *inputTTSemi = TFile::Open( "/home/fynu/cbeluffi/storage/RDS/5320_JP_Skimmed_V5/llbbX/TTSemiLept_Summer12_final_skimed_llbbX_withWeights_V3.root" ); TTree *signal = (TTree*)inputZH->Get("rds_zbb"); TTree *backgroundDY = (TTree*)inputDY->Get("rds_zbb"); TTree *backgroundZZ = (TTree*)inputZZ->Get("rds_zbb"); TTree *backgroundTT = (TTree*)inputTT->Get("rds_zbb"); TTree *backgroundDYM10 = (TTree*)inputDYM10->Get("rds_zbb"); TTree *backgroundWW = (TTree*)inputWW->Get("rds_zbb"); TTree *backgroundWZ = (TTree*)inputWZ->Get("rds_zbb"); TTree *backgroundWt = (TTree*)inputWt->Get("rds_zbb"); TTree *backgroundWtbar = (TTree*)inputWtbar->Get("rds_zbb"); TTree *backgroundTTSemi = (TTree*)inputTTSemi->Get("rds_zbb"); if (isZHvsOther ){ factory->AddSignalTree( signal , 1.2); factory->AddBackgroundTree( backgroundDY, 0.54); factory->AddBackgroundTree( backgroundZZ, 0.1); factory->AddBackgroundTree( backgroundDYM10, 0.02); factory->AddBackgroundTree( backgroundTT, 0.3); factory->AddBackgroundTree(backgroundWW , 0.005); factory->AddBackgroundTree(backgroundWZ , 0.005); factory->AddBackgroundTree(backgroundWt , 0.01); factory->AddBackgroundTree(backgroundWtbar ,0.01 ); factory->AddBackgroundTree(backgroundTTSemi ,0.01 ); } else{ factory->AddSignalTree( backgroundDY , 1.0); factory->AddBackgroundTree( backgroundTT, 1.0); } if (isZHvsOther ){ factory->AddVariable("MinusLogW_ZH_cor3", 'F'); factory->AddVariable("MinusLogW_ZH_cor0", 'F'); factory->AddVariable("MinusLogW_ZZ_cor3", 'F'); factory->AddVariable("MinusLogW_ZZ_cor0", 'F'); factory->AddVariable("MinusLogW_TT", 'F'); factory->AddVariable("MinusLogW_gg_Zbb", 'F'); factory->AddVariable("MinusLogW_qq_Zbb", 'F'); } else{ factory->AddVariable("MinusLogW_TT", 'F'); factory->AddVariable("MinusLogW_gg_Zbb", 'F'); factory->AddVariable("MinusLogW_qq_Zbb", 'F'); } TCut mycuts, mycutb; if(isZHvsOther){ if(channel=="Mu"){ if(cat=="2j"){ mycuts = "(MinusLogW_ZH_cor3 > 2 && MinusLogW_ZH_cor0 > 2 &&MinusLogW_ZZ_cor0> 2 &&MinusLogW_ZZ_cor3> 2 &&MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_ZH_cor3<50 && MinusLogW_ZH_cor0<50 &&MinusLogW_ZZ_cor0<50 &&MinusLogW_ZZ_cor3<50 &&MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_6_idx && eventSelectionmu1pt_inc>20 && eventSelectionmu2pt_inc>20 && eventSelectiondilepM_inc > 76 && eventSelectiondilepM_inc < 106 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30&& jetmetnj==2 && (eventSelectiondijetM_inc > 80 && eventSelectiondijetM_inc < 150))"; mycutb = "(MinusLogW_ZH_cor3 > 2 && MinusLogW_ZH_cor0 > 2 &&MinusLogW_ZZ_cor0> 2 &&MinusLogW_ZZ_cor3> 2 &&MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_ZH_cor3<50 && MinusLogW_ZH_cor0<50 &&MinusLogW_ZZ_cor0<50 &&MinusLogW_ZZ_cor3<50 &&MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_6_idx && eventSelectionmu1pt_inc>20 && eventSelectionmu2pt_inc>20 && eventSelectiondilepM_inc > 76 && eventSelectiondilepM_inc < 106 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30&& jetmetnj==2 && (eventSelectiondijetM_inc > 80 && eventSelectiondijetM_inc < 150))"; } else{ mycuts = "(MinusLogW_ZH_cor3 > 2 && MinusLogW_ZH_cor0 > 2 &&MinusLogW_ZZ_cor0> 2 &&MinusLogW_ZZ_cor3> 2 &&MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_ZH_cor3<50 && MinusLogW_ZH_cor0<50 &&MinusLogW_ZZ_cor0<50 &&MinusLogW_ZZ_cor3<50 &&MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_6_idx && eventSelectionmu1pt_inc>20 && eventSelectionmu2pt_inc>20 && eventSelectiondilepM_inc > 76 && eventSelectiondilepM_inc < 106 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30&& jetmetnj>2 && (eventSelectiondijetM_inc > 50 && eventSelectiondijetM_inc < 150))"; mycutb = "(MinusLogW_ZH_cor3 > 2 && MinusLogW_ZH_cor0 > 2 &&MinusLogW_ZZ_cor0> 2 &&MinusLogW_ZZ_cor3> 2 &&MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_ZH_cor3<50 && MinusLogW_ZH_cor0<50 &&MinusLogW_ZZ_cor0<50 &&MinusLogW_ZZ_cor3<50 &&MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_6_idx && eventSelectionmu1pt_inc>20 && eventSelectionmu2pt_inc>20 && eventSelectiondilepM_inc > 76 && eventSelectiondilepM_inc < 106 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30&& jetmetnj>2 && (eventSelectiondijetM_inc > 50 && eventSelectiondijetM_inc < 150))"; } } else { if(cat=="2j"){ mycuts = "(MinusLogW_ZH_cor3 > 2 && MinusLogW_ZH_cor0 > 2 &&MinusLogW_ZZ_cor0> 2 &&MinusLogW_ZZ_cor3> 2 &&MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_ZH_cor3<50 && MinusLogW_ZH_cor0<50 &&MinusLogW_ZZ_cor0<50 &&MinusLogW_ZZ_cor3<50 &&MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_16_idx && eventSelectionel1pt_inc>20 && eventSelectionel2pt_inc>20 && eventSelectiondilepM_inc > 76 && eventSelectiondilepM_inc < 106 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30&& jetmetnj==2 && (eventSelectiondijetM_inc > 80 && eventSelectiondijetM_inc < 150))"; mycutb = "(MinusLogW_ZH_cor3 > 2 && MinusLogW_ZH_cor0 > 2 &&MinusLogW_ZZ_cor0> 2 &&MinusLogW_ZZ_cor3> 2 &&MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_ZH_cor3<50 && MinusLogW_ZH_cor0<50 &&MinusLogW_ZZ_cor0<50 &&MinusLogW_ZZ_cor3<50 &&MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_16_idx && eventSelectionel1pt_inc>20 && eventSelectionel2pt_inc>20 && eventSelectiondilepM_inc > 76 && eventSelectiondilepM_inc < 106 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30&& jetmetnj==2 && (eventSelectiondijetM_inc > 80 && eventSelectiondijetM_inc < 150))"; } else{ mycuts = "(MinusLogW_ZH_cor3 > 2 && MinusLogW_ZH_cor0 > 2 &&MinusLogW_ZZ_cor0> 2 &&MinusLogW_ZZ_cor3> 2 &&MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_ZH_cor3<50 && MinusLogW_ZH_cor0<50 &&MinusLogW_ZZ_cor0<50 &&MinusLogW_ZZ_cor3<50 &&MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_16_idx && eventSelectionel1pt_inc>20 && eventSelectionel2pt_inc>20 && eventSelectiondilepM_inc > 76 && eventSelectiondilepM_inc < 106 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30&& jetmetnj>2 && (eventSelectiondijetM_inc > 50 && eventSelectiondijetM_inc < 150))"; mycutb = "(MinusLogW_ZH_cor3 > 2 && MinusLogW_ZH_cor0 > 2 &&MinusLogW_ZZ_cor0> 2 &&MinusLogW_ZZ_cor3> 2 &&MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_ZH_cor3<50 && MinusLogW_ZH_cor0<50 &&MinusLogW_ZZ_cor0<50 &&MinusLogW_ZZ_cor3<50 &&MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_16_idx && eventSelectionel1pt_inc>20 && eventSelectionel2pt_inc>20 && eventSelectiondilepM_inc > 76 && eventSelectiondilepM_inc < 106 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30&& jetmetnj>2 && (eventSelectiondijetM_inc > 50 && eventSelectiondijetM_inc < 150))"; } } } else{ if(channel=="Mu"){ mycuts = "(MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_6_idx && eventSelectionmu1pt_inc>20 && eventSelectionmu2pt_inc>20 && eventSelectiondilepM_inc > 60 && eventSelectiondilepM_inc < 120 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30)"; mycutb = "(MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_6_idx && eventSelectionmu1pt_inc>20 && eventSelectionmu2pt_inc>20 && eventSelectiondilepM_inc > 60 && eventSelectiondilepM_inc < 120 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30)"; } else { mycuts = "(MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_16_idx && eventSelectionel1pt_inc>20 && eventSelectionel2pt_inc>20 && eventSelectiondilepM_inc > 60 && eventSelectiondilepM_inc < 120 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30)"; mycutb = "(MinusLogW_TT> 2 &&MinusLogW_gg_Zbb> 2 &&MinusLogW_qq_Zbb> 2)&&(MinusLogW_TT<50 &&MinusLogW_gg_Zbb<50 &&MinusLogW_qq_Zbb<50)&& (rc_stage_16_idx && eventSelectionel1pt_inc>20 && eventSelectionel2pt_inc>20 && eventSelectiondilepM_inc > 60 && eventSelectiondilepM_inc < 120 && jetmetMETsignificance<10 && jetmetbjet1pt>30 && jetmetbjet2pt>30)"; } } factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); if (isZHvsOther)factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=40:nEventsMin=600:MaxDepth=10:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); else factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=50:nEventsMin=1000:MaxDepth=20:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); if (isZHvsOther)factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=1000:HiddenLayers=N+6:TestRate=5:!UseRegulator" ); else factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=N+2:TestRate=5:!UseRegulator" ); // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch() ) TMVAGui( "outputtrainning"+name+".root" ); }
void TMVAClassify_SepSSFromOS( TString myMethodList = "" ) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // to get access to the GUI and all tmva macros TString tmva_dir(TString(gRootDir) + "/tmva"); if(gSystem->Getenv("TMVASYS")) tmva_dir = TString(gSystem->Getenv("TMVASYS")); gROOT->SetMacroPath(tmva_dir + "/test/:" + gROOT->GetMacroPath() ); gROOT->ProcessLine(".L TMVAGui.C"); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Cut optimisation Use["Cuts"] = 0; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // // --- 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 0; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // --- Mutidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 0; Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting Use["KNN"] = 0; // k-nearest neighbour method // // --- Linear Discriminant Analysis Use["LD"] = 0; // Linear Discriminant identical to Fisher Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 0; // minimisation of user-defined function using Genetics Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 0; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 0; // Recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 0; // uses Adaptive Boost Use["BDTG"] = 1; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of fisher discriminant for node splitting // // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules") Use["RuleFit"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName( "../../classifiers/D02KPi/TMVA/TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] factory->AddVariable( "track_angletod", "Angle to D0", "", 'F' ); factory->AddVariable( "track_angletochild1", "Angle to kaon from D0", "", 'F' ); factory->AddVariable( "track_angletochild2", "Angle to pion from D0", "", 'F' ); factory->AddVariable( "log(track_devdist)", "log(DOCA to D0 vertex)", "", 'F' ); //factory->AddVariable( "log(track_docatochild1)", "log(DOCA to kaon from D0)", "", 'F' ); //factory->AddVariable( "log(track_docatochild2)", "log(DOCA to pion from D0)", "", 'F' ); //factory->AddVariable( "log(track_docatod)", "Distance to D0 trajectory", "", 'F' ); factory->AddVariable( "track_ptratiod", "Ratio track PT to D0 PT ", "", 'F' ); factory->AddVariable( "track_ptratiochild1", "Ratio track PT to kaon PT", "", 'F' ); factory->AddVariable( "track_ptratiochild2", "Distance track PT to pion PT", "", 'F' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables //factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' ); //factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' ); // Read training and test data // (it is also possible to use ASCII format as input -> see TMVA Users Guide) TFile *input_ss = TFile::Open( "../../data/mcd02kpi_ss_forsepssfromos.root" ); TFile *input_os = TFile::Open( "../../data/mcd02kpi_os_forsepssfromos.root" ); // --- Register the training and test trees TTree *intree_ss = (TTree*)input_ss->Get("DecayTree"); TTree *intree_os = (TTree*)input_os->Get("DecayTree"); // global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; factory->AddSignalTree ( intree_os, signalWeight ); factory->AddBackgroundTree( intree_ss, backgroundWeight ); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; // Tell the factory how to use the training and testing events factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=80000:nTrain_Background=80000:nTest_Signal=80000:nTest_Background=80000:SplitMode=Random:NormMode=NumEvents:!V" ); // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood ("naive Bayes estimator") if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density estimator to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=N+2:TestRate=5:!UseRegulator" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=4000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=3" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" ); if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier usage, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // ---- STILL EXPERIMENTAL and only implemented for BDT's ! // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","FitGA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void WWTMVAClassification( TString myMethodList = "", double mH=400., int njets, TString chan="el" ) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) // this loads the library TMVA::Tools::Instance(); //--------------------------------------------------------------- // default MVA methods to be trained + tested std::map<std::string,int> Use; Use["Cuts"] = 0; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // --- Use["Likelihood"] = 1; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // --- Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDERSkNN"] = 0; // depreciated until further notice Use["PDEFoam"] = 0; // -- Use["KNN"] = 0; // --- Use["HMatrix"] = 0; Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; Use["LD"] = 1; // --- Use["FDA_GA"] = 0; Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // --- Use["MLP"] = 0; // this is the recommended ANN Use["MLPBFGS"] = 0; // recommended ANN with optional training method Use["CFMlpANN"] = 0; // *** missing Use["TMlpANN"] = 0; // --- Use["SVM"] = 0; // --- Use["BDT"] = 1; Use["BDTD"] = 0; Use["BDTG"] = 0; Use["BDTB"] = 0; // --- Use["RuleFit"] = 0; // --- Use["Plugin"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // Create a new root output file. ///////TString outfileName( "TMVA.root" ); char outfileName[192]; sprintf(outfileName,"TMVA_%3.0f_nJ%i_%s.root",mH,njets,chan.Data()); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory will // then run the performance analysis for you. // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string char classifierName[192]; sprintf(classifierName,"TMVAClassification_%3.0f_nJ%i_%s",mH,njets,chan.Data()); TMVA::Factory *factory = new TMVA::Factory( classifierName, outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" ); //TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, // "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // leptonic W factory->AddVariable("WWpt := ptlvjj", 'F'); factory->AddVariable("WWy := ylvjj", 'F'); //factory->AddVariable("Wpt := W_pt", 'F'); //factory->AddVariable("MET := event_met_pfmet", 'F'); if (chan = "mu"){ factory->AddVariable("LepCharge := W_muon_charge", 'F'); } else if (chan = "el"){ factory->AddVariable("LepCharge := W_electron_charge", 'F'); } else{ std::cout << "Invalid channel!" << std::endl; return; } // factory->AddVariable("J1QGL := JetPFCor_QGLikelihood[0]", 'F'); // factory->AddVariable("J2QGL := JetPFCor_QGLikelihood[1]", 'F'); factory->AddVariable("costheta1 := ang_ha", 'F'); factory->AddVariable("costheta2 := ang_hb", 'F'); factory->AddVariable("costhetaS := ang_hs", 'F'); factory->AddVariable("Phi := ang_phi", 'F'); factory->AddVariable("Phi2 := ang_phib", 'F'); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables factory->AddSpectator("run := event_runNo", "I"); factory->AddSpectator("lumi := event_lumi", "I"); factory->AddSpectator("event := event_evtNo", "I"); factory->AddSpectator("mjj := Mass2j_PFCor", "F"); factory->AddSpectator("mlvjj := MassV2j_PFCor", "F"); factory->AddSpectator("masslvjj := masslvjj", "F"); //factory->AddSpectator("ggdevt := ggdevt", "F"); //factory->AddSpectator("fit_mlvjj := fit_mlvjj", "F"); // read training and test data char signalOutputName[192]; sprintf(signalOutputName,"/uscms_data/d2/kalanand/WjjTrees/Full2011DataFall11MC/ReducedTree/RD_%s_HWWMH%3.0f_CMSSW428.root",chan.Data(),mH); TFile *input1 = TFile::Open( signalOutputName ); //TFile *input1 = TFile::Open( "/uscms_data/d2/kalanand/WjjTrees/Full2011DataFall11MC/ReducedTree/RD_mu_HWWMH400_CMSSW428.root"); char backgroundOutputName[192]; sprintf(backgroundOutputName,"/uscms_data/d2/kalanand/WjjTrees/Full2011DataFall11MC/ReducedTree/RD_%s_WpJ_CMSSW428.root",chan.Data()); TFile *input2 = TFile::Open( backgroundOutputName ); std::cout << "--- TMVAClassification : Using input file: " << input1->GetName() << std::endl; TTree *signal = (TTree*)input1->Get("WJet"); TTree *background = (TTree*)input2->Get("WJet"); // global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; // ====== register trees ==================================================== // // the following method is the prefered one: // you can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); // To give different trees for training and testing, do as follows: // factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ); // factory->AddSignalTree( signalTestTree, signalTestWeight, "Test" ); // Use the following code instead of the above two or four lines to add signal and background // training and test events "by hand" // NOTE that in this case one should not give expressions (such as "var1+var2") in the input // variable definition, but simply compute the expression before adding the event // // // --- begin ---------------------------------------------------------- // std::vector<Double_t> vars( 4 ); // vector has size of number of input variables // Float_t treevars[4]; // for (Int_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (Int_t i=0; i<signal->GetEntries(); i++) { // signal->GetEntry(i); // for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < signal->GetEntries()/2) factory->AddSignalTrainingEvent( vars, signalWeight ); // else factory->AddSignalTestEvent ( vars, signalWeight ); // } // // for (Int_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (Int_t i=0; i<background->GetEntries(); i++) { // background->GetEntry(i); // for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight ); // else factory->AddBackgroundTestEvent ( vars, backgroundWeight ); // } // // --- end ------------------------------------------------------------ // // ====== end of register trees ============================================== // This would set individual event weights (the variables defined in the // expression need to exist in the original TTree) // for signal : factory->SetSignalWeightExpression("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("weight1*weight2"); // factory->SetBackgroundWeightExpression("weight"); // Apply additional cuts on the signal and background samples (can be different) // TCut mycuts = "abs(eta)>1.5"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; // TCut mycutb = "abs(eta)>1.5"; // for example: TCut mycutb = "abs(var1)<0.5"; char * mass4bodycut = ""; if(njets==2) { if(chan.Contains("mu")) { if(mH==170.) mass4bodycut = "(fit_mlvjj>176 && fit_mlvjj<262)"; // 2j170mu ===== if(mH==180.) mass4bodycut = "(fit_mlvjj>179 && fit_mlvjj<256)"; // 2j180mu if(mH==190.) mass4bodycut = "(fit_mlvjj>186 && fit_mlvjj<214)"; // 2j190mu if(mH==200.) mass4bodycut = "(fit_mlvjj>191 && fit_mlvjj<226)"; // 2j200mu if(mH==250.) mass4bodycut = "(fit_mlvjj>226 && fit_mlvjj<287)"; // 2j250mu if(mH==300.) mass4bodycut = "(fit_mlvjj>265 && fit_mlvjj<347)"; // 2j300mu if(mH==350.) mass4bodycut = "(fit_mlvjj>308 && fit_mlvjj<401)"; // 2j350mu if(mH==400.) mass4bodycut = "(fit_mlvjj>346 && fit_mlvjj<457)"; // 2j400mu if(mH==450.) mass4bodycut = "(fit_mlvjj>381 && fit_mlvjj<512)"; // 2j450mu if(mH==500.) mass4bodycut = "(fit_mlvjj>415 && fit_mlvjj<568)"; // 2j500mu if(mH==550.) mass4bodycut = "(fit_mlvjj>440 && fit_mlvjj<617)"; // 2j550mu if(mH==600.) mass4bodycut = "(fit_mlvjj>462 && fit_mlvjj<663)"; // 2j600mu } if(chan.Contains("el")) { if(mH==170.) mass4bodycut = "(fit_mlvjj>176 && fit_mlvjj<262)"; // 2j170el ===== if(mH==180.) mass4bodycut = "(fit_mlvjj>179 && fit_mlvjj<256)"; // 2j180el if(mH==190.) mass4bodycut = "(fit_mlvjj>186 && fit_mlvjj<214)"; // 2j190el if(mH==200.) mass4bodycut = "(fit_mlvjj>191 && fit_mlvjj<226)"; // 2j200el if(mH==250.) mass4bodycut = "(fit_mlvjj>226 && fit_mlvjj<287)"; // 2j250el if(mH==300.) mass4bodycut = "(fit_mlvjj>265 && fit_mlvjj<347)"; // 2j300el if(mH==350.) mass4bodycut = "(fit_mlvjj>308 && fit_mlvjj<401)"; // 2j350el if(mH==400.) mass4bodycut = "(fit_mlvjj>346 && fit_mlvjj<457)"; // 2j400el if(mH==450.) mass4bodycut = "(fit_mlvjj>381 && fit_mlvjj<512)"; // 2j450el if(mH==500.) mass4bodycut = "(fit_mlvjj>415 && fit_mlvjj<568)"; // 2j500el if(mH==550.) mass4bodycut = "(fit_mlvjj>440 && fit_mlvjj<617)"; // 2j550el if(mH==600.) mass4bodycut = "(fit_mlvjj>462 && fit_mlvjj<663)"; // 2j600el } } if(njets==3) { if(chan.Contains("mu")) { if(mH==170.) mass4bodycut = "(fit_mlvjj>150 && fit_mlvjj<271)"; // 3j170mu ===== if(mH==180.) mass4bodycut = "(fit_mlvjj>175 && fit_mlvjj<284)"; // 3j180mu if(mH==190.) mass4bodycut = "(fit_mlvjj>185 && fit_mlvjj<290)"; // 3j190mu if(mH==200.) mass4bodycut = "(fit_mlvjj>188 && fit_mlvjj<293)"; // 3j200mu if(mH==250.) mass4bodycut = "(fit_mlvjj>216 && fit_mlvjj<300)"; // 3j250mu if(mH==300.) mass4bodycut = "(fit_mlvjj>241 && fit_mlvjj<355)"; // 3j300mu if(mH==350.) mass4bodycut = "(fit_mlvjj>269 && fit_mlvjj<407)"; // 3j350mu if(mH==400.) mass4bodycut = "(fit_mlvjj>300 && fit_mlvjj<465)"; // 3j400mu if(mH==450.) mass4bodycut = "(fit_mlvjj>332 && fit_mlvjj<518)"; // 3j450mu if(mH==500.) mass4bodycut = "(fit_mlvjj>362 && fit_mlvjj<569)"; // 3j500mu if(mH==550.) mass4bodycut = "(fit_mlvjj>398 && fit_mlvjj<616)"; // 3j550mu if(mH==600.) mass4bodycut = "(fit_mlvjj>419 && fit_mlvjj<660)"; // 3j600mu } if(chan.Contains("el")) { if(mH==170.) mass4bodycut = "(fit_mlvjj>150 && fit_mlvjj<271)"; // 3j170el ===== if(mH==180.) mass4bodycut = "(fit_mlvjj>175 && fit_mlvjj<284)"; // 3j180el if(mH==190.) mass4bodycut = "(fit_mlvjj>185 && fit_mlvjj<290)"; // 3j190el if(mH==200.) mass4bodycut = "(fit_mlvjj>188 && fit_mlvjj<293)"; // 3j200el if(mH==250.) mass4bodycut = "(fit_mlvjj>216 && fit_mlvjj<300)"; // 3j250el if(mH==300.) mass4bodycut = "(fit_mlvjj>241 && fit_mlvjj<355)"; // 3j300el if(mH==350.) mass4bodycut = "(fit_mlvjj>269 && fit_mlvjj<407)"; // 3j350el if(mH==400.) mass4bodycut = "(fit_mlvjj>300 && fit_mlvjj<465)"; // 3j400el if(mH==450.) mass4bodycut = "(fit_mlvjj>332 && fit_mlvjj<518)"; // 3j450el if(mH==500.) mass4bodycut = "(fit_mlvjj>362 && fit_mlvjj<569)"; // 3j500el if(mH==550.) mass4bodycut = "(fit_mlvjj>398 && fit_mlvjj<616)"; // 3j550el if(mH==600.) mass4bodycut = "(fit_mlvjj>419 && fit_mlvjj<660)"; // 3j600el } } char mycutschar[1000]; sprintf(mycutschar,"ggdevt == %i &&(Mass2j_PFCor>65 && Mass2j_PFCor<95) && %s", njets, mass4bodycut); TCut mycuts (mycutschar); // tell the factory to use all remaining events in the trees after training for testing: factory->PrepareTrainingAndTestTree( mycuts, mycuts, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // If no numbers of events are given, half of the events in the tree are used for training, and // the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); // ---- Book MVA methods // // please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // test the decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // test the new kernel density estimator if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // test the mixed splines and kernel density estimator (depending on which variable) if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSkNN"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSkNN", "!H:!V:VolumeRangeMode=kNN:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:CutNmin=T:Nmin=100:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" ); // Fisher discriminant if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=60:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2"); // Linear discriminant (same as Fisher) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=N+5:TestRate=10:EpochMonitoring" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=N+5:TestRate=10:TrainingMethod=BFGS:!EpochMonitoring" ); // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // As an example how to use the ROOT plugin mechanism, book BDT via // plugin mechanism if (Use["Plugin"]) { // // first the plugin has to be defined, which can happen either through the following line in the local or global .rootrc: // // # plugin handler plugin name(regexp) class to be instanciated library constructor format // Plugin.TMVA@@MethodBase: ^BDT TMVA::MethodBDT TMVA.1 "MethodBDT(TString,TString,DataSet&,TString)" // // or by telling the global plugin manager directly gPluginMgr->AddHandler("TMVA@@MethodBase", "BDT", "TMVA::MethodBDT", "TMVA.1", "MethodBDT(TString,TString,DataSet&,TString)"); factory->BookMethod( TMVA::Types::kPlugins, "BDT", "!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=50" ); } // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void test2(){ //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); TString outfileName( "trainingBDT_tZq.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "BDT_trainning_tzq", outputFile,"!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); TFile *input_sig = TFile::Open( "../TreeReader/outputroot/histofile_tZq.root" ); TFile *input_wz = TFile::Open( "../TreeReader/outputroot/histofile_WZ.root" ); TTree *signal = (TTree*)input_sig->Get("Ttree_tZq"); TTree *background = (TTree*)input_wz->Get("Ttree_WZ"); factory->AddSignalTree ( signal, 1.); factory->AddBackgroundTree( background, 1.); std::vector<TString > varList; varList.push_back("tree_cosThetaStar");; varList.push_back("tree_topMass"); varList.push_back("tree_totMass"); varList.push_back("tree_deltaPhilb"); varList.push_back("tree_deltaRlb"); varList.push_back("tree_deltaRTopZ"); varList.push_back("tree_asym"); varList.push_back("tree_Zpt"); varList.push_back("tree_ZEta"); varList.push_back("tree_topPt"); varList.push_back("tree_topEta"); varList.push_back("tree_NJets"); varList.push_back("tree_NBJets"); varList.push_back("tree_deltaRZl"); varList.push_back("tree_deltaPhiZmet"); varList.push_back("tree_btagDiscri"); varList.push_back("tree_totPt"); varList.push_back("tree_totEta"); varList.push_back("tree_leptWPt"); varList.push_back("tree_leptWEta"); varList.push_back("tree_leadJetPt"); varList.push_back("tree_leadJetEta"); varList.push_back("tree_deltaRZleptW"); varList.push_back("tree_deltaPhiZleptW"); varList.push_back("tree_met" ); varList.push_back("tree_mTW" ); for(unsigned int i=0; i< varList.size() ; i++) factory->AddVariable( varList[i].Data(), 'F'); factory->SetSignalWeightExpression ("tree_EvtWeight"); factory->SetBackgroundWeightExpression("tree_EvtWeight"); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); //factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); // factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=100:nEventsMin=100:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=100:nEventsMin=100:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void TMVAClassification( ) { // this loads the library TMVA::Tools::Instance(); std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // Create a new root output file. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" ); // ---------- input variables factory->AddVariable("iso12x12", 'F'); factory->AddVariable("iso4x4", 'F'); factory->AddVariable("isoLshaped", 'F'); factory->AddVariable("PUM0", 'F'); // ---------- spectators ---------------- factory->AddSpectator("pt", "F"); factory->AddSpectator("eta", "F"); factory->AddSpectator("phi", "F"); factory->AddSpectator("e", "F"); // read training and test data TString fSig = "DrellYan_Zee.root"; TString fBkg = "MinBias.root"; TFile *fileSig = TFile::Open( fSig ); TFile *fileBkg = TFile::Open( fBkg ); TTree *signal = (TTree*)fileSig->Get("tree"); TTree *background = (TTree*)fileBkg->Get("tree"); // global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; // ====== register trees ==================================================== // you can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); TCut mycuts = "pt > 10"; TCut mycutb = "pt > 10"; // tell the factory to use all remaining events in the trees after training for testing: factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // If no numbers of events are given, half of the events in the tree are used for training, and // the other half for testing: // ---- Book MVA methods // Boosted Decision Trees factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); // ---- Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void TMVAClassification( TString myMethodList = "" ) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // to get access to the GUI and all tmva macros TString tmva_dir(TString(gRootDir) + "/tmva"); if(gSystem->Getenv("TMVASYS")) tmva_dir = TString(gSystem->Getenv("TMVASYS")); gROOT->SetMacroPath(tmva_dir + "/test/:" + gROOT->GetMacroPath() ); gROOT->ProcessLine(".L TMVAGui.C"); gROOT->ProcessLine(".L BDT.C"); gROOT->ProcessLine(".L BDTControlPlots.C"); gROOT->ProcessLine(".L BDT_Reg.C"); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // // Turn on-off the MVA to run // // --- Cut optimisation Use["Cuts"] = 0; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 1; Use["CutsSA"] = 0; // // --- 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 0; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // --- Mutidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 0; Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting Use["KNN"] = 0; // k-nearest neighbour method // // --- Linear Discriminant Analysis Use["LD"] = 0; // Linear Discriminant identical to Fisher Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 0; // minimisation of user-defined function using Genetics Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 1; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 0; // Recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 0; // uses Adaptive Boost Use["BDTG"] = 1; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of fisher discriminant for node splitting // // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules") Use["RuleFit"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. string outFileName = "TMVA_" + sSelection[mode-1] + ".root"; TString outfileName( outFileName.c_str()); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] /* factory->AddVariable( "myvar1 := var1+var2", 'F' ); factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' ); factory->AddVariable( "var3", "Variable 3", "units", 'F' ); factory->AddVariable( "var4", "Variable 4", "units", 'F' ); */ //Not order matter for the CutRange declarations if(mode==1){ //SRZjets inside Z use mT2, pTll, mjj, dR_ll, mEff, mWWT factory->AddVariable( "mT2", 'F'); factory->AddVariable( "pTll", 'F'); factory->AddVariable( "mjj", 'F'); factory->AddVariable( "dR_ll", 'F'); factory->AddVariable( "mEff", 'F'); factory->AddVariable( "mWWT", 'F'); factory->AddVariable( "mctPerp", 'F'); factory->AddVariable( "metrel", 'F'); factory->AddVariable( "met/mEff", 'F'); factory->AddVariable( "mT2jj", 'F'); factory->AddVariable( "sphericity", 'F'); factory->AddVariable( "sphericityTrans", 'F'); factory->AddVariable( "abs(llAcoplanarity+3.1415)", 'F'); factory->AddVariable( "abs(jjAcoplanarity+3.1415)", 'F'); factory->AddVariable( "mTl[0]", 'F'); factory->AddVariable( "mTl[1]", 'F'); //factory->AddVariable( "mctPara", 'F'); //issue with var content NAN } else if(mode==2){ factory->AddVariable( "mT2", 'F'); factory->AddVariable( "pTll", 'F'); factory->AddVariable( "mjj", 'F'); factory->AddVariable( "dR_ll", 'F'); factory->AddVariable( "mEff", 'F'); factory->AddVariable( "mWWT", 'F'); factory->AddVariable( "mctPerp", 'F'); factory->AddVariable( "metrel", 'F'); factory->AddVariable( "met/mEff", 'F'); factory->AddVariable( "mT2jj", 'F'); factory->AddVariable( "sphericity", 'F'); factory->AddVariable( "sphericityTrans", 'F'); factory->AddVariable( "abs(llAcoplanarity+3.1415)", 'F'); factory->AddVariable( "abs(jjAcoplanarity+3.1415)", 'F'); factory->AddVariable( "mTl[0]", 'F'); factory->AddVariable( "mTl[1]", 'F'); } else if(mode==3){ factory->AddVariable( "mT2", 'F'); factory->AddVariable( "pTll", 'F'); factory->AddVariable( "mjj", 'F'); factory->AddVariable( "dR_ll", 'F'); factory->AddVariable( "mEff", 'F'); factory->AddVariable( "mWWT", 'F'); factory->AddVariable( "mctPerp", 'F'); factory->AddVariable( "metrel", 'F'); factory->AddVariable( "met/mEff", 'F'); factory->AddVariable( "mT2jj", 'F'); factory->AddVariable( "sphericity", 'F'); factory->AddVariable( "sphericityTrans", 'F'); factory->AddVariable( "abs(llAcoplanarity+3.1415)", 'F'); factory->AddVariable( "abs(jjAcoplanarity+3.1415)", 'F'); factory->AddVariable( "mTl[0]", 'F'); factory->AddVariable( "mTl[1]", 'F'); } else if(mode==4){ factory->AddVariable( "mT2", 'F'); factory->AddVariable( "pTll", 'F'); factory->AddVariable( "dR_ll", 'F'); factory->AddVariable( "mWWT", 'F'); factory->AddVariable( "metrel", 'F'); factory->AddVariable( "mTl[0]", 'F'); factory->AddVariable( "mTl[1]", 'F'); } // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables //factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' ); //factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' ); // Read training and test data // (it is also possible to use ASCII format as input -> see TMVA Users Guide) //TString fname = "./tmva_class_example.root"; TString fname_bkg = string(getenv("HISTOANA")) + "/SusyAna/" + ver + "toyNt_Bkg_Zjets_SherpaAlpgen_WZ_ZZ_PowHeg_WW_PowHeg_TopMCNLO_" + toySkim + "_rlep.root"; if(toySkim == "DIL_optimSRSS") fname_bkg = string(getenv("HISTOANA")) + "/SusyAna/" + ver + "toyNt_Bkg_Zjets_SherpaAlpgen_WZ_ZZ_PowHeg_WW_PowHeg_TopMCNLO_FAKE_" + toySkim + ".root"; std::cout << "Bkg file: " << fname_bkg << std::endl; TFile* fBkg = TFile::Open( fname_bkg.Data() ); TString fname_sig; fname_sig = string(getenv("HISTOANA")) + "/SusyAna/" + ver + "ToyNtOutputs/"; if(mode==1) fname_sig += "164339_" + toySkim + ".root"; //wA_noslep_300_50 else if(mode==2) fname_sig += "164326_" + toySkim + ".root"; //wA_noslep_150_0 else if(mode==3) fname_sig += "157955_" + toySkim + ".root"; //wA_slep_142_107 //else if(mode==4) fname_sig += "176559_" + toySkim + ".root"; //wC_slep_117_47 else if(mode==4) fname_sig += "144907_" + toySkim + ".root"; //wC_slep_150_50 std::cout << "Signal file: " << fname_sig << std::endl; TFile* fSig = TFile::Open( fname_sig.Data() ); //if (gSystem->AccessPathName( fname )) // file does not exist in local directory // gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root"); std::cout << "--- TMVAClassification : Using input Bkg file: " << fBkg->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input Sig file: " << fSig->GetName() << std::endl; // --- Register the training and test trees TTree *signal = (TTree*)fSig->Get("ToyNt"); TTree *background = (TTree*)fBkg->Get("ToyNt"); // global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; // You can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); // To give different trees for training and testing, do as follows: // factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ); // factory->AddSignalTree( signalTestTree, signalTestWeight, "Test" ); // Use the following code instead of the above two or four lines to add signal and background // training and test events "by hand" // NOTE that in this case one should not give expressions (such as "var1+var2") in the input // variable definition, but simply compute the expression before adding the event // // // --- begin ---------------------------------------------------------- // std::vector<Double_t> vars( 4 ); // vector has size of number of input variables // Float_t treevars[4], weight; // // // Signal // for (UInt_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (UInt_t i=0; i<signal->GetEntries(); i++) { // signal->GetEntry(i); // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < signal->GetEntries()/2.0) factory->AddSignalTrainingEvent( vars, signalWeight ); // else factory->AddSignalTestEvent ( vars, signalWeight ); // } // // // Background (has event weights) // background->SetBranchAddress( "weight", &weight ); // for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (UInt_t i=0; i<background->GetEntries(); i++) { // background->GetEntry(i); // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight*weight ); // else factory->AddBackgroundTestEvent ( vars, backgroundWeight*weight ); // } // --- end ------------------------------------------------------------ // // --- end of tree registration // Set individual event weights (the variables must exist in the original TTree) // for signal : factory->SetSignalWeightExpression ("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("weight1*weight2"); factory->SetBackgroundWeightExpression( "w" ); factory->SetSignalWeightExpression( "w"); // Apply additional cuts on the signal and background samples (can be different) // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycuts = ""; if(mode==1){ //Optimisation inside Z peak mycuts = "(llType==0 || llType==1) && abs(mll-91.2)<10 && nCJets>=2 \ && j_pt[0]>20 && j_isC20[0] && Alt$(j_pt[1],0) && Alt$(j_isC20[1],0)"; /* \&& mEff>250 && met/mEff>0.3"; */ } else if(mode==2){ mycuts = "(llType==0 || llType==1) && abs(mll-91.2)<10 && nCJets>=2 \ && j_pt[0]>20 && j_isC20[0] && Alt$(j_pt[1],0) && Alt$(j_isC20[1],0) \ && mT2jj>60"; } else if(mode==3){ mycuts = "llType==1 && nBJets==0 &&nFJets==0 "; } else if(mode==4){ mycuts = "llType==2"; } /* //Optimation close to diagonal TCut mycuts = "(llType==0 || llType==1) && mll<50 && nCJets>=2 \ && j_pt[0]>20 && j_isC20[0] && Alt$(j_pt[1],0) && Alt$(j_isC20[1],0)"; */ // for example: TCut mycutb = "abs(var1)<0.5"; TCut mycutb = mycuts; // Tell the factory how to use the training and testing events // // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); if(mode==1) factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=4000:nTrain_Background=100000:SplitMode=Random:NormMode=NumEvents:!V" ); else if(mode==2) factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=4000:nTrain_Background=100000:SplitMode=Random:NormMode=NumEvents:!V" ); else if(mode==3) factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=1000:nTrain_Background=100000:SplitMode=Random:NormMode=NumEvents:!V" ); else if(mode==4) factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=1000:nTrain_Background=100000:SplitMode=Random:NormMode=NumEvents:!V" ); // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:V:FitMethod=MC:CutRangeMin[0]=0:CutRangeMax[0]=1000:CutRangeMin[1]=0:CutRangeMax[1]=1000:CutRangeMin[2]=0:CutRangeMax[2]=2000:CutRangeMin[3]=0:CutRangeMax[3]=5:CutRangeMin[4]=0:CutRangeMax[4]=3000:CutRangeMin[5]=0:CutRangeMax[5]=2000:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]){ if(mode==1) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "!H:V:FitMethod=GA:CutRangeMin[0]=0:CutRangeMax[0]=1000:CutRangeMin[1]=0:CutRangeMax[1]=1000:CutRangeMin[2]=0:CutRangeMax[2]=2000:CutRangeMin[3]=0:CutRangeMax[3]=5:CutRangeMin[4]=0:CutRangeMax[4]=3000:CutRangeMin[5]=0:CutRangeMax[5]=2000:VarProp=FSmart:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); else if(mode==2) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "!H:V:FitMethod=GA:CutRangeMin[0]=0:CutRangeMax[0]=1000:CutRangeMin[1]=0:CutRangeMax[1]=3000:CutRangeMin[2]=0:CutRangeMax[2]=3000:CutRangeMin[3]=0:CutRangeMax[3]=1000:CutRangeMin[4]=0:CutRangeMax[4]=1000:CutRangeMin[5]=0:CutRangeMax[5]=3000:VarProp=FSmart:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); else if(mode==3) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "!H:V:FitMethod=GA:CutRangeMin[0]=0:CutRangeMax[0]=1000:CutRangeMin[1]=0:CutRangeMax[1]=2000:CutRangeMin[2]=0:CutRangeMax[2]=1000:CutRangeMin[3]=0:CutRangeMax[3]=5:VarProp=FSmart:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); } if (Use["CutsSA"]){ if(mode==1) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:CutRangeMin[0]=0:CutRangeMax[0]=1000:CutRangeMin[1]=0:CutRangeMax[1]=1000:CutRangeMin[2]=0:CutRangeMax[2]=2000:CutRangeMin[3]=0:CutRangeMax[3]=5:CutRangeMin[4]=0:CutRangeMax[4]=3000:CutRangeMin[5]=0:CutRangeMax[5]=2000:VarProp=FSmart:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); else if(mode==2) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:CutRangeMin[0]=0:CutRangeMax[0]=1000:CutRangeMin[1]=0:CutRangeMax[1]=3000:CutRangeMin[2]=0:CutRangeMax[2]=3000:CutRangeMin[3]=0:CutRangeMax[3]=1000:CutRangeMin[4]=0:CutRangeMax[4]=1000:CutRangeMin[5]=0:CutRangeMax[5]=3000:VarProp=FSmart:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); else if(mode==3) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:CutRangeMin[0]=0:CutRangeMax[0]=1000:CutRangeMin[1]=0:CutRangeMax[1]=2000:CutRangeMin[2]=0:CutRangeMax[2]=1000:CutRangeMin[3]=0:CutRangeMax[3]=5:VarProp=FSmart:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); } // Likelihood ("naive Bayes estimator") if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density estimator to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "!H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=N+1:TestRate=5:!UseRegulator" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3:IgnoreNegWeightsInTraining=True" ); // n_cycles:#nodes:#nodes:... // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees [MY DEFAULT] if (Use["BDTG"]){ // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=300:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=10:NNodesMax=5:IgnoreNegWeightsInTraining=True" ); } if (Use["BDT"]){ // Adaptive Boost if(mode<3){ factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:V:NTrees=100:nEventsMin=2000:MaxDepth=4:UseRandomisedTrees=True:UseNVars=4:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:PruneStrength=-1:IgnoreNegWeightsInTraining=True" ); } else if(mode==3){ factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:V:NTrees=50:nEventsMin=2000:MaxDepth=4:UseRandomisedTrees=True:UseNVars=4:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:PruneStrength=-1:IgnoreNegWeightsInTraining=True" ); } } if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", "!H:!V:NTrees=50:nEventsMin=150:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier usage, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","GA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void mytmvaClass( TString myMethodList = "" ) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) //-------------------------------- TMVA::Tools::Instance(); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Cut optimisation Use["Cuts"] = 0; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // // --- 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 0; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // --- Mutidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 0; Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting Use["KNN"] = 0; // k-nearest neighbour method // // --- Linear Discriminant Analysis Use["LD"] = 0; // Linear Discriminant identical to Fisher Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 0; // minimisation of user-defined function using Genetics Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 0; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 0; // Recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 1; // uses Adaptive Boost Use["BDTG"] = 0; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost // // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules") Use["RuleFit"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // Select methods (Not of much interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" //Syntax --> "factory->AddVariable( "VarName", "UserDefinedLabel", "units", 'VarType' );" //Where VarType refers to float, int etc. Float_t nhiggsJetSubjets; Float_t njets; Float_t deltaPhiWH; Float_t jetPt[11]; Float_t jetEta[11]; Float_t jetPhi[11]; Float_t jetE[11]; Float_t higgsJetSubjetEta[3]; Float_t higgsJetSubjetPhi[3]; Float_t higgsJetSubjetPt[3]; Float_t higgsJetSubjetE[3]; Float_t Wphi; Float_t fatjetPt[15]; Float_t fatjetEta[15]; Float_t fatjetPhi[15]; Float_t fatjetE[15]; Float_t DeltaPhiWH; TLorentzVector fatjet(0,0,0,0); TLorentzVector subjet1(0,0,0,0); TLorentzVector subjet2(0,0,0,0); TLorentzVector subjet3(0,0,0,0); TLorentzVector akt[15]; fatjet.SetPtEtaPhiE(fatjetPt[0],fatjetEta[0],fatjetPhi[0], fatjetE[0]); // deltaPhiWH = std::fabs(Wphi-fatjetPhi[0]); // if(deltaPhiWH>TMath::Pi()){ std::fabs(deltaPhiWH = 2*TMath::Pi()-deltaPhiWH);} factory->AddVariable( "deltaPhiWH", "deltaPhiWH", "", 'F' ); factory->AddVariable( "jetPt.array[0]", "jetPt.array[0]", "", 'F' ); factory->AddVariable( "fatjetPt[0].array[0]", "fatjetPt.array[0]", "", 'F' ); //factory->AddVariable( "DeltaR[3][13]", "DeltaR[3][13]", "", 'F' ); // factory->AddSpectator( "nhiggsJetSubjets", "nhiggsJetSubjets", "", 'F'); // factory->AddSpectator( "njets", "", "njets", 'I'); // factory->AddSpectator( "jetPt[0]", "jetPt[0]", "", 'F'); // factory->AddSpectator( "jetPt[1]", "jetPt[1]", "", 'F'); // factory->AddSpectator( "jetPt[2]", "jetPt[2]", "", 'F'); // factory->AddSpectator( "jetPt[3]", "jetPt[3]", "", 'F'); // factory->AddSpectator( "jetPt[4]", "jetPt[4]", "", 'F'); // factory->AddSpectator( "jetPt[5]", "jetPt[5]", "", 'F'); // factory->AddSpectator( "jetPt[6]", "jetPt[6]", "", 'F'); // factory->AddSpectator( "jetPt[7]", "jetPt[7]", "", 'F'); // factory->AddSpectator( "jetPt[8]", "jetPt[8]", "", 'F'); // factory->AddSpectator( "jetPt[9]", "jetPt[9]", "", 'F'); // factory->AddSpectator( "jetPt[10]", "jetPt[10]", "", 'F'); // factory->AddSpectator( "jetEta[0]", "jetEta[0]", "", 'F'); // factory->AddSpectator( "jetEta[1]", "jetEta[1]", "", 'F'); // factory->AddSpectator( "jetEta[2]", "jetEta[2]", "", 'F'); // factory->AddSpectator( "jetEta[3]", "jetEta[3]", "", 'F'); // factory->AddSpectator( "jetEta[4]", "jetEta[4]", "", 'F'); // factory->AddSpectator( "jetEta[5]", "jetEta[5]", "", 'F'); // factory->AddSpectator( "jetEta[6]", "jetEta[6]", "", 'F'); // factory->AddSpectator( "jetEta[7]", "jetEta[7]", "", 'F'); // factory->AddSpectator( "jetEta[8]", "jetEta[8]", "", 'F'); // factory->AddSpectator( "jetEta[9]", "jetEta[9]", "", 'F'); // factory->AddSpectator( "jetEta[10]", "jetEta[10]", "", 'F'); // factory->AddSpectator( "jetPhi[0]", "jetPhi[0]", "", 'F'); // factory->AddSpectator( "jetPhi[1]", "jetPhi[1]", "", 'F'); // factory->AddSpectator( "jetPhi[2]", "jetPhi[2]", "", 'F'); // factory->AddSpectator( "jetPhi[3]", "jetPhi[3]", "", 'F'); // factory->AddSpectator( "jetPhi[4]", "jetPhi[4]", "", 'F'); // factory->AddSpectator( "jetPhi[5]", "jetPhi[5]", "", 'F'); // factory->AddSpectator( "jetPhi[6]", "jetPhi[6]", "", 'F'); // factory->AddSpectator( "jetPhi[7]", "jetPhi[7]", "", 'F'); // factory->AddSpectator( "jetPhi[8]", "jetPhi[8]", "", 'F'); // factory->AddSpectator( "jetPhi[9]", "jetPhi[9]", "", 'F'); // factory->AddSpectator( "jetPhi[10]", "jetPhi[10]", "", 'F'); // factory->AddSpectator("jetE[0]", "jetE[0]", "", 'F'); // factory->AddSpectator("jetE[1]", "jetE[1]", "", 'F'); // factory->AddSpectator("jetE[2]", "jetE[2]", "", 'F'); // factory->AddSpectator("higgsJetSubjetEta[0]","higgsJetSubjetEta[0]", "", 'F'); // factory->AddSpectator("higgsJetSubjetEta[1]","higgsJetSubjetEta[1]", "", 'F'); // factory->AddSpectator("higgsJetSubjetEta[2]","higgsJetSubjetEta[2]", "", 'F'); // factory->AddSpectator("higgsJetSubjetPhi[0]","higgsJetSubjetPhi[0]", "", 'F'); // factory->AddSpectator("higgsJetSubjetPhi[1]","higgsJetSubjetPhi[1]", "", 'F'); // factory->AddSpectator("higgsJetSubjetPhi[2]","higgsJetSubjetPhi[2]", "", 'F'); // factory->AddSpectator("higgsJetSubjetPt[0]","higgsJetSubjetPt[0]", "", 'F'); // factory->AddSpectator("higgsJetSubjetPt[1]","higgsJetSubjetPt[1]", "", 'F'); // factory->AddSpectator("higgsJetSubjetPt[2]","higgsJetSubjetPt[2]", "", 'F'); // factory->AddSpectator("higgsJetSubjetE[0]","higgsJetSubjetE[0]", "", 'F'); // factory->AddSpectator("higgsJetSubjetE[1]","higgsJetSubjetE[1]", "", 'F'); // factory->AddSpectator("higgsJetSubjetE[2]","higgsJetSubjetE[2]", "", 'F'); // factory->AddSpectator("Wphi", "Wphi", "" , 'F'); // factory->AddVariable("fatjetPt[0]", "fatjetPt[0]", "", 'F'); // factory->AddSpectator("fatjetPhi[0]", "fatjetPhi[0]", "", 'F'); // factory->AddSpectator("fatjetEta[0]", "fatjetEta[0]", "", 'F'); // factory->AddSpectator("fatjetE[0]", "fatjetE[0]", "", 'F'); // //if(std::fabs(Wphi-fatjetPhi[0])<TMath::Pi()){ // factory->AddVariable("abs(Wphi-fatjetPhi[0])", "DeltaPhiWH", "", 'F'); // //} // if ((abs(Wphi-fatjetPhi[0])>TMath::Pi())){ // factory->AddVariable("(abs(DeltaPhiWH = 2*(TMath::Pi())-DeltaPhiWH))", "DeltaPhiWH", "", 'F'); // } // Here, we add the lepton flag as a spectator so we can cut (further down) on lep == 0 for muons only or lep == 1 for electrons only. //factory->AddSpectator( "lepType[0]", "lepType[0]", "units", 'I'); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables // factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' ); // factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' ); // --- Read training and test data // (it is also possible to use ASCII format as input -> see TMVA Users Guide) TString fsig_name_WH = "/Disk/speyside7/Grid/grid-files/rsmith/SummerProject/OverlapperInputFiles/PreCutsApplied/mc11_7TeV.128034.HerwigppPowHeg_Boosted_WH125lnubb.merge.NTUP_HSG5WH.Output_PreCuts-Original.root"; TString fback_name_TT = "/Disk/speyside7/Grid/grid-files/rsmith/SummerProject/OverlapperInputFiles/PreCutsApplied/TT_NewBranch2.output_MERGE_PreCuts-Original.root"; TString fback_name_Wbb = "/Disk/speyside7/Grid/grid-files/rsmith/SummerProject/OverlapperInputFiles/PreCutsApplied/mc11_7TeV.128861.SherpaWbb2J_lnu_Wpt100GeV_PreCuts-Original.root"; TFile *sig_input_WH = TFile::Open( fsig_name_WH ); TFile *back_input_TT = TFile::Open( fback_name_TT ); TFile *back_input_Wbb = TFile::Open( fback_name_Wbb); // --- Register the training and test trees TTree *signal_WH = (TTree*)sig_input_WH->Get("OutputTree"); TTree *background_TT = (TTree*)back_input_TT->Get("OutputTree"); TTree * background_Wbb = (TTree*)back_input_Wbb->Get("OutputTree"); // --- global event weights per tree (see below for setting event-wise weights) Double_t signalWeight_WH = 1; Double_t backgroundWeight_TT = 1; Double_t backgroundWeight_Wbb= 1; // --- Here can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal_WH, signalWeight_WH ); factory->AddBackgroundTree( background_TT, backgroundWeight_TT ); factory->AddBackgroundTree( background_Wbb, backgroundWeight_Wbb); Float_t nhiggsJetSubjets; Float_t njets; Float_t deltaPhiWH; Float_t jetPt[11]; Float_t jetEta[11]; Float_t jetPhi[11]; Float_t jetE[11]; Float_t higgsJetSubjetEta[3]; Float_t higgsJetSubjetPhi[3]; Float_t higgsJetSubjetPt[3]; Float_t higgsJetSubjetE[3]; Float_t Wphi; Float_t fatjetPt[15]; Float_t fatjetEta[15]; Float_t fatjetPhi[15]; Float_t fatjetE[15]; TLorentzVector fatjet(0,0,0,0); TLorentzVector subjet1(0,0,0,0); TLorentzVector subjet2(0,0,0,0); TLorentzVector subjet3(0,0,0,0); TLorentzVector akt[15]; fatjet.SetPtEtaPhiE(fatjetPt[0],fatjetEta[0],fatjetPhi[0], fatjetE[0]); deltaPhiWH = std::fabs(Wphi-fatjetPhi[0]); if(deltaPhiWH>TMath::Pi()){ std::fabs(deltaPhiWH = 2*TMath::Pi()-deltaPhiWH);} float deltaEta[3][11]; float deltaPhi[3][11]; float DeltaR[3][13]; for(int j = 0; j<nhiggsJetSubjets; j++){ for(int i = 0; i<njets; i++){ deltaEta[j][i] = std::fabs(higgsJetSubjetEta[j]-jetEta[i]); deltaPhi[j][i] = std::fabs(higgsJetSubjetPhi[j]-jetPhi[i]); DeltaR[j][i] = std::sqrt(deltaEta[j][i]*deltaEta[j][i]+deltaPhi[j][i]*deltaPhi[j][i]); } } for (int i=0; i < njets; i++) { akt[i].SetPtEtaPhiE(jetPt[i],jetEta[i],jetPhi[i], jetE[i]); } subjet1.SetPtEtaPhiE(higgsJetSubjetPt[0],higgsJetSubjetEta[0],higgsJetSubjetPhi[0], higgsJetSubjetE[0]); subjet2.SetPtEtaPhiE(higgsJetSubjetPt[1],higgsJetSubjetEta[1],higgsJetSubjetPhi[1], higgsJetSubjetE[1]); FatHiggs = subjet1 + subjet2; if (nhiggsJetSubjets>2){ subjet3.SetPtEtaPhiE(higgsJetSubjetPt[2],higgsJetSubjetEta[2],higgsJetSubjetPhi[2], higgsJetSubjetE[2]); } // To give different trees for training and testing, do: // factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ); // factory->AddSignalTree( signalTestTree, signalTestWeight, "Test" ); // Set individual event weights (the variables must exist in the original TTree) // for signal : factory->SetSignalWeightExpression ("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("wei // --- Apply additional cuts on the signal and background samples (can be different) // PLEASE NOTE: If no cuts are required, do NOT leave any spaces between quotation marks, otherwise an error will occur. TCut mycuts = "deltaPhiWH>-800"; //Signal Tree Cuts TCut mycutb = "deltaPhiWH>-800"; //Background Tree Cuts //cout << "DeltaPhiWH: " << deltaPhiWH << endl; // Tell the factory how to use the training and testing events // // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:!V:NormMode=NumEvents"); // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // --- Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[1]=0:CutRangeMax[1]=450:VarProp[1]=FMax:EffSel:Steps=60:Cycles=3:PopSize=2000:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood ("naive Bayes estimator") if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density estimator to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=350:nEventsMin=100:MaxDepth=3:BoostType=Grad:Shrinkage=0.05:UseBaggedGrad:GradBaggingFraction=0.51:nCuts=65:PruneStrength=2:PruneMethod=ExpectedError:NNodesMax=7" ); //"!H:!V:NTrees=350:nEventsMin=100:MaxDepth=3:BoostType=Grad:Shrinkage=0.05:UseBaggedGrad:GradBaggingFraction=0.55:nCuts=50:PruneStrength=2:PruneMethod=ExpectedError:NNodesMax=10" ); //nCuts=20 && no pruning originally if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "NTrees=550:nEventsMin=110:MaxDepth=5.5:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=10:PruneStrength=6:PruneMethod=ExpectedError"); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=950:nEventsMin=20:MaxDepth=10:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=300:nEventsMin=20:MaxDepth=10:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=10:PruneStrength=6:PruneMethod=ExpectedError:VarTransform=Decorrelate" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("SigEffAt001","FitGA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }