void TMVAClassification() { TMVA::Tools::Instance(); std::cout << "==> Start TMVAClassification" << std::endl; TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "AnalysisType=Classification" ); factory->AddVariable("LifeTime", 'F'); factory->AddVariable("FlightDistance", 'F'); factory->AddVariable("pt", 'F'); TString fname = "../tau_data/training.root"; TFile *input = TFile::Open( fname ); TTree *tree = (TTree*)input->Get("data"); factory->AddTree(tree, "Signal", 1., "signal == 1", "Training"); factory->AddTree(tree, "Signal", 1., "signal == 1", "Test"); factory->AddTree(tree, "Background", 1., "signal == 0", "Training"); factory->AddTree(tree, "Background", 1., "signal == 0", "Test"); // gradient boosting training factory->BookMethod(TMVA::Types::kBDT, "GBDT", "NTrees=40:BoostType=Grad:Shrinkage=0.01:MaxDepth=7:UseNvars=6:nCuts=20:MinNodeSize=10"); factory->TrainAllMethods(); input->Close(); outputFile->Close(); delete factory; }
void regressphi() { TMVA::Tools::Instance(); std::cout << "==> Start TMVAClassification" << std::endl; // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "mva", outputFile, "!V:!Silent:Color:DrawProgressBar" ); factory->AddVariable( "npv" , 'F' ); factory->AddVariable( "u" , 'F' ); factory->AddVariable( "uphi" , 'F' ); factory->AddVariable( "chsumet/sumet" , 'F' ); factory->AddVariable( "tku" , 'F' ); factory->AddVariable( "tkuphi" , 'F' ); factory->AddVariable( "nopusumet/sumet" , 'F' ); factory->AddVariable( "nopuu" , 'F' ); factory->AddVariable( "nopuuphi" , 'F' ); factory->AddVariable( "pusumet/sumet" , 'F' ); factory->AddVariable( "pumet" , 'F' ); factory->AddVariable( "pumetphi" , 'F' ); factory->AddVariable( "pucsumet/sumet" , 'F' ); factory->AddVariable( "pucu" , 'F' ); factory->AddVariable( "pucuphi" , 'F' ); factory->AddVariable( "jspt_1" , 'F' ); factory->AddVariable( "jseta_1" , 'F' ); factory->AddVariable( "jsphi_1" , 'F' ); factory->AddVariable( "jspt_2" , 'F' ); factory->AddVariable( "jseta_2" , 'F' ); factory->AddVariable( "jsphi_2" , 'F' ); factory->AddVariable( "nalljet" , 'I' ); factory->AddVariable( "njet" , 'I' ); factory->AddTarget( "rphi_z-uphi+ 2.*TMath::Pi()*(rphi_z-uphi < -TMath::Pi()) - 2.*TMath::Pi()*(rphi_z-uphi > TMath::Pi()) " ); TString lName = "../Jets/r11-dimu_nochs_v2.root"; TFile *lInput = TFile::Open(lName); TTree *lRegress = (TTree*)lInput ->Get("Flat"); Double_t lRWeight = 1.0; factory->AddRegressionTree( lRegress , lRWeight ); TCut lCut = "nbtag == 0"; //Cut to remove real MET //(rpt_z < 40 || (rpt_z > 40 && rpt_z+u1 < 40)) && nbtag == 0 "; ==> stronger cut to remove Real MET factory->PrepareTrainingAndTestTree( lCut, "nTrain_Regression=0:nTest_Regression=0:SplitMode=Block:NormMode=NumEvents:!V" ); // Boosted Decision Trees factory->BookMethod( TMVA::Types::kBDT, "RecoilPhiRegress_data_clean2_njet", "!H:!V:VarTransform=None:nEventsMin=200:NTrees=100:BoostType=Grad:Shrinkage=0.1:MaxDepth=100:NNodesMax=100000:UseYesNoLeaf=F:nCuts=2000");//MaxDepth=100 factory->TrainAllMethods(); factory->TestAllMethods(); factory->EvaluateAllMethods(); outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; //if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void REG::Loop() { if (fChain == 0) return; Long64_t nentries = fChain->GetEntriesFast(); Long64_t nbytes = 0, nb = 0; TMVA::Tools::Instance(); std::cout << std::endl; std::cout << "==> Start TMVARegression" << std::endl; TString outfileName( "TMVAReg.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "TMVARegression", outputFile, "!V:!Silent:Color:DrawProgressBar" ); factory->AddVariable( "l1Pt", "Variable 1", "units", 'F' ); factory->AddVariable( "l1Eta", "Variable 2", "units", 'F' ); factory->AddVariable( "l1Phi", "Variable 1", "units", 'F' ); // factory->AddVariable( "RhoL1", "Variable 2", "units", 'F' ); factory->AddTarget( "ratio" ); factory->AddRegressionTree(fChain, 1.0 ); TCut mycut = ""; factory->PrepareTrainingAndTestTree( mycut, "nTrain_Regression=10000:nTest_Regression=0:SplitMode=Block:NormMode=NumEvents:!V" ); factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=100:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" ); factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events // factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs // factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVARegression is done!" << std::endl; delete factory; }
void TMVAClassification( TString myMethodList = "" ) { // TString curDynamicPath( gSystem->GetDynamicPath() ); // gSystem->SetDynamicPath( "/usr/local/bin/root/bin:" + curDynamicPath ); // TString curIncludePath(gSystem->GetIncludePath()); // gSystem->SetIncludePath( " -I /usr/local/bin/root/include " + curIncludePath ); // // load TMVA shared library created in local release: for MAC OSX // if (TString(gSystem->GetBuildArch()).Contains("macosx") ) gSystem->Load( "libTMVA.so" ); // gSystem->Load( "libTMVA" ); // TMVA::Tools::Instance(); // // welcome the user // TMVA::gTools().TMVAWelcomeMessage(); // TMVAGlob::SetTMVAStyle(); // // this loads the library // TMVA::Tools::Instance(); //--------------------------------------------------------------- // default MVA methods to be trained + tested std::map<std::string,int> Use; Use["Cuts"] = 1; // Use["Likelihood"] = 1; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // Create a new root output file. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory will // then run the performance analysis for you. // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] // factory->AddVariable( "myvar1 := var1+var2", 'F' ); // factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' ); // factory->AddVariable( "var3", "Variable 3", "units", 'F' ); // factory->AddVariable( "var4", "Variable 4", "units", 'F' ); factory->AddVariable("deltaEta := deta", 'F'); factory->AddVariable("deltaPhi := dphi", 'F'); factory->AddVariable("sigmaIetaIeta := sieie", 'F'); factory->AddVariable("HoverE := hoe", 'F'); factory->AddVariable("trackIso := trackiso", 'F'); factory->AddVariable("ecalIso := ecaliso", 'F'); factory->AddVariable("hcalIso := hcaliso", 'F'); //factory->AddVariable("nMissingHits := misshits", 'I'); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables factory->AddSpectator( "et", 'F' ); factory->AddSpectator( "eta", 'F' ); factory->AddSpectator( "phi", 'F' ); // read training and test data TFile *input = TFile::Open( "SigElectrons.root" ); TFile *inputB = TFile::Open( "BkgElectrons.root" ); std::cout << "--- TMVAClassification : Using input file: " << input->GetName() << std::endl; TTree *signal = (TTree*)input->Get("ntuple"); TTree *background = (TTree*)inputB->Get("ntuple"); factory->AddSignalTree ( signal, 1.0 ); factory->AddBackgroundTree( background, 1.0 ); // This would set individual event weights (the variables defined in the // expression need to exist in the original TTree) // for signal : factory->SetSignalWeightExpression("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("weight1*weight2"); //factory->SetBackgroundWeightExpression("weight"); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; TCut mycutb = ""; // tell the factory to use all remaining events in the trees after training for testing: factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // If no numbers of events are given, half of the events in the tree are used for training, and // the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); // ---- Book MVA methods // // please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); // Likelihood if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // gROOT->ProcessLine(".x /usr/local/bin/root/tmva/test/correlations.C"); gROOT->ProcessLine(".x /usr/local/bin/root/tmva/test/variables.C"); }
//////////////////////////////////////////////////////////////////////////////// /// Main /// //////////////////////////////////////////////////////////////////////////////// void TrainRegressionFJ(TString myMethodList="") { gROOT->SetBatch(1); gROOT->LoadMacro("HelperFunctions.h" ); // make functions visible to TTreeFormula if (!TString(gROOT->GetVersion()).Contains("5.34")) { std::cout << "INCORRECT ROOT VERSION! Please use 5.34:" << std::endl; std::cout << "source /uscmst1/prod/sw/cms/slc5_amd64_gcc462/lcg/root/5.34.02-cms/bin/thisroot.csh" << std::endl; std::cout << "Return without doing anything." << std::endl; return; } //TString curDynamicPath( gSystem->GetDynamicPath() ); //gSystem->SetDynamicPath( "../lib:" + curDynamicPath ); //TString curIncludePath(gSystem->GetIncludePath()); //gSystem->SetIncludePath( " -I../include " + curIncludePath ); // Load the library TMVA::Tools::Instance(); //-------------------------------------------------------------------------- // Default MVA methods to be trained + tested std::map<std::string, int> Use; // --- Mutidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDEFoam"] = 1; Use["KNN"] = 1; // // --- Linear Discriminant Analysis Use["LD"] = 1; // // --- Function Discriminant analysis Use["FDA_GA"] = 1; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; // // --- Neural Network Use["MLP"] = 1; // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 1; Use["BDT1"] = 0; Use["BDTG"] = 0; Use["BDTG1"] = 0; //-------------------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVARegression" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } //-------------------------------------------------------------------------- // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName( "TMVARegFJ.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory will // then run the performance analysis for you. // // The first argument is the base of the name of all the // weightfiles in the directory weights/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVARegressionFJ", outputFile, "!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=Regression" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; const std::vector<std::string> & inputExpressions = GetInputExpressionsFJReg(); const std::vector<std::string> & inputExpressionLabels = GetInputExpressionLabelsFJReg(); assert(inputExpressions.size() == inputExpressionLabels.size()); // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] //factory->AddVariable( "var1", "Variable 1", "units", 'F' ); //factory->AddVariable( "var2", "Variable 2", "units", 'F' ); for (UInt_t iexpr=0; iexpr!=inputExpressions.size(); iexpr++){ Label label = MakeLabel(inputExpressionLabels.at(iexpr)); TString expr = inputExpressions.at(iexpr); factory->AddVariable(expr, label.xlabel, label.unit, label.type); } // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables //factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' ); //factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' ); // Add the variable carrying the regression target //factory->AddTarget( "fvalue" ); factory->AddTarget( "fathFilterJets_genPt" ); // It is also possible to declare additional targets for multi-dimensional regression, ie: // -- factory->AddTarget( "fvalue2" ); // BUT: this is currently ONLY implemented for MLP //-------------------------------------------------------------------------- // Read training and test data TFile *input(0); TString dirname = "skim_ZnnH_regression_fj/"; TString prefix = "skim_"; TString suffix = ".root"; TTree *regTrainTree(0), *regTestTree(0); std::vector<std::string> processes; processes.push_back("ZnnH110"); processes.push_back("ZnnH115"); processes.push_back("ZnnH120"); processes.push_back("ZnnH125"); processes.push_back("ZnnH130"); processes.push_back("ZnnH135"); processes.push_back("ZnnH140"); processes.push_back("ZnnH145"); processes.push_back("ZnnH150"); #ifdef USE_WH processes.push_back("WlnH110"); processes.push_back("WlnH115"); processes.push_back("WlnH120"); processes.push_back("WlnH125"); processes.push_back("WlnH130"); processes.push_back("WlnH135"); processes.push_back("WlnH140"); processes.push_back("WlnH145"); processes.push_back("WlnH150"); #endif std::vector<TFile *> files; for (UInt_t i=0; i<processes.size(); i++){ std::string process = processes.at(i); input = (TFile*) TFile::Open(dirname + prefix + process + suffix, "READ"); if (!input) { std::cout << "ERROR: Could not open input file." << std::endl; exit(1); } std::cout << "--- TMVARegression : Using input file: " << input->GetName() << std::endl; files.push_back(input); // --- Register the regression tree regTrainTree = (TTree*) input->Get("tree_train"); regTestTree = (TTree*) input->Get("tree_test"); // Global event weights per tree (see below for setting event-wise weights) Double_t regWeight = 1.0; // You can add an arbitrary number of regression trees factory->AddRegressionTree(regTrainTree, regWeight, TMVA::Types::kTraining); factory->AddRegressionTree(regTestTree , regWeight, TMVA::Types::kTesting ); } // Set individual event weights (the variables must exist in the original TTree) //factory->SetWeightExpression( "var1", "Regression" ); // Apply additional cuts on the signal and background samples (can be different) TCut mycut = "fathFilterJets_genPt>10 && fathFilterJets_pt>15 && abs(fathFilterJets_eta)<2.5"; // this is to avoid 3rd filter jet without gen match //TCut mycut = "hJet_genPt[0] > 0. && hJet_genPt[1] > 0. && hJet_csv[0] > 0. && hJet_csv[1] > 0. && hJet_pt[0] > 20. && hJet_pt[1] > 20. && abs(hJet_eta[0])<2.5 && abs(hJet_eta[1])<2.5"; // Tell the factory to use all remaining events in the trees after training for testing: factory->PrepareTrainingAndTestTree( mycut, "V:nTrain_Regression=0:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents" ); // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=Random:!V" ); // --- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // PDE - RS method if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" ); // And the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // Linear discriminant if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "!H:!V:VarTransform=None" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. the formula of this example is good for parabolas factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); // Neural network (MLP) if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" ); // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDT"]) factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:V:NTrees=100:nEventsMin=30:NodePurityLimit=0.5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" ); //"!H:V:NTrees=60:nEventsMin=20:NodePurityLimit=0.5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30:DoBoostMonitor" ); if (Use["BDT1"]) factory->BookMethod( TMVA::Types::kBDT, "BDT1", "!H:V:NTrees=100:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" ); if (Use["BDTG"]) factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:V:NTrees=2000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.7:nCuts=200:MaxDepth=3:NNodesMax=15" ); if (Use["BDTG1"]) factory->BookMethod( TMVA::Types::kBDT, "BDTG1", "!H:V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:MaxDepth=3:NNodesMax=15" ); //-------------------------------------------------------------------------- // Train MVAs using the set of training events factory->TrainAllMethods(); // --- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // --- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); //-------------------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVARegression is done!" << std::endl; for (UInt_t i=0; i<files.size(); i++) files.at(i)->Close(); delete outputFile; delete factory; // Launch the GUI for the root macros //gROOT->SetMacroPath( "$ROOTSYS/tmva/macros/" ); //gROOT->Macro( "$ROOTSYS/tmva/macros/TMVAlogon.C" ); //gROOT->LoadMacro( "$ROOTSYS/tmva/macros/TMVAGui.C" ); //if (!gROOT->IsBatch()) TMVARegGui( outfileName ); }
void TMVAClassification( TString myMethodList = "" ) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) // this loads the library TMVA::Tools::Instance(); //--------------------------------------------------------------- // default MVA methods to be trained + tested std::map<std::string,int> Use; Use["Cuts"] = 0; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // --- Use["Likelihood"] = 1; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // --- Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDERSkNN"] = 0; // depreciated until further notice Use["PDEFoam"] = 0; // -- Use["KNN"] = 0; // --- Use["HMatrix"] = 0; Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; Use["LD"] = 0; // --- Use["FDA_GA"] = 0; Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // --- Use["MLP"] = 0; // this is the recommended ANN Use["MLPBFGS"] = 0; // recommended ANN with optional training method Use["MLPBNN"] = 0; // recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // *** missing Use["TMlpANN"] = 0; // --- Use["SVM"] = 0; // --- Use["BDT"] = 1; Use["BDTD"] = 0; Use["BDTG"] = 0; Use["BDTB"] = 0; // --- Use["RuleFit"] = 0; // --- Use["Plugin"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 0; } } // Create a new root output file. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory will // then run the performance analysis for you. // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] // factory->AddVariable( "myvar1 := var1+var2", 'F' ); // factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' ); // factory->AddVariable( "var3", "Variable 3", "units", 'F' ); // factory->AddVariable( "var4", "Variable 4", "units", 'F' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables // factory->AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' ); // factory->AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' ); // read training and test data factory->AddVariable("CScostheta",'F'); factory->AddVariable("ZRapidity",'F'); factory->AddVariable("REDmet",'F'); if (ReadDataFromAsciiIFormat) { // load the signal and background event samples from ascii files // format in file must be: // var1/F:var2/F:var3/F:var4/F // 0.04551 0.59923 0.32400 -0.19170 // ... TString datFileS = "tmva_example_sig.dat"; TString datFileB = "tmva_example_bkg.dat"; factory->SetInputTrees( datFileS, datFileB ); } else { // load the signal and background event samples from ROOT trees TString fname = "./tmva_class_example.root"; TString fname_Data7TeV_DoubleElectron2011B_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleElectron2011B_0.root"; TString fname_Data7TeV_MuEG2011B_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_MuEG2011B_0.root"; TString fname_ZH125 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH125.root"; TString fname_SingleT_tW = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleT_tW.root"; TString fname_SingleT_s = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleT_s.root"; TString fname_Data7TeV_DoubleMu2011B_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleMu2011B_0.root"; TString fname_Data7TeV_DoubleElectron2011A_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleElectron2011A_0.root"; TString fname_ZH135 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH135.root"; TString fname_DYJetsToLL = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//DYJetsToLL.root"; TString fname_ZH115 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH115.root"; TString fname_SingleTbar_t = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleTbar_t.root"; TString fname_Data7TeV_DoubleElectron2011B_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleElectron2011B_1.root"; TString fname_Data7TeV_DoubleMu2011A_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleMu2011A_1.root"; TString fname_Data7TeV_MuEG2011A_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_MuEG2011A_1.root"; TString fname_TTJets = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//TTJets.root"; TString fname_SingleTbar_s = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleTbar_s.root"; TString fname_WJetsToLNu = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//WJetsToLNu.root"; TString fname_Data7TeV_DoubleElectron2011A_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleElectron2011A_1.root"; TString fname_ZZ = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZZ.root"; TString fname_ZH150 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH150.root"; TString fname_Data7TeV_MuEG2011B_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_MuEG2011B_1.root"; TString fname_WW = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//WW.root"; TString fname_ZH105 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH105.root"; TString fname_Data7TeV_DoubleMu2011A_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleMu2011A_0.root"; TString fname_SingleTbar_tW = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleTbar_tW.root"; TString fname_WZ = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//WZ.root"; TString fname_Data7TeV_MuEG2011A_0 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_MuEG2011A_0.root"; TString fname_Data7TeV_DoubleMu2011B_1 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//Data7TeV_DoubleMu2011B_1.root"; TString fname_ZH145 = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//ZH145.root"; TString fname_SingleT_t = "/tmp/chasco/INIT/HADD/TMVA/Trees_FUSION2/ZZ_vs_nonZZ//SingleT_t.root"; if (gSystem->AccessPathName( fname )) // file does not exist in local directory gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root"); TFile *input_Data7TeV_DoubleElectron2011B_0 = TFile::Open( fname_Data7TeV_DoubleElectron2011B_0 ); TFile *input_Data7TeV_MuEG2011B_0 = TFile::Open( fname_Data7TeV_MuEG2011B_0 ); TFile *input_ZH125 = TFile::Open( fname_ZH125 ); TFile *input_SingleT_tW = TFile::Open( fname_SingleT_tW ); TFile *input_SingleT_s = TFile::Open( fname_SingleT_s ); TFile *input_Data7TeV_DoubleMu2011B_0 = TFile::Open( fname_Data7TeV_DoubleMu2011B_0 ); TFile *input_Data7TeV_DoubleElectron2011A_0 = TFile::Open( fname_Data7TeV_DoubleElectron2011A_0 ); TFile *input_ZH135 = TFile::Open( fname_ZH135 ); TFile *input_DYJetsToLL = TFile::Open( fname_DYJetsToLL ); TFile *input_ZH115 = TFile::Open( fname_ZH115 ); TFile *input_SingleTbar_t = TFile::Open( fname_SingleTbar_t ); TFile *input_Data7TeV_DoubleElectron2011B_1 = TFile::Open( fname_Data7TeV_DoubleElectron2011B_1 ); TFile *input_Data7TeV_DoubleMu2011A_1 = TFile::Open( fname_Data7TeV_DoubleMu2011A_1 ); TFile *input_Data7TeV_MuEG2011A_1 = TFile::Open( fname_Data7TeV_MuEG2011A_1 ); TFile *input_TTJets = TFile::Open( fname_TTJets ); TFile *input_SingleTbar_s = TFile::Open( fname_SingleTbar_s ); TFile *input_WJetsToLNu = TFile::Open( fname_WJetsToLNu ); TFile *input_Data7TeV_DoubleElectron2011A_1 = TFile::Open( fname_Data7TeV_DoubleElectron2011A_1 ); TFile *input_ZZ = TFile::Open( fname_ZZ ); TFile *input_ZH150 = TFile::Open( fname_ZH150 ); TFile *input_Data7TeV_MuEG2011B_1 = TFile::Open( fname_Data7TeV_MuEG2011B_1 ); TFile *input_WW = TFile::Open( fname_WW ); TFile *input_ZH105 = TFile::Open( fname_ZH105 ); TFile *input_Data7TeV_DoubleMu2011A_0 = TFile::Open( fname_Data7TeV_DoubleMu2011A_0 ); TFile *input_SingleTbar_tW = TFile::Open( fname_SingleTbar_tW ); TFile *input_WZ = TFile::Open( fname_WZ ); TFile *input_Data7TeV_MuEG2011A_0 = TFile::Open( fname_Data7TeV_MuEG2011A_0 ); TFile *input_Data7TeV_DoubleMu2011B_1 = TFile::Open( fname_Data7TeV_DoubleMu2011B_1 ); TFile *input_ZH145 = TFile::Open( fname_ZH145 ); TFile *input_SingleT_t = TFile::Open( fname_SingleT_t ); std::cout << "--- TMVAClassification : Using input_Data7TeV_DoubleElectron2011B_0 file: " << input_Data7TeV_DoubleElectron2011B_0->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_Data7TeV_MuEG2011B_0 file: " << input_Data7TeV_MuEG2011B_0->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_ZH125 file: " << input_ZH125->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_SingleT_tW file: " << input_SingleT_tW->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_SingleT_s file: " << input_SingleT_s->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_Data7TeV_DoubleMu2011B_0 file: " << input_Data7TeV_DoubleMu2011B_0->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_Data7TeV_DoubleElectron2011A_0 file: " << input_Data7TeV_DoubleElectron2011A_0->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_ZH135 file: " << input_ZH135->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_DYJetsToLL file: " << input_DYJetsToLL->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_ZH115 file: " << input_ZH115->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_SingleTbar_t file: " << input_SingleTbar_t->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_Data7TeV_DoubleElectron2011B_1 file: " << input_Data7TeV_DoubleElectron2011B_1->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_Data7TeV_DoubleMu2011A_1 file: " << input_Data7TeV_DoubleMu2011A_1->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_Data7TeV_MuEG2011A_1 file: " << input_Data7TeV_MuEG2011A_1->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_TTJets file: " << input_TTJets->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_SingleTbar_s file: " << input_SingleTbar_s->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_WJetsToLNu file: " << input_WJetsToLNu->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_Data7TeV_DoubleElectron2011A_1 file: " << input_Data7TeV_DoubleElectron2011A_1->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_ZZ file: " << input_ZZ->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_ZH150 file: " << input_ZH150->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_Data7TeV_MuEG2011B_1 file: " << input_Data7TeV_MuEG2011B_1->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_WW file: " << input_WW->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_ZH105 file: " << input_ZH105->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_Data7TeV_DoubleMu2011A_0 file: " << input_Data7TeV_DoubleMu2011A_0->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_SingleTbar_tW file: " << input_SingleTbar_tW->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_WZ file: " << input_WZ->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_Data7TeV_MuEG2011A_0 file: " << input_Data7TeV_MuEG2011A_0->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_Data7TeV_DoubleMu2011B_1 file: " << input_Data7TeV_DoubleMu2011B_1->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_ZH145 file: " << input_ZH145->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input_SingleT_t file: " << input_SingleT_t->GetName() << std::endl; TTree *signal_ZH145 = (TTree*)input_ZH145->Get("tmvatree"); TTree *background_ZZ = (TTree*)input_ZZ->Get("tmvatree"); // global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; // ====== register trees ==================================================== // // the following method is the prefered one: // you can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal_ZH145, 1.0 ); factory->AddBackgroundTree( background_ZZ, 1.0 ); // To give different trees for training and testing, do as follows: // factory->AddSignalTree( signal_ZH145TrainingTree, signal_ZH145TrainWeight, "Training" ); // factory->AddSignalTree( signal_ZH145TestTree, signal_ZH145TestWeight, "Test" ); // Use the following code instead of the above two or four lines to add signal and background // training and test events "by hand" // NOTE that in this case one should not give expressions (such as "var1+var2") in the input // variable definition, but simply compute the expression before adding the event // // // --- begin ---------------------------------------------------------- // std::vector<Double_t> vars( 4 ); // vector has size of number of input variables // Float_t treevars[4]; // for (Int_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (Int_t i=0; i<signal->GetEntries(); i++) { // signal->GetEntry(i); // for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < signal->GetEntries()/2) factory->AddSignalTrainingEvent( vars, signalWeight ); // else factory->AddSignalTestEvent ( vars, signalWeight ); // } // // for (Int_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (Int_t i=0; i<background->GetEntries(); i++) { // background->GetEntry(i); // for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight ); // else factory->AddBackgroundTestEvent ( vars, backgroundWeight ); // } // // --- end ------------------------------------------------------------ // // ====== end of register trees ============================================== } // This would set individual event weights (the variables defined in the // expression need to exist in the original TTree) // for signal : factory->SetSignalWeightExpression("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("weight1*weight2"); factory->SetBackgroundWeightExpression("Eweight*XS*BR*LUM*(1/NGE)*(B2/B3)*CUT"); factory->SetSignalWeightExpression("Eweight*XS*BR*LUM*(1/NGE)*(B2/B3)*CUT"); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = "(CUT>2)"; TCut mycutb = "(CUT>2)"; // tell the factory to use all remaining events in the trees after training for testing: factory->PrepareTrainingAndTestTree( mycuts, "SplitMode=random:!V" ); // "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // If no numbers of events are given, half of the events in the tree are used for training, and // the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); // ---- Book MVA methods // // please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:Seed=0:EffSel:Steps=50:Cycles=3:PopSize=1000:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // test the decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // test the new kernel density estimator if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // test the mixed splines and kernel density estimator (depending on which variable) if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSkNN"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSkNN", "!H:!V:VolumeRangeMode=kNN:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:CutNmin=T:Nmin=100:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" ); // Fisher discriminant if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=60:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2"); // Linear discriminant (same as Fisher) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=1000:nEventsMin=400:MaxDepth=6:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=1000:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=1000:nEventsMin=400:MaxDepth=6:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // As an example how to use the ROOT plugin mechanism, book BDT via // plugin mechanism if (Use["Plugin"]) { // // first the plugin has to be defined, which can happen either through the following line in the local or global .rootrc: // // # plugin handler plugin name(regexp) class to be instanciated library constructor format // Plugin.TMVA@@MethodBase: ^BDT TMVA::MethodBDT TMVA.1 "MethodBDT(TString,TString,DataSet&,TString)" // // or by telling the global plugin manager directly gPluginMgr->AddHandler("TMVA@@MethodBase", "BDT", "TMVA::MethodBDT", "TMVA.1", "MethodBDT(TString,TString,DataSet&,TString)"); factory->BookMethod( TMVA::Types::kPlugins, "BDT", "!H:!V:NTrees=1000:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=50" ); } // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros gROOT->ProcessLine(".q;"); }
void Classification() { TMVA::Tools::Instance(); TMVA::PyMethodBase::PyInitialize(); TString outfileName("TMVA.root"); TFile *outputFile = TFile::Open(outfileName, "RECREATE"); TMVA::Factory *factory = new TMVA::Factory("TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"); factory->AddVariable("myvar1 := var1+var2", 'F'); factory->AddVariable("myvar2 := var1-var2", "Expression 2", "", 'F'); factory->AddVariable("var3", "Variable 3", "units", 'F'); factory->AddVariable("var4", "Variable 4", "units", 'F'); factory->AddSpectator("spec1 := var1*2", "Spectator 1", "units", 'F'); factory->AddSpectator("spec2 := var1*3", "Spectator 2", "units", 'F'); TString fname = "./tmva_class_example.root"; if (gSystem->AccessPathName(fname)) // file does not exist in local directory gSystem->Exec("curl -O http://root.cern.ch/files/tmva_class_example.root"); TFile *input = TFile::Open(fname); std::cout << "--- TMVAClassification : Using input file: " << input->GetName() << std::endl; // --- Register the training and test trees TTree *tsignal = (TTree *)input->Get("TreeS"); TTree *tbackground = (TTree *)input->Get("TreeB"); // global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; // You can add an arbitrary number of signal or background trees factory->AddSignalTree(tsignal, signalWeight); factory->AddBackgroundTree(tbackground, backgroundWeight); // Set individual event weights (the variables must exist in the original TTree) factory->SetBackgroundWeightExpression("weight"); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; // Tell the factory how to use the training and testing events factory->PrepareTrainingAndTestTree(mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=0:nTest_Background=0:SplitMode=Random:NormMode=NumEvents:!V"); /////////////////// //Booking // /////////////////// // Boosted Decision Trees //PyMVA methods factory->BookMethod(TMVA::Types::kPyRandomForest, "PyRandomForest", "!V:NEstimators=150:Criterion=gini:MaxFeatures=auto:MaxDepth=3:MinSamplesLeaf=1:MinWeightFractionLeaf=0:Bootstrap=kTRUE"); factory->BookMethod(TMVA::Types::kPyAdaBoost, "PyAdaBoost", "!V:BaseEstimator=None:NEstimators=100:LearningRate=1:Algorithm=SAMME.R:RandomState=None"); factory->BookMethod(TMVA::Types::kPyGTB, "PyGTB", "!V:NEstimators=150:Loss=deviance:LearningRate=0.1:Subsample=1:MaxDepth=6:MaxFeatures='auto'"); // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; }
void mytmvaClass( TString myMethodList = "" ) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) //-------------------------------- TMVA::Tools::Instance(); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Cut optimisation Use["Cuts"] = 0; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // // --- 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 0; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // --- Mutidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 0; Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting Use["KNN"] = 0; // k-nearest neighbour method // // --- Linear Discriminant Analysis Use["LD"] = 0; // Linear Discriminant identical to Fisher Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 0; // minimisation of user-defined function using Genetics Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 0; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 0; // Recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 1; // uses Adaptive Boost Use["BDTG"] = 0; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost // // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules") Use["RuleFit"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // Select methods (Not of much interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" //Syntax --> "factory->AddVariable( "VarName", "UserDefinedLabel", "units", 'VarType' );" //Where VarType refers to float, int etc. Float_t nhiggsJetSubjets; Float_t njets; Float_t deltaPhiWH; Float_t jetPt[11]; Float_t jetEta[11]; Float_t jetPhi[11]; Float_t jetE[11]; Float_t higgsJetSubjetEta[3]; Float_t higgsJetSubjetPhi[3]; Float_t higgsJetSubjetPt[3]; Float_t higgsJetSubjetE[3]; Float_t Wphi; Float_t fatjetPt[15]; Float_t fatjetEta[15]; Float_t fatjetPhi[15]; Float_t fatjetE[15]; Float_t DeltaPhiWH; TLorentzVector fatjet(0,0,0,0); TLorentzVector subjet1(0,0,0,0); TLorentzVector subjet2(0,0,0,0); TLorentzVector subjet3(0,0,0,0); TLorentzVector akt[15]; fatjet.SetPtEtaPhiE(fatjetPt[0],fatjetEta[0],fatjetPhi[0], fatjetE[0]); // deltaPhiWH = std::fabs(Wphi-fatjetPhi[0]); // if(deltaPhiWH>TMath::Pi()){ std::fabs(deltaPhiWH = 2*TMath::Pi()-deltaPhiWH);} factory->AddVariable( "deltaPhiWH", "deltaPhiWH", "", 'F' ); factory->AddVariable( "jetPt.array[0]", "jetPt.array[0]", "", 'F' ); factory->AddVariable( "fatjetPt[0].array[0]", "fatjetPt.array[0]", "", 'F' ); //factory->AddVariable( "DeltaR[3][13]", "DeltaR[3][13]", "", 'F' ); // factory->AddSpectator( "nhiggsJetSubjets", "nhiggsJetSubjets", "", 'F'); // factory->AddSpectator( "njets", "", "njets", 'I'); // factory->AddSpectator( "jetPt[0]", "jetPt[0]", "", 'F'); // factory->AddSpectator( "jetPt[1]", "jetPt[1]", "", 'F'); // factory->AddSpectator( "jetPt[2]", "jetPt[2]", "", 'F'); // factory->AddSpectator( "jetPt[3]", "jetPt[3]", "", 'F'); // factory->AddSpectator( "jetPt[4]", "jetPt[4]", "", 'F'); // factory->AddSpectator( "jetPt[5]", "jetPt[5]", "", 'F'); // factory->AddSpectator( "jetPt[6]", "jetPt[6]", "", 'F'); // factory->AddSpectator( "jetPt[7]", "jetPt[7]", "", 'F'); // factory->AddSpectator( "jetPt[8]", "jetPt[8]", "", 'F'); // factory->AddSpectator( "jetPt[9]", "jetPt[9]", "", 'F'); // factory->AddSpectator( "jetPt[10]", "jetPt[10]", "", 'F'); // factory->AddSpectator( "jetEta[0]", "jetEta[0]", "", 'F'); // factory->AddSpectator( "jetEta[1]", "jetEta[1]", "", 'F'); // factory->AddSpectator( "jetEta[2]", "jetEta[2]", "", 'F'); // factory->AddSpectator( "jetEta[3]", "jetEta[3]", "", 'F'); // factory->AddSpectator( "jetEta[4]", "jetEta[4]", "", 'F'); // factory->AddSpectator( "jetEta[5]", "jetEta[5]", "", 'F'); // factory->AddSpectator( "jetEta[6]", "jetEta[6]", "", 'F'); // factory->AddSpectator( "jetEta[7]", "jetEta[7]", "", 'F'); // factory->AddSpectator( "jetEta[8]", "jetEta[8]", "", 'F'); // factory->AddSpectator( "jetEta[9]", "jetEta[9]", "", 'F'); // factory->AddSpectator( "jetEta[10]", "jetEta[10]", "", 'F'); // factory->AddSpectator( "jetPhi[0]", "jetPhi[0]", "", 'F'); // factory->AddSpectator( "jetPhi[1]", "jetPhi[1]", "", 'F'); // factory->AddSpectator( "jetPhi[2]", "jetPhi[2]", "", 'F'); // factory->AddSpectator( "jetPhi[3]", "jetPhi[3]", "", 'F'); // factory->AddSpectator( "jetPhi[4]", "jetPhi[4]", "", 'F'); // factory->AddSpectator( "jetPhi[5]", "jetPhi[5]", "", 'F'); // factory->AddSpectator( "jetPhi[6]", "jetPhi[6]", "", 'F'); // factory->AddSpectator( "jetPhi[7]", "jetPhi[7]", "", 'F'); // factory->AddSpectator( "jetPhi[8]", "jetPhi[8]", "", 'F'); // factory->AddSpectator( "jetPhi[9]", "jetPhi[9]", "", 'F'); // factory->AddSpectator( "jetPhi[10]", "jetPhi[10]", "", 'F'); // factory->AddSpectator("jetE[0]", "jetE[0]", "", 'F'); // factory->AddSpectator("jetE[1]", "jetE[1]", "", 'F'); // factory->AddSpectator("jetE[2]", "jetE[2]", "", 'F'); // factory->AddSpectator("higgsJetSubjetEta[0]","higgsJetSubjetEta[0]", "", 'F'); // factory->AddSpectator("higgsJetSubjetEta[1]","higgsJetSubjetEta[1]", "", 'F'); // factory->AddSpectator("higgsJetSubjetEta[2]","higgsJetSubjetEta[2]", "", 'F'); // factory->AddSpectator("higgsJetSubjetPhi[0]","higgsJetSubjetPhi[0]", "", 'F'); // factory->AddSpectator("higgsJetSubjetPhi[1]","higgsJetSubjetPhi[1]", "", 'F'); // factory->AddSpectator("higgsJetSubjetPhi[2]","higgsJetSubjetPhi[2]", "", 'F'); // factory->AddSpectator("higgsJetSubjetPt[0]","higgsJetSubjetPt[0]", "", 'F'); // factory->AddSpectator("higgsJetSubjetPt[1]","higgsJetSubjetPt[1]", "", 'F'); // factory->AddSpectator("higgsJetSubjetPt[2]","higgsJetSubjetPt[2]", "", 'F'); // factory->AddSpectator("higgsJetSubjetE[0]","higgsJetSubjetE[0]", "", 'F'); // factory->AddSpectator("higgsJetSubjetE[1]","higgsJetSubjetE[1]", "", 'F'); // factory->AddSpectator("higgsJetSubjetE[2]","higgsJetSubjetE[2]", "", 'F'); // factory->AddSpectator("Wphi", "Wphi", "" , 'F'); // factory->AddVariable("fatjetPt[0]", "fatjetPt[0]", "", 'F'); // factory->AddSpectator("fatjetPhi[0]", "fatjetPhi[0]", "", 'F'); // factory->AddSpectator("fatjetEta[0]", "fatjetEta[0]", "", 'F'); // factory->AddSpectator("fatjetE[0]", "fatjetE[0]", "", 'F'); // //if(std::fabs(Wphi-fatjetPhi[0])<TMath::Pi()){ // factory->AddVariable("abs(Wphi-fatjetPhi[0])", "DeltaPhiWH", "", 'F'); // //} // if ((abs(Wphi-fatjetPhi[0])>TMath::Pi())){ // factory->AddVariable("(abs(DeltaPhiWH = 2*(TMath::Pi())-DeltaPhiWH))", "DeltaPhiWH", "", 'F'); // } // Here, we add the lepton flag as a spectator so we can cut (further down) on lep == 0 for muons only or lep == 1 for electrons only. //factory->AddSpectator( "lepType[0]", "lepType[0]", "units", 'I'); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables // factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' ); // factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' ); // --- Read training and test data // (it is also possible to use ASCII format as input -> see TMVA Users Guide) TString fsig_name_WH = "/Disk/speyside7/Grid/grid-files/rsmith/SummerProject/OverlapperInputFiles/PreCutsApplied/mc11_7TeV.128034.HerwigppPowHeg_Boosted_WH125lnubb.merge.NTUP_HSG5WH.Output_PreCuts-Original.root"; TString fback_name_TT = "/Disk/speyside7/Grid/grid-files/rsmith/SummerProject/OverlapperInputFiles/PreCutsApplied/TT_NewBranch2.output_MERGE_PreCuts-Original.root"; TString fback_name_Wbb = "/Disk/speyside7/Grid/grid-files/rsmith/SummerProject/OverlapperInputFiles/PreCutsApplied/mc11_7TeV.128861.SherpaWbb2J_lnu_Wpt100GeV_PreCuts-Original.root"; TFile *sig_input_WH = TFile::Open( fsig_name_WH ); TFile *back_input_TT = TFile::Open( fback_name_TT ); TFile *back_input_Wbb = TFile::Open( fback_name_Wbb); // --- Register the training and test trees TTree *signal_WH = (TTree*)sig_input_WH->Get("OutputTree"); TTree *background_TT = (TTree*)back_input_TT->Get("OutputTree"); TTree * background_Wbb = (TTree*)back_input_Wbb->Get("OutputTree"); // --- global event weights per tree (see below for setting event-wise weights) Double_t signalWeight_WH = 1; Double_t backgroundWeight_TT = 1; Double_t backgroundWeight_Wbb= 1; // --- Here can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal_WH, signalWeight_WH ); factory->AddBackgroundTree( background_TT, backgroundWeight_TT ); factory->AddBackgroundTree( background_Wbb, backgroundWeight_Wbb); Float_t nhiggsJetSubjets; Float_t njets; Float_t deltaPhiWH; Float_t jetPt[11]; Float_t jetEta[11]; Float_t jetPhi[11]; Float_t jetE[11]; Float_t higgsJetSubjetEta[3]; Float_t higgsJetSubjetPhi[3]; Float_t higgsJetSubjetPt[3]; Float_t higgsJetSubjetE[3]; Float_t Wphi; Float_t fatjetPt[15]; Float_t fatjetEta[15]; Float_t fatjetPhi[15]; Float_t fatjetE[15]; TLorentzVector fatjet(0,0,0,0); TLorentzVector subjet1(0,0,0,0); TLorentzVector subjet2(0,0,0,0); TLorentzVector subjet3(0,0,0,0); TLorentzVector akt[15]; fatjet.SetPtEtaPhiE(fatjetPt[0],fatjetEta[0],fatjetPhi[0], fatjetE[0]); deltaPhiWH = std::fabs(Wphi-fatjetPhi[0]); if(deltaPhiWH>TMath::Pi()){ std::fabs(deltaPhiWH = 2*TMath::Pi()-deltaPhiWH);} float deltaEta[3][11]; float deltaPhi[3][11]; float DeltaR[3][13]; for(int j = 0; j<nhiggsJetSubjets; j++){ for(int i = 0; i<njets; i++){ deltaEta[j][i] = std::fabs(higgsJetSubjetEta[j]-jetEta[i]); deltaPhi[j][i] = std::fabs(higgsJetSubjetPhi[j]-jetPhi[i]); DeltaR[j][i] = std::sqrt(deltaEta[j][i]*deltaEta[j][i]+deltaPhi[j][i]*deltaPhi[j][i]); } } for (int i=0; i < njets; i++) { akt[i].SetPtEtaPhiE(jetPt[i],jetEta[i],jetPhi[i], jetE[i]); } subjet1.SetPtEtaPhiE(higgsJetSubjetPt[0],higgsJetSubjetEta[0],higgsJetSubjetPhi[0], higgsJetSubjetE[0]); subjet2.SetPtEtaPhiE(higgsJetSubjetPt[1],higgsJetSubjetEta[1],higgsJetSubjetPhi[1], higgsJetSubjetE[1]); FatHiggs = subjet1 + subjet2; if (nhiggsJetSubjets>2){ subjet3.SetPtEtaPhiE(higgsJetSubjetPt[2],higgsJetSubjetEta[2],higgsJetSubjetPhi[2], higgsJetSubjetE[2]); } // To give different trees for training and testing, do: // factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ); // factory->AddSignalTree( signalTestTree, signalTestWeight, "Test" ); // Set individual event weights (the variables must exist in the original TTree) // for signal : factory->SetSignalWeightExpression ("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("wei // --- Apply additional cuts on the signal and background samples (can be different) // PLEASE NOTE: If no cuts are required, do NOT leave any spaces between quotation marks, otherwise an error will occur. TCut mycuts = "deltaPhiWH>-800"; //Signal Tree Cuts TCut mycutb = "deltaPhiWH>-800"; //Background Tree Cuts //cout << "DeltaPhiWH: " << deltaPhiWH << endl; // Tell the factory how to use the training and testing events // // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:!V:NormMode=NumEvents"); // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // --- Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[1]=0:CutRangeMax[1]=450:VarProp[1]=FMax:EffSel:Steps=60:Cycles=3:PopSize=2000:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood ("naive Bayes estimator") if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density estimator to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=350:nEventsMin=100:MaxDepth=3:BoostType=Grad:Shrinkage=0.05:UseBaggedGrad:GradBaggingFraction=0.51:nCuts=65:PruneStrength=2:PruneMethod=ExpectedError:NNodesMax=7" ); //"!H:!V:NTrees=350:nEventsMin=100:MaxDepth=3:BoostType=Grad:Shrinkage=0.05:UseBaggedGrad:GradBaggingFraction=0.55:nCuts=50:PruneStrength=2:PruneMethod=ExpectedError:NNodesMax=10" ); //nCuts=20 && no pruning originally if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "NTrees=550:nEventsMin=110:MaxDepth=5.5:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=10:PruneStrength=6:PruneMethod=ExpectedError"); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=950:nEventsMin=20:MaxDepth=10:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=300:nEventsMin=20:MaxDepth=10:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=10:PruneStrength=6:PruneMethod=ExpectedError:VarTransform=Decorrelate" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("SigEffAt001","FitGA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void ZTMVAClassification( TString myMethodList = "" ) { //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Cut optimisation Use["Cuts"] = 0; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // // --- 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 0; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // --- Mutidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 0; Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting Use["KNN"] = 0; // k-nearest neighbour method // // --- Linear Discriminant Analysis Use["LD"] = 0; // Linear Discriminant identical to Fisher Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 0; // minimisation of user-defined function using Genetics Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 1; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 0; // Recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 1; // uses Adaptive Boost Use["BDTG"] = 1; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 1; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of fisher discriminant for node splitting // // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules") Use["RuleFit"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); //factory->AddVariable( "maxpioneta", "maxpioneta", "", 'F' ); //factory->AddVariable( "minpioneta", "minpioneta", "", 'F' ); factory->AddVariable( "nTT", "nTT", "", 'F' ); // factory->AddVariable( "pidpimin", "pidpimin", "", 'F' ); // factory->AddVariable( "pidpimax", "pidpimax", "", 'F' ); factory->AddVariable( "normxpt", "normxpt", "", 'F' ); factory->AddVariable( "eta", "eta", "", 'F' ); //factory->AddVariable( "phi", "phi", "", 'F' ); // factory->AddVariable( "normptsum", "normptsum", "", 'F' ); //factory->AddVariable( "ptAsym", "ptAsym", "", 'F' ); //factory->AddVariable( "dphimax", "dphimax", "", 'F' ); //factory->AddVariable( "dphimin", "dphimin", "", 'F' ); //factory->AddVariable( "drmax", "drmax", "", 'F' ); // factory->AddVariable( "drmin", "drmin", "", 'F' ); // factory->AddVariable( "normpionp", "normpionp", "", 'F' ); factory->AddVariable( "normminpionpt", "normminpionpt", "", 'F' ); //factory->AddVariable( "normminpionp", "normminpionp", "", 'F' ); factory->AddVariable( "normmaxpionpt", "normmaxpionpt", "", 'F' ); // factory->AddVariable( "normptj", "normptj", "", 'F' ); //factory->AddVariable( "jmasspull", "jmasspull", "", 'F' ); //factory->AddVariable( "vchi2dof", "vchi2dof", "", 'F' ); // factory->AddVariable("maxchi2","maxchi2","", 'F'); // factory->AddVariable("normr","normr","", 'F'); // factory->AddVariable("normq","normq","", 'F'); //factory->AddVariable("normminm","normminm","", 'F'); factory->AddVariable("logipmax","logipmax","", 'F'); factory->AddVariable("logipmin","logipmin","", 'F'); factory->AddVariable("logfd","logfd",'F'); factory->AddVariable("logvd","logvd",'F'); //factory->AddVariable("pointAngle","pointingAngle",'F'); factory->AddVariable("logvpi","",'F'); //factory->AddVariable("logmaxprob","",'F'); //factory->AddVariable("logminprob","",'F'); factory->AddSpectator( "mReFit", "mReFit", "", 'D' ); // factory->AddSpectator( "Qdecay", "Qdecay", "",'F' ); // factory->AddSpectator( "m23", "m23", "",'F' ); // TFile * input_Background = new TFile("../back.root"); TFile * input_Signal = new TFile("../cmx12.root"); TFile * input_Background = new TFile("../background12.root"); std::cout << "--- TMVAClassification : Using input file for signal : " << input_Signal->GetName() << std::endl; std::cout << "--- TMVAClassification : Using input file for backgound : " << input_Background->GetName() << std::endl; // --- Register the training and test trees TTree *signal = (TTree*)input_Signal->Get("psiCand"); TTree *background = (TTree*)input_Background->Get("psiCand"); // global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; // You can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = "QDecay < 300&&fdchi2 > 300"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = "QDecay < 300&&fdchi2> 300"; // for example: TCut mycutb = "abs(var1)<0.5"; factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood ("naive Bayes estimator") if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density estimator to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]){ // factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); //factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=Norm:NCycles=600:HiddenLayers=N+5:TestRate=5" ); // factory->BookMethod( TMVA::Types::kMLP, "MLPCE", "H:!V:NeuronType=sigmoid:VarTransform=Norm:NCycles=600:HiddenLayers=N+5:TestRate=5:EstimatorType=CE" ); factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=sigmoid:VarTransform=Norm:NCycles=600:HiddenLayers=9:TestRate=5:EstimatorType=CE" ); // factory->BookMethod( TMVA::Types::kMLP, "MLPCE83", "H:!V:NeuronType=tanh:VarTransform=Norm:NCycles=600:HiddenLayers=8,3:TestRate=5:EstimatorType=CE" ); } if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) { // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5" ); // factory->BookMethod( TMVA::Types::kBDT, "BDTGI", // "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5:SeparationType=GiniIndexWithLaplace" ); // factory->BookMethod( TMVA::Types::kBDT, "BDTG6", // "!H:!V:NTrees=600:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.4:nCuts=20:NNodesMax=6" ); //factory->BookMethod( TMVA::Types::kBDT, "BDTG2", // "!H:!V:NTrees=800:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.4:nCuts=20:NNodesMax=6" ); factory->BookMethod( TMVA::Types::kBDT, "BDTG3", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.4:nCuts=20:NNodesMax=6" ); // factory->BookMethod( TMVA::Types::kBDT, "BDTG4", // "!H:!V:NTrees=1200:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.4:nCuts=20:NNodesMax=6" ); // factory->BookMethod( TMVA::Types::kBDT, "BDTG5", // "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.4:nCuts=20:NNodesMax=5" ); } if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", "!H:!V:NTrees=50:nEventsMin=150:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier usage, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","GA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros // if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
//require mumucl>0.6 //opening angle >10 //coplanarity >90 //pang<90 void TMVAClassification_cc1pcoh_bdt_ver6noveract( TString myMethodList = "" ) { //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // to get access to the GUI and all tmva macros TString thisdir = gSystem->DirName(gInterpreter->GetCurrentMacroName()); gROOT->SetMacroPath(thisdir + ":" + gROOT->GetMacroPath()); gROOT->ProcessLine(".L TMVAGui.C"); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Cut optimisation Use["Cuts"] = 1; Use["CutsD"] = 1; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // // --- 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 1; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 1; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // --- Mutidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 1; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 1; Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting Use["KNN"] = 1; // k-nearest neighbour method // // --- Linear Discriminant Analysis Use["LD"] = 1; // Linear Discriminant identical to Fisher Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 1; // minimisation of user-defined function using Genetics Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 0; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 1; // Recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN // // --- Support Vector Machine Use["SVM"] = 1; // // --- Boosted Decision Trees Use["BDT"] = 1; // uses Adaptive Boost Use["BDTG"] = 0; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of fisher discriminant for node splitting // // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules") Use["RuleFit"] = 1; // --------------------------------------------------------------- // Choose method std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // --------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName( "TMVA_cc1pcoh_bdt_ver6noveract.root" );//newchange TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification_ver6noveract", outputFile,//newchange "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // Add variable //sprintf(select, "Ntrack==2&&mumucl>0.6&&pmucl>0.25&&pang<90&&muang_t<15 && veract*7.66339869e-2<34"); //factory->AddVariable( "Ntrack", 'F' ); factory->AddVariable( "mumucl", 'F' ); factory->AddVariable( "pmucl", 'F' ); factory->AddVariable( "pang_t", 'F' );//use pang instead of pang_t factory->AddVariable( "muang_t", 'F' ); //factory->AddVariable( "veract", 'F' ); factory->AddVariable( "ppe", 'F'); factory->AddVariable( "mupe", 'F'); factory->AddVariable( "range", 'F'); factory->AddVariable( "coplanarity", 'F'); factory->AddVariable( "opening", 'F');//newadd // Add spectator factory->AddSpectator( "fileIndex", 'I' ); factory->AddSpectator( "nuE", 'F' ); factory->AddSpectator( "inttype", 'I' ); factory->AddSpectator( "norm", 'F' ); factory->AddSpectator( "totcrsne", 'F' ); factory->AddSpectator( "veract", 'F' ); factory->AddSpectator( "pang", 'F' ); factory->AddSpectator( "mupdg", 'I' ); factory->AddSpectator( "ppdg", 'I' ); // --------------------------------------------------------------- // --- Get weight TString fratioStr="/home/kikawa/macros/nd34_tuned_11bv3.1_250ka.root"; // --------------------------------------------------------------- // --- Add sample TString fsignalStr="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmva/pm_merged_ccqe_tot.root"; TString fbarStr="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmva/pmbar_merged_ccqe.root"; TString fbkgStr="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmva/wall_merged_ccqe_tot.root"; TString fbkg2Str="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmva/ingrid_merged_nd3_ccqe_tot.root"; /*TString fsignalStr="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmvafix/pm_merged_ccqe_tot.root"; TString fbarStr="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmvafix/pmbar_merged_ccqe.root"; TString fbkgStr="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmvafix/wall_merged_ccqe_tot.root"; TString fbkg2Str="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmvafix/ingrid_merged_nd3_ccqe_tot.root";*/ TFile *pfileSignal = new TFile(fsignalStr); TFile *pfileBar = new TFile(fbarStr); TFile *pfileBkg = new TFile(fbkgStr); TFile *pfileBkg2 = new TFile(fbkg2Str); TFile *pfileRatio = new TFile(fratioStr); TTree *ptree_sig = (TTree*)pfileSignal->Get("tree"); TTree *ptree_bar = (TTree*)pfileBar->Get("tree"); TTree *ptree_bkg = (TTree*)pfileBkg->Get("tree"); TTree *ptree_bkg2 = (TTree*)pfileBkg2->Get("tree"); // POT normalization const int nmcFile = 3950; const int nbarFile = 986; const int nbkgFile = 55546;//(31085+24461); const int nbkg2File = 7882;//(3941+3941); // global event weights per tree (see below for setting event-wise weights) // adding for signal sample // using this as standard and add other later Double_t signalWeight_sig = 1.0; Double_t backgroundWeight_sig = 1.0; factory->AddSignalTree ( ptree_sig, signalWeight_sig ); factory->AddBackgroundTree( ptree_sig, backgroundWeight_sig ); // Add Numubar sample //Double_t signalWeight_bar = nmcFile/float(nbarFile); Double_t backgroundWeight_bar = nmcFile/float(nbarFile); //factory->AddSignalTree ( ptree_bar, signalWeight_bar ); factory->AddBackgroundTree( ptree_bar, backgroundWeight_bar ); // Add wall background //Double_t signalWeight_bkg = nmcFile/float(nbkgFile); Double_t backgroundWeight_bkg = nmcFile/float(nbkgFile); //factory->AddSignalTree ( ptree_bkg, signalWeight_bkg ); factory->AddBackgroundTree( ptree_bkg, backgroundWeight_bkg ); // Add INGRID background //Double_t signalWeight_bkg2 = nmcFile/float(nbkg2File); Double_t backgroundWeight_bkg2 = nmcFile/float(nbkg2File); //factory->AddSignalTree ( ptree_bkg2, signalWeight_bkg2 ); factory->AddBackgroundTree( ptree_bkg2, backgroundWeight_bkg2 ); //factory->SetSignalWeightExpression ("norm*totcrsne*2.8647e-13"); //factory->SetBackgroundWeightExpression( "norm*totcrsne*2.8647e-13" ); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = "Ntrack==2 && abs(inttype)==16 && fileIndex==1 && pang<90 && mumucl>0.6 && opening>10 && coplanarity>90 && pmucl>0.2"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = "Ntrack==2 && (abs(inttype)!=16 || fileIndex>1) && pang<90 && mumucl>0.6 && opening>10 && coplanarity>90 && pmucl>0.2"; // for example: TCut mycutb = "abs(var1)<0.5"; // Tell the factory how to use the training and testing events // // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood ("naive Bayes estimator") if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density estimator to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" ); /*if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );*/ if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=850:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" ); if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier usage, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // ---- STILL EXPERIMENTAL and only implemented for BDT's ! // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","FitGA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros //if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void TMVAClassificationCategory() { //--------------------------------------------------------------- // Example for usage of different event categories with classifiers std::cout << std::endl << "==> Start TMVAClassificationCategory" << std::endl; bool batchMode = false; // Create a new root output file. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object (see TMVAClassification.C for more information) std::string factoryOptions( "!V:!Silent:Transformations=I;D;P;G,D" ); if (batchMode) factoryOptions += ":!Color:!DrawProgressBar"; TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationCategory", outputFile, factoryOptions ); // Define the input variables used for the MVA training factory->AddVariable( "var1", 'F' ); factory->AddVariable( "var2", 'F' ); factory->AddVariable( "var3", 'F' ); factory->AddVariable( "var4", 'F' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables factory->AddSpectator( "eta" ); // Load the signal and background event samples from ROOT trees TFile *input(0); TString fname( "" ); if (UseOffsetMethod) fname = "data/toy_sigbkg_categ_offset.root"; else fname = "data/toy_sigbkg_categ_varoff.root"; if (!gSystem->AccessPathName( fname )) { // first we try to find tmva_example.root in the local directory std::cout << "--- TMVAClassificationCategory: Accessing " << fname << std::endl; input = TFile::Open( fname ); } if (!input) { std::cout << "ERROR: could not open data file: " << fname << std::endl; exit(1); } TTree *signal = (TTree*)input->Get("TreeS"); TTree *background = (TTree*)input->Get("TreeB"); /// Global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; /// You can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; // Tell the factory how to use the training and testing events factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // ---- Book MVA methods // Fisher discriminant factory->BookMethod( TMVA::Types::kFisher, "Fisher", "!H:!V:Fisher" ); // Likelihood factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // --- Categorised classifier TMVA::MethodCategory* mcat = 0; // The variable sets TString theCat1Vars = "var1:var2:var3:var4"; TString theCat2Vars = (UseOffsetMethod ? "var1:var2:var3:var4" : "var1:var2:var3"); // Fisher with categories TMVA::MethodBase* fiCat = factory->BookMethod( TMVA::Types::kCategory, "FisherCat","" ); mcat = dynamic_cast<TMVA::MethodCategory*>(fiCat); mcat->AddMethod( "abs(eta)<=1.3", theCat1Vars, TMVA::Types::kFisher, "Category_Fisher_1","!H:!V:Fisher" ); mcat->AddMethod( "abs(eta)>1.3", theCat2Vars, TMVA::Types::kFisher, "Category_Fisher_2","!H:!V:Fisher" ); // Likelihood with categories TMVA::MethodBase* liCat = factory->BookMethod( TMVA::Types::kCategory, "LikelihoodCat","" ); mcat = dynamic_cast<TMVA::MethodCategory*>(liCat); mcat->AddMethod( "abs(eta)<=1.3",theCat1Vars, TMVA::Types::kLikelihood, "Category_Likelihood_1","!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); mcat->AddMethod( "abs(eta)>1.3", theCat2Vars, TMVA::Types::kLikelihood, "Category_Likelihood_2","!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassificationCategory is done!" << std::endl; // Clean up delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void classifyBDT(TString inputVariables = "trainingVars.txt", TString signalName = "/mnt/hscratch/dabercro/skims2/BDT_Signal.root", TString backName = "/mnt/hscratch/dabercro/skims2/BDT_Background.root") { TMVA::Tools::Instance(); std::cout << "==> Start TMVAClassification" << std::endl; // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName( "TMVA/TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationCategory", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;N" ); // A very simple MVA (feel free to uncomment and comment what you like) => as a rule of thumb 10-20 variables is where people start to get worried about total number ifstream configFile; configFile.open(inputVariables.Data()); TString tempFormula; configFile >> tempFormula; // Is the name of the BDT while(!configFile.eof()){ configFile >> tempFormula; if(tempFormula != ""){ factory->AddVariable(tempFormula,'F'); } } TString lVars; // TCut lCut = "jet1qg2<2.&&jet1pt>250.&&jet1pullAngle>-5.";// < 10 && jet1mass_m2 > 60 && jet1mass_m2 < 120"; // TCut lCut = "passZ > 3 && fjet1pt > 250 && fjet1MassPruned < 120 && fatjetid < 2"; TCut lCut = "abs(fjet1PartonId)!=24&&abs(fjet1PartonId)!=23"; // std::string lEventCut = "event % 2 == 1"; // lCut += lEventCut.c_str(); // TCut lSCut = "passT > 0 && fjet1pt > 250 && fjet1MassPruned < 120 && abs(fjet1PartonId) == 24&& fatjetid < 2"; TCut lSCut = "abs(fjet1PartonId)==24||abs(fjet1PartonId)==23"; // lSCut += lEventCut.c_str(); TCut cleanCut = "fjet1QGtagSub2 > -10 && fjet1PullAngle > -4 && abs(fjet1pt/fjet1MassTrimmed)<200 && abs(fjet1pt/fjet1MassPruned)<200"; TFile *lSAInput = TFile::Open(signalName); TTree *lSASignal = (TTree*)lSAInput ->Get("DMSTree"); TFile *lSBInput = TFile::Open(backName); TTree *lSBSignal = (TTree*)lSBInput ->Get("DMSTree"); Double_t lSWeight = 1.0; Double_t lBWeight = 1.0; gROOT->cd( outfileName+TString(":/") ); factory->AddSignalTree ( lSASignal, lSWeight ); gROOT->cd( outfileName+TString(":/") ); factory->AddBackgroundTree( lSBSignal, lBWeight ); factory->SetWeightExpression("weight"); std::stringstream pSignal,pBackground; pSignal << "nTrain_Signal="<< lSASignal->GetEntries() << ":nTrain_Background=" << lSBSignal->GetEntries(); // factory->PrepareTrainingAndTestTree( lSCut, lCut,(pSignal.str()+":SplitMode=Block:NormMode=NumEvents:!V").c_str() ); factory->PrepareTrainingAndTestTree(lSCut&&cleanCut,lCut&&cleanCut,"nTrain_Signal=0:nTrain_Background=0:SplitMode=Alternate:NormMode=NumEvents:!V"); std::string lName = "alpha_VBF"; TString lBDTDef = "!H:!V:NTrees=400:BoostType=Grad:Shrinkage=0.1:UseBaggedGrad=F:nCuts=2000:NNodesMax=10000:MaxDepth=5:UseYesNoLeaf=F:nEventsMin=200"; // TString lBDTDef = "!H:!V:NTrees=400:BoostType=Grad:Shrinkage=0.1:UseBaggedGrad=F:nCuts=2000:MaxDepth=5:UseYesNoLeaf=F:MinNodeSize=0.086:NegWeightTreatment=IgnoreNegWeightsInTraining"; factory->BookMethod(TMVA::Types::kBDT,"BDT_simple_alpha",lBDTDef); factory->TrainAllMethods(); factory->TestAllMethods(); factory->EvaluateAllMethods(); outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; //if (!gROOT->IsBatch()) TMVAGui( outfileName ); //TString lBDTDef = "!H:!V:NTrees=100:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad=F:nCuts=2000:NNodesMax=10000:MaxDepth=3:SeparationType=GiniIndex"; }
/********************************************************************************** * Project : TMVA - a ROOT-integrated toolkit for multivariate data analysis * * Package : TMVA * * Root Macro: TMVAClassification * * * * This macro provides examples for the training and testing of the * * TMVA classifiers. * * * * As input data is used a toy-MC sample consisting of four Gaussian-distributed * * and linearly correlated input variables. * * * * The methods to be used can be switched on and off by means of booleans, or * * via the prompt command, for example: * * * * root -l ./TMVAClassification.C\(\"Fisher,Likelihood\"\) * * * * (note that the backslashes are mandatory) * * If no method given, a default set of classifiers is used. * * * * The output file "TMVA.root" can be analysed with the use of dedicated * * macros (simply say: root -l <macro.C>), which can be conveniently * * invoked through a GUI that will appear at the end of the run of this macro. * * Launch the GUI via the command: * * * * root -l ./TMVAGui.C * * * **********************************************************************************/ void TMVAClassification( TString myMethodList = "") { TTree *signal = (TTree *)gDirectory->Get("VertexG"); if (! signal) { std::cout << "No signal TTree" << std::endl; return;} TTree *background = (TTree *)gDirectory->Get("VertexB"); if (! background) { std::cout << "No background TTree" << std::endl; return;} //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // Default MVA methods to be trained + tested std::map<std::string, int> Use; // --- Cut optimisation Use["Cuts"] = 1; Use["CutsD"] = 1; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // // --- 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 1; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 1; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // --- Mutidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 1; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 1; Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting Use["KNN"] = 1; // k-nearest neighbour method // // --- Linear Discriminant Analysis Use["LD"] = 1; // Linear Discriminant identical to Fisher Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 1; // minimisation of user-defined function using Genetics Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 0; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 1; // Recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN // // --- Support Vector Machine Use["SVM"] = 1; // // --- Boosted Decision Trees Use["BDT"] = 1; // uses Adaptive Boost Use["BDTG"] = 0; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of fisher discriminant for node splitting Use["myBDTD"] = 1; // mine // // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules") Use["RuleFit"] = 1; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string, int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (size_t i = 0; i < mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string, int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName( "TMVA.root" ); TFile *outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // load the signal and background event samples from ROOT trees std::cout << " starts ... " << std::endl; // global event weights per tree (see below for setting event-wise weights) // Float_t w; double signalWeight = 1.0; double backgroundWeight = 1.0; std::cout << " signalWeight = " << signalWeight << " backWeight = " << backgroundWeight << std::endl; factory->AddSignalTree( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); TString separator(":"); TString Vnames(vnames); TObjArray *array = Vnames.Tokenize(separator); std::vector<std::string> inputVars; TIter next(array); TObjString *objs; while ((objs = (TObjString *) next())) { // std::cout << objs->GetString() << std::endl; TString name(objs->GetString()); if (name == "BEMC") continue; if (name == "noBEMC") continue; factory->AddVariable(name, 'F'); } // This would set individual event weights (the variables defined in the // expression need to exist in the original TTree) // for signal : factory->SetSignalWeightExpression("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("weight1*weight2"); // commented JB : 04/26 ?? //factory->dSetBackgroundWeightExpression("weight"); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; TCut mycutb = ""; // Tell the factory how to use the training and testing events // // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: //factory->PrepareTrainingAndTestTree( mycuts,mycutb,"NSigTrain=9000:NBkgTrain=50000:NSigTest=9000:NBkgTest=50000:SplitMode=Random:!V" ); factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=4900:nTrain_Background=49000:nTest_Signal=4900:nTest_Background=49000:SplitMode=Random:!V"); // for KFVertex // factory->PrepareTrainingAndTestTree( mycuts, mycutb,"nTrain_Signal=20000:nTrain_Background=40000:nTest_Signal=20000:nTest_Background=40000:SplitMode=Random:!V"); // for PPV // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood ("naive Bayes estimator") if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density estimator to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); if (Use["myBDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTDTEST", "!H:!V:NTrees=1000:nEventsMin=400:MaxDepth=6:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", "!H:!V:NTrees=50:nEventsMin=150:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier usage, see: TMVAClassificationCategory // TMVA::IMethod* category = factory->BookMethod( TMVA::Types::kCategory,"Category","" ); // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events #if 0 factory->OptimizeAllMethods("SigEffAt001", "Scan"); factory->OptimizeAllMethods("ROCIntegral", "GA"); #endif // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; }
void test2(){ //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); TString outfileName( "trainingBDT_tZq.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "BDT_trainning_tzq", outputFile,"!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); TFile *input_sig = TFile::Open( "../TreeReader/outputroot/histofile_tZq.root" ); TFile *input_wz = TFile::Open( "../TreeReader/outputroot/histofile_WZ.root" ); TTree *signal = (TTree*)input_sig->Get("Ttree_tZq"); TTree *background = (TTree*)input_wz->Get("Ttree_WZ"); factory->AddSignalTree ( signal, 1.); factory->AddBackgroundTree( background, 1.); std::vector<TString > varList; varList.push_back("tree_cosThetaStar");; varList.push_back("tree_topMass"); varList.push_back("tree_totMass"); varList.push_back("tree_deltaPhilb"); varList.push_back("tree_deltaRlb"); varList.push_back("tree_deltaRTopZ"); varList.push_back("tree_asym"); varList.push_back("tree_Zpt"); varList.push_back("tree_ZEta"); varList.push_back("tree_topPt"); varList.push_back("tree_topEta"); varList.push_back("tree_NJets"); varList.push_back("tree_NBJets"); varList.push_back("tree_deltaRZl"); varList.push_back("tree_deltaPhiZmet"); varList.push_back("tree_btagDiscri"); varList.push_back("tree_totPt"); varList.push_back("tree_totEta"); varList.push_back("tree_leptWPt"); varList.push_back("tree_leptWEta"); varList.push_back("tree_leadJetPt"); varList.push_back("tree_leadJetEta"); varList.push_back("tree_deltaRZleptW"); varList.push_back("tree_deltaPhiZleptW"); varList.push_back("tree_met" ); varList.push_back("tree_mTW" ); for(unsigned int i=0; i< varList.size() ; i++) factory->AddVariable( varList[i].Data(), 'F'); factory->SetSignalWeightExpression ("tree_EvtWeight"); factory->SetBackgroundWeightExpression("tree_EvtWeight"); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); //factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); // factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=100:nEventsMin=100:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=100:nEventsMin=100:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void TMVAClassification( std::string selectionName, std::string charge, TString myMethodList = "" ) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) // this loads the library TMVA::Tools::Instance(); //--------------------------------------------------------------- // default MVA methods to be trained + tested std::map<std::string,int> Use; Use["Cuts"] = 1; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // --- Use["Likelihood"] = 0; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // --- Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDERSkNN"] = 0; // depreciated until further notice Use["PDEFoam"] = 0; // -- Use["KNN"] = 0; // --- Use["HMatrix"] = 0; Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; Use["LD"] = 0; // --- Use["FDA_GA"] = 0; Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // --- Use["MLP"] = 0; // this is the recommended ANN Use["MLPBFGS"] = 0; // recommended ANN with optional training method Use["CFMlpANN"] = 0; // *** missing Use["TMlpANN"] = 0; // --- Use["SVM"] = 0; // --- Use["BDT"] = 0; Use["BDTD"] = 0; Use["BDTG"] = 0; Use["BDTB"] = 0; // --- Use["RuleFit"] = 0; // --- Use["Plugin"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // Create a new root output file. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory will // then run the performance analysis for you. // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] //factory->AddVariable( "MET" , "ME_{T}", "GeV", 'F'); //factory->AddVariable( "TMath::Max(pT1,pT2)" , "Lead Lepton p_{T}", "GeV", 'F'); factory->AddVariable( "HT" , "H_{T}", "GeV", 'F'); //factory->AddVariable( "M3" , "M_{3}", "GeV", 'F'); factory->AddVariable( "TMath::Min(pT1,pT2)" , "Sublead Lepton p_{T}", "GeV", 'F'); //factory->AddVariable( "NbJ" , "N B Jets", "", 'I'); //factory->AddVariable( "NbJmed" , "N B Jets (medium)", "", 'I'); //factory->AddVariable( "NJ" , "N Jets", "", 'I'); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables //factory->AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' ); //factory->AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' ); // read training and test data if (ReadDataFromAsciiIFormat) { // load the signal and background event samples from ascii files // format in file must be: // var1/F:var2/F:var3/F:var4/F // 0.04551 0.59923 0.32400 -0.19170 // ... TString datFileS = "tmva_example_sig.dat"; TString datFileB = "tmva_example_bkg.dat"; factory->SetInputTrees( datFileS, datFileB ); } else { // load the signal and background event samples from ROOT trees TFile *input(0); //TString fname = "../macros/tmva_example.root"; //TString fname = "opt_ttW_Apr10_Iso005_NoZVeto_jet20.root"; TString fname = "opt_ttW_Nov20_muDetIso0p05_elDetIso0p05_jet20_withZveto_optimization.root"; //TString fname = "opt_ttW_" + selectionName + ".root"; if (!gSystem->AccessPathName( fname )) { input = TFile::Open( fname ); // check if file in local directory exists } else { input = TFile::Open( "http://root.cern.ch/files/tmva_class_example.root" ); // if not: download from ROOT server } if (!input) { std::cout << "ERROR: could not open data file" << std::endl; exit(1); } std::cout << "--- TMVAClassification : Using input file: " << input->GetName() << std::endl; TTree* opt_tree = (TTree*)input->Get("tree_opt"); TFile* signalFile = TFile::Open("/shome/mdunser/workspace/CMSSW_5_2_5/src/DiLeptonAnalysis/NTupleProducer/macros/plots/Nov26_muPFIso0p05_elPFIso0p05_jet20_withZveto/TTbarW_Yields.root"); TTree *signal = (TTree*)signalFile->Get("SigEvents"); TChain* background = new TChain("SigEvents"); background->Add("/shome/mdunser/workspace/CMSSW_5_2_5/src/DiLeptonAnalysis/NTupleProducer/macros/plots/Nov26_muPFIso0p05_elPFIso0p05_jet20_withZveto/TTJets_Yields.root"); background->Add("/shome/mdunser/workspace/CMSSW_5_2_5/src/DiLeptonAnalysis/NTupleProducer/macros/plots/Nov26_muPFIso0p05_elPFIso0p05_jet20_withZveto/WZTo3LNu_Yields.root"); //TTree *background = (TTree*)opt_tree->CopyTree("SName==\"TTJets\" || SName==\"DYJets\" || SName==\"WZTo3LNu\""); //TTree *background = (TTree*)opt_tree->CopyTree("SName==\"DYJets\""); // global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; // ====== register trees ==================================================== // // the following method is the prefered one: // you can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); // To give different trees for training and testing, do as follows: // factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ); // factory->AddSignalTree( signalTestTree, signalTestWeight, "Test" ); // Use the following code instead of the above two or four lines to add signal and background // training and test events "by hand" // NOTE that in this case one should not give expressions (such as "var1+var2") in the input // variable definition, but simply compute the expression before adding the event // // // --- begin ---------------------------------------------------------- // std::vector<Double_t> vars( 4 ); // vector has size of number of input variables // Float_t treevars[4]; // for (Int_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (Int_t i=0; i<signal->GetEntries(); i++) { // signal->GetEntry(i); // for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < signal->GetEntries()/2) factory->AddSignalTrainingEvent( vars, signalWeight ); // else factory->AddSignalTestEvent ( vars, signalWeight ); // } // // for (Int_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (Int_t i=0; i<background->GetEntries(); i++) { // background->GetEntry(i); // for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight ); // else factory->AddBackgroundTestEvent ( vars, backgroundWeight ); // } // // --- end ------------------------------------------------------------ // // ====== end of register trees ============================================== } // This would set individual event weights (the variables defined in the // expression need to exist in the original TTree) // for signal : factory->SetSignalWeightExpression("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("weight1*weight2"); //factory->SetSignalWeightExpression("eventWeight"); factory->SetBackgroundWeightExpression("1./SLumi"); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts; TCut mycutb; if( charge == "plus" ) { mycuts = "Charge==1 && SystFlag==0 && NJ>=3 && pT1>20. && pT2>20. && NbJmed>0 && TLCat==0 && Flavor<3 && PassZVeto==1"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; mycutb = "Charge==1 && SystFlag==0 && NJ>=3 && pT1>20. && pT2>20. && NbJmed>0 && TLCat==0 && Flavor<3 && PassZVeto==1"; // for example: TCut mycutb = "abs(var1)<0.5"; } else if( charge == "minus" ) { mycuts = "Charge==-1 && SystFlag==0 && NJ>=3 && pT1>20. && pT2>20. && NbJmed>0 && TLCat==0 && Flavor<3 && PassZVeto==1"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; mycutb = "Charge==-1 && SystFlag==0 && NJ>=3 && pT1>20. && pT2>20. && NbJmed>0 && TLCat==0 && Flavor<3 && PassZVeto==1"; // for example: TCut mycutb = "abs(var1)<0.5"; } else if( charge == "all" ) { mycuts = " SystFlag==0 && NJ>=3 && pT1>20. && pT2>20. && NbJmed>0 && TLCat==0 && Flavor<3 && PassZVeto==1"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; mycutb = " SystFlag==0 && NJ>=3 && pT1>20. && pT2>20. && NbJmed>0 && TLCat==0 && Flavor<3 && PassZVeto==1"; // for example: TCut mycutb = "abs(var1)<0.5"; } else { std::cout << "only 'plus' and 'minus' and 'all' are allowed for charge." <<std::endl; return; } //if( btagMed_presel_ ) { // mycuts += "NbJmed>0"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; // mycutb += "NbJmed>0"; // for example: TCut mycutb = "abs(var1)<0.5"; //} // tell the factory to use all remaining events in the trees after training for testing: factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // If no numbers of events are given, half of the events in the tree are used for training, and // the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); // ---- Book MVA methods // // please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) { std::string bookConditions; bookConditions = "H:!V:FitMethod=MC"; bookConditions += ":VarProp[0]=FMax"; //HT bookConditions += ":VarProp[1]=FMax"; //pt2 bookConditions += ":EffSel:SampleSize=500000000"; //bookConditions += ":EffSel:SampleSize=50000"; factory->BookMethod( TMVA::Types::kCuts, "Cuts", bookConditions.c_str() ); //factory->BookMethod( TMVA::Types::kCuts, "Cuts", // "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); } if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // test the decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // test the new kernel density estimator if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // test the mixed splines and kernel density estimator (depending on which variable) if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSkNN"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSkNN", "!H:!V:VolumeRangeMode=kNN:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:CutNmin=T:Nmin=100:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" ); // Fisher discriminant if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=60:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2"); // Linear discriminant (same as Fisher) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=N+5:TestRate=10:EpochMonitoring" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=N+5:TestRate=10:TrainingMethod=BFGS:!EpochMonitoring" ); // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // As an example how to use the ROOT plugin mechanism, book BDT via // plugin mechanism if (Use["Plugin"]) { // // first the plugin has to be defined, which can happen either through the following line in the local or global .rootrc: // // # plugin handler plugin name(regexp) class to be instanciated library constructor format // Plugin.TMVA@@MethodBase: ^BDT TMVA::MethodBDT TMVA.1 "MethodBDT(TString,TString,DataSet&,TString)" // // or by telling the global plugin manager directly gPluginMgr->AddHandler("TMVA@@MethodBase", "BDT", "TMVA::MethodBDT", "TMVA.1", "MethodBDT(TString,TString,DataSet&,TString)"); factory->BookMethod( TMVA::Types::kPlugins, "BDT", "!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=50" ); } // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); if (Use["Cuts"]) { for( unsigned iEff=1; iEff<11; ++iEff ) { TMVA::IMethod* method = (TMVA::IMethod*)factory->GetMethod("Cuts"); TMVA::MethodCuts* cuts = dynamic_cast<TMVA::MethodCuts*>(method); std::string optcutsdir = "optcuts_" + selectionName + "_" + charge; std::string mkdir_command = "mkdir -p " + optcutsdir; system(mkdir_command.c_str()); char cutsFileName[500]; sprintf( cutsFileName, "%s/cuts_Seff%d.txt", optcutsdir.c_str(), 10*iEff ); ofstream ofs(cutsFileName); std::vector<Double_t> cutsMin, cutsMax; cuts->GetCuts((float)iEff*0.10, cutsMin, cutsMax); bool found_pT1 = false; bool found_pT2 = false; bool found_NJ = false; bool found_NbJ = false; bool found_NbJmed = false; for( unsigned iCut=0; iCut<cutsMin.size(); ++iCut) { TString varName = factory->DefaultDataSetInfo().GetVariableInfo(iCut).GetInternalName(); if( varName=="TMath_Min_pT1,pT2_") { ofs << "pT1 " << cutsMin[iCut] << " " << cutsMax[iCut] << std::endl; ofs << "pT2 " << cutsMin[iCut] << " " << cutsMax[iCut] << std::endl; found_pT1 = true; found_pT2 = true; } else { ofs << varName << " " << cutsMin[iCut] << " " << cutsMax[iCut] << std::endl; } if( varName=="pT1" ) found_pT1 = true; if( varName=="pT2" ) found_pT2 = true; if( varName=="NJ" ) found_NJ = true; if( varName=="NbJ" ) found_NbJ = true; if( varName=="NbJmed" ) found_NbJmed = true; } // preselection cuts (if not optimized): if( !found_pT1 ) ofs << "pT1 20. 100000." << std::endl; if( !found_pT2 ) ofs << "pT2 20. 100000." << std::endl; if( !found_NJ ) ofs << "NJ 3 100000." << std::endl; if( !found_NbJ ) ofs << "NbJ 1 100000." << std::endl; if( !found_NbJmed && btagMed_presel_ ) ofs << "NbJmed 1 100000." << std::endl; if( charge=="plus" ) ofs << "Charge 1 10" << std::endl; else if( charge=="minus" ) ofs << "Charge -10 0" << std::endl; ofs.close(); } // for eff } // if cuts // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void TMVAClassification( ) { // this loads the library TMVA::Tools::Instance(); std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // Create a new root output file. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" ); // ---------- input variables factory->AddVariable("iso12x12", 'F'); factory->AddVariable("iso4x4", 'F'); factory->AddVariable("isoLshaped", 'F'); factory->AddVariable("PUM0", 'F'); // ---------- spectators ---------------- factory->AddSpectator("pt", "F"); factory->AddSpectator("eta", "F"); factory->AddSpectator("phi", "F"); factory->AddSpectator("e", "F"); // read training and test data TString fSig = "DrellYan_Zee.root"; TString fBkg = "MinBias.root"; TFile *fileSig = TFile::Open( fSig ); TFile *fileBkg = TFile::Open( fBkg ); TTree *signal = (TTree*)fileSig->Get("tree"); TTree *background = (TTree*)fileBkg->Get("tree"); // global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; // ====== register trees ==================================================== // you can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); TCut mycuts = "pt > 10"; TCut mycutb = "pt > 10"; // tell the factory to use all remaining events in the trees after training for testing: factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // If no numbers of events are given, half of the events in the tree are used for training, and // the other half for testing: // ---- Book MVA methods // Boosted Decision Trees factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); // ---- Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void TMVAtest1() { // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString TrainName = "TMVA"; TString outfileName( TrainName+".root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( TrainName, outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] factory->AddVariable ("ptphoton", 'F'); // factory->AddVariable ("etaphoton", 'F'); factory->AddVariable ("ptmuon", 'F'); // factory->AddVariable ("etamuon", 'F'); factory->AddVariable ("ptjet", 'F'); // factory->AddVariable ("etajet", 'F'); // factory->AddVariable ("masstop", 'F'); // factory->AddVariable ("mtw", 'F'); factory->AddVariable ("deltaRphotonjet", 'F'); factory->AddVariable ("deltaRphotonmuon", 'F'); // factory->AddVariable ("ht", 'F'); // factory->AddVariable ("photonmuonmass", 'F'); factory->AddVariable ("costopphoton", 'F'); // factory->AddVariable ("topphotonmass", 'F'); //factory->AddVariable ("pttop", 'F'); //factory->AddVariable ("etatop", 'F'); factory->AddVariable ("jetmultiplicity", 'F'); // factory->AddVariable ("bjetmultiplicity", 'F'); factory->AddVariable ("deltaphiphotonmet", 'F'); factory->AddVariable ("cvsdiscriminant", 'F'); // factory->AddVariable ("leptoncharge", 'F'); //Load the signal and background event samples from ROOT trees // TFile *inputSTrain(0); // TFile *inputBTrain(0); TString sigFileTrain = "SIGNALtGu.root"; TString bkgFileTrain = "WPHJET.root"; TString ttbarFileTrain1 ="TTBAR1.root"; TString ttbarFileTrain2 ="TTBAR2.root"; TString ttbarFileTrain3 ="TTBAR3.root"; TString TTGFileTrain ="TTG.root"; TString WWFileTrain ="WW.root"; TString WZFileTrain ="WZ.root"; TString ZZFileTrain ="ZZ.root"; TString ZGAMMAFileTrain ="ZGAMMA.root"; // TString bkgFileTrain = "TTG.root"; TFile *inputSTrain = TFile::Open( sigFileTrain ); if (!inputSTrain) { std::cout << "ERROR: could not open data file" << std::endl; exit(1); } TFile *inputBTrain = TFile::Open( bkgFileTrain ); TFile *inputBttbar1 = TFile::Open( ttbarFileTrain1); TFile *inputBttbar2 = TFile::Open( ttbarFileTrain2); TFile *inputBttbar3 = TFile::Open( ttbarFileTrain3); TFile *inputTTG= TFile::Open(TTGFileTrain ); TFile *inputWW= TFile::Open(WWFileTrain ); TFile *inputWZ= TFile::Open(WZFileTrain ); TFile *inputZZ= TFile::Open(ZZFileTrain ); TFile *inputZGAMMA= TFile::Open( ZGAMMAFileTrain); if (!inputBTrain) { std::cout << "ERROR: could not open data file" << std::endl; exit(1); } std::cout << "--- TMVAnalysis : Accessing Signal Train: " << sigFileTrain << std::endl; std::cout << "--- TMVAnalysis : Accessing Background Train: " << bkgFileTrain << std::endl; TTree *signalTrain = (TTree*)inputSTrain->FindObjectAny("atq"); TTree *backgroundTrain = (TTree*)inputBTrain->FindObjectAny("atq"); TTree *ttbarbackground1 = (TTree*)inputBttbar1->FindObjectAny("atq"); TTree *ttbarbackground2 = (TTree*)inputBttbar2->FindObjectAny("atq"); TTree *ttbarbackground3 = (TTree*)inputBttbar3->FindObjectAny("atq"); TTree *TTGbackground = (TTree*)inputTTG->FindObjectAny("atq"); TTree *WWbackground = (TTree*)inputWW->FindObjectAny("atq"); TTree *WZbackground = (TTree*)inputWZ->FindObjectAny("atq"); TTree *ZZbackground = (TTree*)inputZZ->FindObjectAny("atq"); TTree *ZGAMMAbackground = (TTree*)inputZGAMMA->FindObjectAny("atq"); // TTree *signalTrain = (TTree*)inputSTrain->FindObjectAny("insidetopmass"); // TTree *backgroundTrain = (TTree*)inputBTrain->FindObjectAny("insidetopmass"); // factory->AddSignalTree( signalTrain, 1); //cout<<"reza1"; // factory->AddBackgroundTree( backgroundTrain, 1, ); //cout<<"reza2"; factory->SetInputTrees(signalTrain,backgroundTrain,1,1); factory->AddBackgroundTree( ttbarbackground1, 1.1); factory->AddBackgroundTree( ttbarbackground2, 1.1); // factory->AddBackgroundTree( ttbarbackground3, 0.3); factory->AddBackgroundTree( TTGbackground, 1); factory->AddBackgroundTree( WWbackground, 1); factory->AddBackgroundTree(WZbackground , 1); factory->AddBackgroundTree(ZZbackground , 1); factory->AddBackgroundTree(ZGAMMAbackground , 1); // Set xs-weight // factory->SetSignalWeightExpression ("weight"); //factory->SetBackgroundWeightExpression("weight"); // Apply additional cuts on the signal and background samples (can be different) TCut mycut = ""; // TCut mycutb = ""; Int_t nSignalTrain = signalTrain->GetEntries(); Int_t nBackTrain = backgroundTrain->GetEntries(); factory->PrepareTrainingAndTestTree( "","", "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=0:nTest_Background=0:SplitMode=Random:SplitSeed=88!V" ); //factory->PrepareTrainingAndTestTree( "", "", // "nTrain_Signal=100:nTrain_Background=100:nTest_Signal=100:nTest_Background=100:!V" ); // factory->PrepareTrainingAndTestTree( mycuts, mycutb, //":NSigTrain=:NBkgTrain=:NSigTest=:NBkgTest=:SplitMode=Alternate:!V" ); //":nTrain_Signal=10000:nTest_Signal=2260:nTrain_Background=100000:nTest_Background=100000:SplitMode=Alternate:!V" ); // ":nTrain_Signal=nSignalTrain:nTrain_Background=nBackTrain:!V"); //":nTrain_Signal=nSignalTrain:nTest_Signal=nSignalTest:nTrain_Background=nBackTrain:nTest_Background=nBackTest:SplitMode=Block:!V"); //":SplitMode=Alternate:!V"); //":nTrain_Signal=:nTest_Signal=:nTrain_Background=:nTest_Background=:SplitMode=Block:!V"); //":SplitMode=Alternate:!V" ); // ---- Book MVA methods // factory->BookMethod( TMVA::Types::kBDT, "BDT", // "!H:!V:NTrees=300:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=-1:NNodesMax=5:UseNvars=4:PruneStrength=5:PruneMethod=CostComplexity:MaxDepth=4" ); //factory->BookMethod( TMVA::Types::kBDT, "BDT", // "!H:!V:NTrees=400:BoostType=AdaBoost:AdaBoostBeta=0.5:MaxDepth=3:SeparationType=GiniIndex:PruneMethod=NoPruning" ); factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=400:BoostType=AdaBoost:AdaBoostBeta=0.8:MaxDepth=3:SeparationType=GiniIndex:PruneMethod=NoPruning" ); // factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5"); // factory->BookMethod(TMVA::Types::kFisher, "Fisher", "H:!V:Fisher"); // ---- Now you can tell the factory to train, test, and evaluate the MVAs // factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); // factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001" ); // factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit","H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros // if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void TMVAClassification( TString eventsToTrain = "0", const TString & region = "barrel", const TString index = "", TString myMethodList = "BDT") { std::cout << "running classification for " << region << " for " << myMethodList << std::endl; if( region != "barrel" && region != "endcaps" ) { std::cout << "Error, region can only be barrel or endcaps. Selected region was: " << region << std::endl; exit(1); } if( index != "" && index != "0" && index != "1" && index != "2" ) { std::cout << "Error, index can only be \"\", \"0\", \"1\" or \"2\". Selected index was: " << index << std::endl; exit(1); } // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Cut optimisation Use["Cuts"] = 0; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // // --- 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 0; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // --- Mutidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 0; Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting Use["KNN"] = 0; // k-nearest neighbour method // // --- Linear Discriminant Analysis Use["LD"] = 0; // Linear Discriminant identical to Fisher Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 0; // minimisation of user-defined function using Genetics Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 0; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 0; // Recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 1; // uses Adaptive Boost Use["BDTG"] = 0; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of fisher discriminant for node splitting // // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules") Use["RuleFit"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; std::exit(2); } Use[regMethod] = 1; } } // Input and output file names TString fnameTrainS = "BsMC12_barrel_preselection"; TString fnameTrainB = "Barrel_preselection"; TString fnameTestS = "BsMC12_barrel_preselection"; TString fnameTestB = "Barrel_preselection"; TString outputFileName = "TMVA_barrel"; TString weightDirName = "barrel"; if( region == "endcaps" ) { fnameTrainS = "BsMC12_endcaps_preselection"; fnameTrainB = "Endcaps_preselection"; fnameTestS = "BsMC12_endcaps_preselection"; fnameTestB = "Endcaps_preselection"; outputFileName = "TMVA_endcaps"; weightDirName = "endcaps"; } if( index != "" ) { fnameTrainS += "_"+index; fnameTrainB += "_"+index; TString indexTest = ""; // The test index is the train index +1 (2+1 -> 0) if( index == "0" ) indexTest = "1"; else if( index == "1" ) indexTest = "2"; else if( index == "2" ) indexTest = "0"; fnameTestS += "_"+indexTest; fnameTestB += "_"+indexTest; outputFileName += "_"+index; weightDirName += index; } fnameTrainS = rootDir + fnameTrainS + ".root"; fnameTrainB = rootDir + fnameTrainB + ".root"; fnameTestS = rootDir + fnameTestS + ".root"; fnameTestB = rootDir + fnameTestB + ".root"; outputFileName = rootDir + outputFileName + ".root"; weightDirName = weightsDir + weightDirName + "Weights"; // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName(outputFileName); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // (TMVA::gConfig().GetIONames()).fWeightFileDir = outputFileName; (TMVA::gConfig().GetIONames()).fWeightFileDir = weightDirName; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] bool useNewMuonID = false; factory->AddVariable( "fls3d", "fls3d", "", 'F' ); factory->AddVariable( "alpha", "alpha", "", 'F' ); factory->AddVariable( "pvips", "pvips", "", 'F' ); factory->AddVariable( "iso", "iso", "", 'F' ); factory->AddVariable( "m1iso", "m1iso", "", 'F' ); factory->AddVariable( "m2iso", "m2iso", "", 'F' ); factory->AddVariable( "chi2dof", "chi2/dof", "", 'F' ); if( region == "barrel" ) { factory->AddVariable( "eta", "eta", "", 'F' ); factory->AddVariable( "maxdoca", "maxdoca", "cm", 'F' ); } else { factory->AddVariable( "pt", "pt", "GeV/c", 'F' ); factory->AddVariable( "pvip", "pvip", "cm", 'F' ); } factory->AddVariable( "docatrk", "docatrk", "cm", 'F' ); // factory->AddVariable( "pt", "pt", "GeV/c", 'F' ); // factory->AddVariable( "closetrk", "closetrk", "", 'I' ); // factory->AddVariable( "y", "y", "", 'F' ); // factory->AddVariable( "l3d", "l3d", "cm", 'F' ); // factory->AddVariable( "cosAlphaXY", "cosAlphaXY", "", 'F' ); // factory->AddVariable( "mu1_dxy", "mu1_dxy", "cm", 'F' ); // factory->AddVariable( "mu2_dxy", "mu2_dxy", "cm", 'F' ); if( useNewMuonID ) { // New Muon-id factory->AddVariable( "mu1_MVAMuonID", "mu1_MVAMuonID", "", 'F'); factory->AddVariable( "mu2_MVAMuonID", "mu2_MVAMuonID", "", 'F'); } // Extra variables // factory->AddVariable( "mu1_pt", "mu1_pt", "GeV/c", 'F' ); // factory->AddVariable( "mu2_pt", "mu2_pt", "GeV/c", 'F' ); // factory->AddVariable( "pvw8", "pvw8", "", 'F' ); // factory->AddVariable( "cosAlpha3D", "cosAlpha3D", "", 'F' ); // factory->AddVariable( "countTksOfPV", "countTksOfPV", "", 'I' ); // factory->AddVariable( "ctauErrPV", "ctauErrPV", "", 'F' ); // factory->AddVariable( "ctauPV", "ctauPV", "", 'F' ); // factory->AddVariable( "dcaxy", "dcaxy", "", 'F' ); // factory->AddVariable( "mu1_glbTrackProb", "mu1_glbTrackProb", "", 'F' ); // factory->AddVariable( "mu1_nChi2", "mu1_nChi2", "", 'F' ); // factory->AddVariable( "mu1_nMuSegs", "mu1_nMuSegs", "", 'F' ); // factory->AddVariable( "mu1_nMuSegsCln", "mu1_nMuSegsCln", "", 'F' ); // factory->AddVariable( "mu1_nPixHits", "mu1_nPixHits", "", 'F' ); // factory->AddVariable( "mu1_nTrHits", "mu1_nTrHits", "", 'F' ); // factory->AddVariable( "mu1_segComp", "mu1_segComp", "", 'F' ); // factory->AddVariable( "mu1_trkEHitsOut", "mu1_trkEHitsOut", "", 'F' ); // factory->AddVariable( "mu1_trkVHits", "mu1_trkVHits", "", 'F' ); // factory->AddVariable( "mu1_validFrac", "mu1_validFrac", "", 'F' ); // factory->AddVariable( "mu1_chi2LocMom", "mu1_chi2LocMom", "", 'F' ); // factory->AddVariable( "mu1_chi2LocPos", "mu1_chi2LocPos", "", 'F' ); // factory->AddVariable( "mu2_glbTrackProb", "mu2_glbTrackProb", "", 'F' ); // factory->AddVariable( "mu2_nChi2", "mu2_nChi2", "", 'F' ); // factory->AddVariable( "mu2_nMuSegs", "mu2_nMuSegs", "", 'F' ); // factory->AddVariable( "mu2_nMuSegsCln", "mu2_nMuSegsCln", "", 'F' ); // factory->AddVariable( "mu2_nPixHits", "mu2_nPixHits", "", 'F' ); // factory->AddVariable( "mu2_nTrHits", "mu2_nTrHits", "", 'F' ); // factory->AddVariable( "mu2_segComp", "mu2_segComp", "", 'F' ); // factory->AddVariable( "mu2_trkEHitsOut", "mu2_trkEHitsOut", "", 'F' ); // factory->AddVariable( "mu2_trkVHits", "mu2_trkVHits", "", 'F' ); // factory->AddVariable( "mu2_validFrac", "mu2_validFrac", "", 'F' ); // factory->AddVariable( "mu2_chi2LocMom", "mu2_chi2LocMom", "", 'F' ); // factory->AddVariable( "mu2_chi2LocPos", "mu2_chi2LocPos", "", 'F' ); // factory->AddVariable( "l3d := ctauPV*pt/mass", "l3d", "cm", 'F' ); // factory->AddVariable( "l3dSig := ctauPV/ctauErrPV", "l3dSig", "", 'F' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables // factory->AddSpectator( "spec1 := mass*2", "Spectator 1", "units", 'F' ); // factory->AddSpectator( "spec2 := mass*3", "Spectator 2", "units", 'F' ); factory->AddSpectator( "mass", "mass", "GeV/c^{2}", 'F' ); // Read training and test data // (it is also possible to use ASCII format as input -> see TMVA Users Guide) if (gSystem->AccessPathName( fnameTrainS )) { // file does not exist in local directory std::cout << "Did not access " << fnameTrainS << " exiting." << std::endl; std::exit(4); } //gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root"); TFile *inputTrainS = TFile::Open( fnameTrainS ); TFile *inputTrainB = TFile::Open( fnameTrainB ); TFile *inputTestS = TFile::Open( fnameTestS ); TFile *inputTestB = TFile::Open( fnameTestB ); // --- Register the training and test trees TTree *signalTrainTree = (TTree*)inputTrainS->Get("probe_tree"); TTree *backgroundTrainTree = (TTree*)inputTrainB->Get("probe_tree"); TTree *signalTestTree = (TTree*)inputTestS->Get("probe_tree"); TTree *backgroundTestTree = (TTree*)inputTestB->Get("probe_tree"); // global event weights per tree (see below for setting event-wise weights) Double_t signalTrainWeight = 1.0; Double_t backgroundTrainWeight = 1.0; Double_t signalTestWeight = 1.0; Double_t backgroundTestWeight = 1.0; // Decide if using the split and mixing or the full trees if( fnameTrainS == fnameTestS ) { if( fnameTrainB != fnameTestB ) { std::cout << "This macro cannot handle cases where the same signal sample is used for training and testing, but different background samples are used."; exit(1); } std::cout << "--- TMVAClassification : Using input file: " << inputTrainS->GetName() << std::endl; std::cout << "--- and file: " << inputTrainB->GetName() << std::endl; // You can add an arbitrary number of signal or background trees factory->AddSignalTree ( signalTrainTree, signalTrainWeight ); factory->AddBackgroundTree( backgroundTrainTree, backgroundTrainWeight ); } else { if( fnameTrainB == fnameTestB ) { std::cout << "This macro cannot handle cases where the same background sample is used for training and testing, but different signal samples are used."; exit(1); } std::cout << "--- TMVAClassification : Using input file: " << inputTrainS->GetName() << std::endl; std::cout << "--- and file: " << inputTrainB->GetName() << " for training and" << std::endl; std::cout << "--- input file: " << inputTestS->GetName() << std::endl; std::cout << "--- and file: " << inputTestB->GetName() << " for testing." << std::endl; // To give different trees for training and testing, do as follows: factory->AddSignalTree( signalTrainTree, signalTrainWeight, "Training" ); factory->AddSignalTree( signalTestTree, signalTestWeight, "Test" ); factory->AddBackgroundTree( backgroundTrainTree, backgroundTrainWeight, "Training" ); factory->AddBackgroundTree( backgroundTestTree, backgroundTestWeight, "Test" ); } // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; TCut mycutb = ""; // Tell the factory how to use the training and testing events // // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal="+eventsToTrain+":nTrain_Background="+eventsToTrain+":SplitMode=Random:NormMode=NumEvents:!V" ); // factory->PrepareTrainingAndTestTree( mycuts, mycutb, // "nTrain_Signal=3000:nTrain_Background=3000:nTest_Signal=3000:nTest_Background=3000:SplitMode=Random:NormMode=NumEvents:!V" ); // factory->PrepareTrainingAndTestTree( mycuts, mycutb, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood ("naive Bayes estimator") if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density estimator to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) // factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+8:TestRate=5:!UseRegulator" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=800:nEventsMin=50:MaxDepth=2:BoostType=AdaBoost:AdaBoostBeta=1:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:NNodesMax=5" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", "!H:!V:NTrees=50:nEventsMin=150:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier usage, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","GA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events std::cout << "Training all methods" << std::endl; factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events std::cout << "Testing all methods" << std::endl; factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs std::cout << "Evaluating all methods" << std::endl; factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
int main( int argc, char* argv[] ) { std::cout << std::endl << std::endl; std::cout << "------------------------------------------------------" << std::endl; std::cout << "| |" << std::endl; std::cout << "| |" << std::endl; std::cout << "| Running runZGAnalysis |" << std::endl; std::cout << "| |" << std::endl; std::cout << "| |" << std::endl; std::cout << "------------------------------------------------------" << std::endl; std::cout << std::endl << std::endl; if( argc<2 ) { std::cout << "USAGE: ./runZGAnalysis [configFileName] [data/MC]" << std::endl; std::cout << "Exiting." << std::endl; exit(11); } std::string configFileName(argv[1]); ZGConfig cfg(configFileName); bool onlyData = false; bool onlyMC = false; bool onlySignal = false; bool noSignals = false; if( argc > 2 ) { std::string dataMC(argv[2]); if( dataMC=="data" ) onlyData = true; else if( dataMC=="MC" || dataMC=="mc" ) onlyMC = true; else if( dataMC=="mcbg" || dataMC=="mcBG" || dataMC=="MCBG" || dataMC=="mc_bg" || dataMC=="MC_BG" ) { onlyMC = true; noSignals = true; } else if( dataMC=="signal" ) onlySignal = true; else { std::cout << "-> You passed a second argument that isn't 'data', nor 'MC', nor 'signal', so I don't know what to do about it." << std::endl; } } else { std::cout << "-> Will run on both data and MC." << std::endl; } if( onlyMC ) { std::cout << "-> Will run only on MC." << std::endl; if( noSignals ) { std::cout << "-> Will skip signal." << std::endl; } } if( onlyData ) { std::cout << "-> Will run only on data." << std::endl; } std::string outputdir = cfg.getEventYieldDir(); system(Form("mkdir -p %s", outputdir.c_str())); std::string outfileName(Form("%s/trees_tmp.root", outputdir.c_str())); TFile* outfile = TFile::Open(outfileName.c_str(), "update"); outfile->cd(); if( !onlyData && !onlySignal ) { // run on MC std::string samplesFileName = "../samples/samples_" + cfg.mcSamples() + ".dat"; std::cout << std::endl << std::endl; std::cout << "-> Loading samples from file: " << samplesFileName << std::endl; std::vector<ZGSample> fSamples = ZGSample::loadSamples(samplesFileName); //std::vector<ZGSample> fSamples = ZGSample::loadSamples(samplesFileName, 100, 999); if( fSamples.size()==0 ) { std::cout << "There must be an error: samples is empty!" << std::endl; exit(120); } //addTreeToFile( outfile, "zg", fSamples, cfg); addTreeToFile( outfile, "zg", fSamples, cfg, 851, 852 ); addTreeToFile( outfile, "dy", fSamples, cfg, 700, 710 ); //addTreeToFile( outfile, "top", fSamples, cfg, 300, 499 ); // irrelevant std::cout << "-> Done looping on MC samples." << std::endl; } // if MC samples // load signal samples, if any if( cfg.mcSamples()!="" && cfg.additionalStuff()!="noSignals" && !noSignals && !onlyData ) { std::string samplesFileName = "../samples/samples_" + cfg.mcSamples() + ".dat"; std::cout << std::endl << std::endl; std::cout << "-> Loading signal samples from file: " << samplesFileName << std::endl; std::vector<ZGSample> fSamples = ZGSample::loadSamples(samplesFileName, 1000); // only signal (id>=1000) if( fSamples.size()==0 ) { std::cout << "No signal samples found, skipping." << std::endl; } else { for( unsigned i=0; i<fSamples.size(); ++i ) addTreeToFile( outfile, fSamples[i], cfg ); } // if samples != 0 } // if mc samples if( !(cfg.dummyAnalysis()) && cfg.dataSamples()!="" && !onlyMC && !onlySignal ) { std::string samplesFile_data = "../samples/samples_" + cfg.dataSamples() + ".dat"; std::cout << std::endl << std::endl; std::cout << "-> Loading data from file: " << samplesFile_data << std::endl; std::vector<ZGSample> samples_data = ZGSample::loadSamples(samplesFile_data); if( samples_data.size()==0 ) { std::cout << "There must be an error: samples_data is empty!" << std::endl; exit(1209); } addTreeToFile( outfile, "data" , samples_data, cfg ); std::cout << "-> Done looping on data." << std::endl; } outfile->Close(); std::string finalFileName = outputdir + "/trees.root"; system( Form("cp %s %s", outfileName.c_str(), finalFileName.c_str()) ); std::cout << "-> Wrote trees to file: " << finalFileName << std::endl; return 0; }
void TMVAClassificationHwwNtuple( TString myMethodList = "" ) { // This loads the library TMVA::Tools::Instance(); gROOT->ProcessLine(".L TMVAGui.C"); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Cut optimisation Use["Cuts"] = 1; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // Use["BDT"] = 1; // uses Adaptive Boost Use["BDTG"] = 0; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of fisher discriminant for node splitting // // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules") Use["RuleFit"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; cout<<regMethod<<" is on"<<endl; } } // ------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // For one variable //TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, // "!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification" ); // For Multiple Variables TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); //factory->AddVariable( "pt1", "LeadLepton pt", "", 'F' ); //factory->AddVariable( "pt2", "TailLepton pt", "", 'F' ); factory->AddVariable( "pfmet", "MissingEt", "", 'F' ); factory->AddVariable( "mpmet", "Minimum Proj. Met", "", 'F' ); factory->AddVariable( "dphill", "DeltPhiOfLepLep", "", 'F' ); //factory->AddVariable( "mll", "DiLepton Mass", "", 'F' ); factory->AddVariable( "ptll", "DiLepton pt", "", 'F' ); // // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables //factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' ); //factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' ); // //factory->AddSpectator( "mWW", "Higgs Mass", "", 'F' ); factory->AddSpectator( "pt1", "LeadLepton pt", "", 'F' ); factory->AddSpectator( "pt2", "TailLepton pt", "", 'F' ); factory->AddSpectator( "pfmet", "MissingEt", "", 'F' ); factory->AddSpectator( "mpmet", "Minimum Proj. Met", "", 'F' ); factory->AddSpectator( "dphill", "DeltPhiOfLepLep", "", 'F' ); factory->AddSpectator( "mll", "DiLepton Mass", "", 'F' ); factory->AddSpectator( "ptll", "DiLepton pt", "", 'F' ); // Read training and test data // (it is also possible to use ASCII format as input -> see TMVA Users Guide) //TString fname = "./tmva_class_example.root"; //TString fname = "/afs/cern.ch/work/s/salee/private/HWWwidth/HWW/GGVvAnalyzer/MkNtuple/Hw1Int8TeV/MkNtuple.root"; //TString fname = "/terranova_0/HWWwidth/HWW/GGVvAnalyzer/MkNtuple/Hw1Int8TeV/MkNtuple.root"; //if (gSystem->AccessPathName( fname )) // file does not exist in local directory // exit(-1); //gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root"); //TFile *input = TFile::Open( fname ); //TFile *SB_OnPeak = TFile::Open("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_IntOnPeak_8TeV.root"); //TTree *SB_OnPeak_Tree = (TTree*)SB_OnPeak->Get("latino"); TChain *S_Chain = new TChain("latino"); TChain *C_Chain = new TChain("latino"); TChain *SCI_Chain = new TChain("latino"); TChain *qqWW_Chain = new TChain("latino"); S_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_SigOnPeak_8TeV.root"); S_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_SigShoulder_8TeV.root"); S_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_SigTail_8TeV.root"); SCI_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_IntOnPeak_8TeV.root"); SCI_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_IntShoulder_8TeV.root"); SCI_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_IntTail_8TeV.root"); C_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw25_CotHead_8TeV.root"); C_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw25_CotTail_8TeV.root"); qqWW_Chain->Add("/afs/cern.ch/user/m/maiko/work/public/Tree/tree_skim_wwmin/nominals/latino_000_WWJets2LMad.root"); // --- Register the training and test trees // You can add an arbitrary number of signal or background trees factory->AddSignalTree ( S_Chain ); factory->AddBackgroundTree( qqWW_Chain ); factory->AddBackgroundTree( C_Chain ); // Classification training and test data in ROOT tree format with signal and background events being located in the same tree //factory->SetInputTrees(SCI_Chain, GenOffCut, GenOnCut); // To give different trees for training and testing, do as follows: // factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ); // factory->AddSignalTree( signalTestTree, signalTestWeight, "Test" ); factory->SetWeightExpression ("2.1*puW*baseW*effW*triggW*19.468"); //factory->SetSignalWeightExpression ("2.1*puW*baseW*effW*triggW*19.468"); //factory->SetBackgroundWeightExpression("puW*baseW*effW*triggW*19.468"); //factory->PrepareTrainingAndTestTree( ChanCommOff, // "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=None:!V" ); //"nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"; factory->PrepareTrainingAndTestTree( ChanCommOff0J, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=None:!V" ); // ---- Book MVA methods // // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:V:NTrees=850:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); //"!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" ); // For an example of the category classifier usage, see: TMVAClassificationCategory // ----------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // ---- STILL EXPERIMENTAL and only implemented for BDT's ! // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","FitGA"); // ----------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros //if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void TMVAClassificationElecTau(std::string ordering_ = "Pt", std::string bkg_ = "qqH115vsWZttQCD") { TMVA::Tools::Instance(); TString outfileName( "TMVAElecTau"+ordering_+"Ord_"+bkg_+".root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationElecTau"+ordering_+"Ord_"+bkg_, outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" ); factory->AddVariable( "pt1", "pT-tag1", "GeV/c" , 'F' ); factory->AddVariable( "pt2", "pT-tag2", "GeV/c" , 'F' ); factory->AddVariable( "Deta","|y-tag1 - y-tag2|","" , 'F' ); //factory->AddVariable( "opposite:=abs(eta1*eta2)/eta1/eta2","sign1*sign2","" , 'F' ); //factory->AddVariable( "Dphi", "#Delta#phi" ,"" , 'F' ); factory->AddVariable( "Mjj", "M(tag1,tag2)", "GeV/c^{2}" , 'F' ); factory->AddSpectator( "eta1", "#eta_{tag1}" , 'F' ); factory->AddSpectator( "eta2", "#eta_{tag2}" , 'F' ); factory->SetWeightExpression( "sampleWeight" ); TString fSignalName = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleVBFH115-powheg-PUS1_Open_ElecTauStream.root"; TString fBackgroundNameDYJets = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleZjets-alpgen-PUS1_Open_ElecTauStream.root"; TString fBackgroundNameWJets = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleWJets-madgraph-PUS1_Open_ElecTauStream.root"; TString fBackgroundNameQCD = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleQCD_Open_ElecTauStream.root"; TString fBackgroundNameTTbar = "/data_CMS/cms/lbianchini/VbfJetsStudy/OpenNtuples/ElecTauStream2011/nTupleTTJets-madgraph-PUS1_Open_ElecTauStream.root"; TFile *fSignal(0); TFile *fBackgroundDYJets(0); TFile *fBackgroundWJets(0); TFile *fBackgroundQCD(0); TFile *fBackgroundTTbar(0); fSignal = TFile::Open( fSignalName ); fBackgroundDYJets = TFile::Open( fBackgroundNameDYJets ); fBackgroundWJets = TFile::Open( fBackgroundNameWJets ); fBackgroundQCD = TFile::Open( fBackgroundNameQCD ); fBackgroundTTbar = TFile::Open( fBackgroundNameTTbar ); if(!fSignal || !fBackgroundDYJets || !fBackgroundWJets || !fBackgroundQCD || !fBackgroundTTbar) { std::cout << "ERROR: could not open files" << std::endl; exit(1); } TString tree = "outTree"+ordering_+"Ord"; TCut mycuts = ""; TCut mycutb = ""; TCut cutA = "pt1>0 && tightestHPSWP>0"; TCut cutB = "pt1>0 && combRelIsoLeg1<0.1"; TCut cutBl = "pt1>0 && combRelIsoLeg1<0.3"; TCut cutC = "pt1>0 && diTauCharge==0"; TCut cutD = "pt1>0 && MtLeg1<40"; // select events for training TFile* dummy = new TFile("dummy.root","RECREATE"); TH1F* allEvents = new TH1F("allEvents","",1,-10,10); float totalEvents, cutEvents; // signal: all TTree *signal = ((TTree*)(fSignal->Get(tree)))->CopyTree(cutA&&cutB&&cutC&&cutD); cout << "Copied signal tree with full selection: " << ((TTree*)(fSignal->Get(tree)))->GetEntries() << " --> " << signal->GetEntries() << endl; allEvents->Reset(); signal->Draw("eta1>>allEvents","sampleWeight"); cutEvents = allEvents->Integral(); Double_t signalWeight = 1.0; cout << "Signal: expected yield " << cutEvents << " -- weight " << signalWeight << endl; // Z+jets: all TTree *backgroundDYJets = ((TTree*)(fBackgroundDYJets->Get(tree)))->CopyTree(cutA&&cutB&&cutC&&cutD); cout << "Copied DYJets tree with full selection: " << ((TTree*)(fBackgroundDYJets->Get(tree)))->GetEntries() << " --> " << backgroundDYJets->GetEntries() << endl; allEvents->Reset(); backgroundDYJets->Draw("eta1>>allEvents","sampleWeight"); cutEvents = allEvents->Integral(); Double_t backgroundDYJetsWeight = 1.0; cout << "ZJets: expected yield " << cutEvents << " -- weight " << backgroundDYJetsWeight << endl; // W+jets: iso+Mt TTree *backgroundWJets = ((TTree*)(fBackgroundWJets->Get(tree)))->CopyTree(cutB&&cutD); cout << "Copied WJets tree with iso+Mt selection: " << ((TTree*)(fBackgroundWJets->Get(tree)))->GetEntries() << " --> " << backgroundWJets->GetEntries() << endl; allEvents->Reset(); backgroundWJets->Draw("eta1>>allEvents","sampleWeight"); totalEvents = allEvents->Integral(); allEvents->Reset(); backgroundWJets->Draw("eta1>>allEvents","sampleWeight*(tightestHPSWP>0 && diTauCharge==0)"); cutEvents = allEvents->Integral(); Double_t backgroundWJetsWeight = cutEvents / totalEvents; cout << "WJets: expected yield " << cutEvents << " -- weight " << backgroundWJetsWeight << endl; // QCD: Mt+loose iso TTree *backgroundQCD = ((TTree*)(fBackgroundQCD->Get(tree)))->CopyTree(cutD&&cutBl); cout << "Copied QCD tree with Mt selection: " << ((TTree*)(fBackgroundQCD->Get(tree)))->GetEntries() << " --> " << backgroundQCD->GetEntries() << endl; allEvents->Reset(); backgroundQCD->Draw("eta1>>allEvents","sampleWeight"); totalEvents = allEvents->Integral(); allEvents->Reset(); backgroundQCD->Draw("eta1>>allEvents","sampleWeight*(tightestHPSWP>0 && diTauCharge==0 && combRelIsoLeg1<0.1)"); cutEvents = allEvents->Integral(); Double_t backgroundQCDWeight = cutEvents / totalEvents; cout << "QCD: expected yield " << cutEvents << " -- weight " << backgroundQCDWeight << endl; // TTbar: iso+Mt TTree *backgroundTTbar = ((TTree*)(fBackgroundTTbar->Get(tree)))->CopyTree(cutB&&cutD); cout << "Copied TTbar tree with iso+Mt selection: " << ((TTree*)(fBackgroundTTbar->Get(tree)))->GetEntries() << " --> " << backgroundTTbar->GetEntries() << endl; allEvents->Reset(); backgroundTTbar->Draw("eta1>>allEvents","sampleWeight"); totalEvents = allEvents->Integral(); allEvents->Reset(); backgroundTTbar->Draw("eta1>>allEvents","sampleWeight*(tightestHPSWP>0 && diTauCharge==0)"); cutEvents = allEvents->Integral(); Double_t backgroundTTbarWeight = cutEvents / totalEvents; cout << "TTbar: expected yield " << cutEvents << " -- weight " << backgroundTTbarWeight << endl; delete allEvents; factory->AddSignalTree ( signal, signalWeight ); //factory->AddBackgroundTree( backgroundDYJets, backgroundDYJetsWeight ); //factory->AddBackgroundTree( backgroundWJets, backgroundWJetsWeight ); factory->AddBackgroundTree( backgroundQCD, backgroundQCDWeight ); //factory->AddBackgroundTree( backgroundTTbar, backgroundTTbarWeight ); factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=1:nTest_Background=1:SplitMode=Random:NormMode=NumEvents:!V" ); factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=GA:EffSel:CutRangeMin[0]=25.:CutRangeMax[0]=999:CutRangeMin[1]=25.:CutRangeMax[1]=999.:CutRangeMin[2]=1.0:CutRangeMax[2]=9.:CutRangeMin[3]=100:CutRangeMax[3]=7000:VarProp=FSmart" ); /* factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=200:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); */ factory->TrainAllMethods(); factory->TestAllMethods(); factory->EvaluateAllMethods(); outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; //if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void TMVAClassificationCategory() { //--------------------------------------------------------------- std::cout << std::endl << "==> Start TMVAClassificationCategory" << std::endl; bool batchMode(false); // Create a new root output file. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory will // then run the performance analysis for you. // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string std::string factoryOptions( "!V:!Silent:Transformations=I;D;P;G,D" ); if (batchMode) factoryOptions += ":!Color:!DrawProgressBar"; TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationCategory", outputFile, factoryOptions ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] factory->AddVariable( "var1", 'F' ); factory->AddVariable( "var2", 'F' ); factory->AddVariable( "var3", 'F' ); factory->AddVariable( "var4", 'F' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables factory->AddSpectator( "eta" ); // load the signal and background event samples from ROOT trees TFile *input(0); TString fname( "" ); if (UseOffsetMethod) fname = "../execs/data/toy_sigbkg_categ_offset.root"; else fname = "../execs/data/toy_sigbkg_categ_varoff.root"; if (!gSystem->AccessPathName( fname )) { // first we try to find tmva_example.root in the local directory std::cout << "--- TMVAClassificationCategory: Accessing " << fname << std::endl; input = TFile::Open( fname ); } if (!input) { std::cout << "ERROR: could not open data file: " << fname << std::endl; exit(1); } TTree *signal = (TTree*)input->Get("TreeS"); TTree *background = (TTree*)input->Get("TreeB"); /// global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; /// you can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; // tell the factory to use all remaining events in the trees after training for testing: factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // Fisher discriminant factory->BookMethod( TMVA::Types::kFisher, "Fisher", "!H:!V:Fisher" ); // Likelihood factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Categorised classifier TMVA::MethodCategory* mcat = 0; // the variable sets TString theCat1Vars = "var1:var2:var3:var4"; TString theCat2Vars = (UseOffsetMethod ? "var1:var2:var3:var4" : "var1:var2:var3"); // the Fisher TMVA::MethodBase* fiCat = factory->BookMethod( TMVA::Types::kCategory, "FisherCat","" ); mcat = dynamic_cast<TMVA::MethodCategory*>(fiCat); mcat->AddMethod("abs(eta)<=1.3",theCat1Vars, TMVA::Types::kFisher,"Category_Fisher_1","!H:!V:Fisher"); mcat->AddMethod("abs(eta)>1.3", theCat2Vars, TMVA::Types::kFisher,"Category_Fisher_2","!H:!V:Fisher"); // the Likelihood TMVA::MethodBase* liCat = factory->BookMethod( TMVA::Types::kCategory, "LikelihoodCat","" ); mcat = dynamic_cast<TMVA::MethodCategory*>(liCat); mcat->AddMethod("abs(eta)<=1.3",theCat1Vars, TMVA::Types::kLikelihood,"Category_Likelihood_1","!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50"); mcat->AddMethod("abs(eta)>1.3", theCat2Vars, TMVA::Types::kLikelihood,"Category_Likelihood_2","!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50"); // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassificationCategory is done!" << std::endl; // Clean up delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
// Analysis_Lifetime::Setup() Analysis::RetType Analysis_Lifetime::Setup(ArgList& analyzeArgs, DataSetList* datasetlist, TopologyList* PFLin, DataFileList* DFLin, int debugIn) { // Get Keywords FileName outfileName( analyzeArgs.GetStringKey("out") ); std::string setname = analyzeArgs.GetStringKey("name"); bool sortSets = (!analyzeArgs.hasKey("nosort")); windowSize_ = analyzeArgs.getKeyInt("window", -1); averageonly_ = analyzeArgs.hasKey("averageonly"); cumulative_ = analyzeArgs.hasKey("cumulative"); deltaAvg_ = analyzeArgs.hasKey("delta"); cut_ = analyzeArgs.getKeyDouble("cut", 0.5); fuzzCut_ = analyzeArgs.getKeyInt("fuzz", -1); if (fuzzCut_ < 1) fuzzCut_ = -1; normalizeCurves_ = !analyzeArgs.hasKey("rawcurve"); if (analyzeArgs.hasKey("greater")) Compare_ = Compare_GreaterThan; else if (analyzeArgs.hasKey("less")) Compare_ = Compare_LessThan; else Compare_ = Compare_GreaterThan; // Select datasets from remaining args if (inputDsets_.AddSetsFromArgs( analyzeArgs.RemainingArgs(), *datasetlist )) { mprinterr("Error: lifetime: Could not add data sets.\n"); return Analysis::ERR; } // Sort data sets if (sortSets) inputDsets_.SortArray1D(); // Create output datasets DataFile* outfile = 0; DataFile* maxfile = 0; DataFile* avgfile = 0; if (setname.empty()) setname = datasetlist->GenerateDefaultName( "lifetime" ); if ( windowSize_ != -1) { outfile = DFLin->AddDataFile(outfileName, analyzeArgs); if (!averageonly_ && outfile != 0) { maxfile = DFLin->AddDataFile(outfileName.DirPrefix() + "max." + outfileName.Base(), analyzeArgs); avgfile = DFLin->AddDataFile(outfileName.DirPrefix() + "avg." + outfileName.Base(), analyzeArgs); } int didx = 0; for (Array1D::const_iterator set = inputDsets_.begin(); set != inputDsets_.end(); ++set) { MetaData md(setname, didx); md.SetLegend( (*set)->Meta().Legend() ); DataSet_1D* outSet = (DataSet_1D*)datasetlist->AddSet( DataSet::FLOAT, md ); if (CheckDsetError(outSet, "output", (*set)->legend())) return Analysis::ERR; outputDsets_.push_back( outSet ); if (outfile != 0) outfile->AddDataSet( outSet ); if (!averageonly_) { // MAX md.SetAspect("max"); outSet = (DataSet_1D*)datasetlist->AddSet(DataSet::INTEGER, md); if (CheckDsetError(outSet, "lifetime max", (*set)->legend())) return Analysis::ERR; maxDsets_.push_back( outSet ); if (maxfile != 0) maxfile->AddDataSet( outSet ); // AVG md.SetAspect("avg"); outSet = (DataSet_1D*)datasetlist->AddSet(DataSet::FLOAT, md); if (CheckDsetError(outSet, "lifetime avg", (*set)->legend())) return Analysis::ERR; avgDsets_.push_back( outSet ); if (avgfile != 0) avgfile->AddDataSet( outSet ); } ++didx; } // Set step to window size. std::string fileArgs = "xstep " + integerToString( windowSize_ ); if (outfile != 0) outfile->ProcessArgs( fileArgs ); if (maxfile != 0) maxfile->ProcessArgs( fileArgs ); if (avgfile != 0) avgfile->ProcessArgs( fileArgs ); } // Lifetime curves DataFile* crvfile = 0; if (!averageonly_) { if (!outfileName.empty()) { crvfile = DFLin->AddDataFile(outfileName.DirPrefix() + "crv." + outfileName.Base(), analyzeArgs); } MetaData md(setname, "curve"); for (int didx = 0; didx != (int)inputDsets_.size(); didx++) { md.SetIdx(didx); DataSet_1D* outSet = (DataSet_1D*)datasetlist->AddSet(DataSet::DOUBLE, md); if (CheckDsetError(outSet, "lifetime curve", inputDsets_[didx]->legend())) return Analysis::ERR; curveSets_.push_back( outSet ); if (crvfile != 0) crvfile->AddDataSet( outSet ); } } // Non-window output file if (!averageonly_ && windowSize_ == -1) { standalone_ = DFLin->AddCpptrajFile( outfileName, "Lifetimes", DataFileList::TEXT, true ); if (standalone_ == 0) return Analysis::ERR; } else standalone_ = 0; if (!averageonly_) mprintf(" LIFETIME: Calculating average lifetime using a cutoff of %f", cut_); else mprintf(" LIFETIME: Calculating only averages"); mprintf(" of data in %i sets\n", inputDsets_.size()); if (!sortSets) mprintf("\tInput data sets will not be sorted.\n"); if (debugIn > 0) for (Array1D::const_iterator set = inputDsets_.begin(); set != inputDsets_.end(); ++set) mprintf("\t%s\n", (*set)->legend()); if (Compare_ == Compare_GreaterThan) mprintf("\tValues greater than %f are considered present.\n", cut_); else mprintf("\tValues less than %f are considered present.\n", cut_); if (windowSize_ != -1) { mprintf("\tAverage of data over windows will be saved to sets named %s\n", setname.c_str()); mprintf("\tWindow size for averaging: %i\n", windowSize_); if (cumulative_) mprintf("\tCumulative averages will be saved.\n"); if (deltaAvg_) mprintf("\tChange of average from previous average will be saved.\n"); } if (outfile != 0) { mprintf("\tOutfile: %s", outfile->DataFilename().full()); if (!averageonly_ && outfile != 0) mprintf(", %s, %s", maxfile->DataFilename().base(), avgfile->DataFilename().base()); mprintf("\n"); } if (!averageonly_) { if (crvfile != 0) mprintf("\tLifetime curves output: %s\n", crvfile->DataFilename().base()); if (normalizeCurves_) mprintf("\tLifetime curves will be normalized.\n"); else mprintf("\tLifetime curves will not be normalized.\n"); } if (fuzzCut_ != -1) mprintf("\tFuzz value of %i frames will be used.\n", fuzzCut_); return Analysis::OK; }
void TMVAClassification( TString myMethodList = "" ) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // to get access to the GUI and all tmva macros //TString thisdir = gSystem->DirName(gInterpreter->GetCurrentMacroName()); //gROOT->SetMacroPath(thisdir + ":" + gROOT->GetMacroPath()); //gROOT->ProcessLine(".L TMVAGui.C"); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Cut optimisation Use["Cuts"] = 0; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // // --- 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 0; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // --- Mutidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 0; Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting Use["KNN"] = 0; // k-nearest neighbour method // // --- Linear Discriminant Analysis Use["LD"] = 1; // Linear Discriminant identical to Fisher Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 0; // minimisation of user-defined function using Genetics Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 0; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 0; // Recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 0; // uses Adaptive Boost Use["BDTG"] = 0; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of fisher discriminant for node splitting // // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules") Use["RuleFit"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] // factory->AddVariable( "myvar1 := var1+var2", 'F' ); // factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' ); // factory->AddVariable( "var3", "Variable 3", "units", 'F' ); // factory->AddVariable( "var4", "Variable 4", "units", 'F' ); factory->AddVariable( "pho_ecalClusterIsoR4", "pho_ecalClusterIsoR4", "units", 'F' ); factory->AddVariable( "pho_hcalRechitIsoR4", "pho_hcalRechitIsoR4", "units", 'F' ); factory->AddVariable( "pho_trackIsoR4PtCut20", "pho_trackIsoR4PtCut20", "units", 'F' ); factory->AddVariable( "phoHoverE", "phoHoverE", "units", 'F' ); factory->AddVariable( "phoSigmaIEtaIEta_2012", "phoSigmaIEtaIEta_2012", "units", 'F' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables // factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' ); // factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' ); // Read training and test data // (it is also possible to use ASCII format as input -> see TMVA Users Guide) // TString fname = "./tmva_class_example.root"; TString fname = "/net/hisrv0001/home/juliusbl/alex/cut/cutTree.root"; if (gSystem->AccessPathName( fname )) // file does not exist in local directory gSystem->Exec("curl -O http://root.cern.ch/files/tmva_class_example.root"); TFile *input = TFile::Open( fname ); std::cout << "--- TMVAClassification : Using input file: " << input->GetName() << std::endl; // --- Register the training and test trees TTree *signal = (TTree*)input->Get("cutT"); TTree *background = (TTree*)input->Get("cutT"); // global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; // You can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); // To give different trees for training and testing, do as follows: // factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ); // factory->AddSignalTree( signalTestTree, signalTestWeight, "Test" ); // Use the following code instead of the above two or four lines to add signal and background // training and test events "by hand" // NOTE that in this case one should not give expressions (such as "var1+var2") in the input // variable definition, but simply compute the expression before adding the event // // // --- begin ---------------------------------------------------------- // std::vector<Double_t> vars( 4 ); // vector has size of number of input variables // Float_t treevars[4], weight; // // // Signal // for (UInt_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (UInt_t i=0; i<signal->GetEntries(); i++) { // signal->GetEntry(i); // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < signal->GetEntries()/2.0) factory->AddSignalTrainingEvent( vars, signalWeight ); // else factory->AddSignalTestEvent ( vars, signalWeight ); // } // // // Background (has event weights) // background->SetBranchAddress( "weight", &weight ); // for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (UInt_t i=0; i<background->GetEntries(); i++) { // background->GetEntry(i); // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight*weight ); // else factory->AddBackgroundTestEvent ( vars, backgroundWeight*weight ); // } // --- end ------------------------------------------------------------ // // --- end of tree registration // Set individual event weights (the variables must exist in the original TTree) // for signal : factory->SetSignalWeightExpression ("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("weight1*weight2"); // factory->SetBackgroundWeightExpression( "weight" ); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = "subid==0"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = "subid==1"; // for example: TCut mycutb = "abs(var1)<0.5"; // Tell the factory how to use the training and testing events // // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood ("naive Bayes estimator") if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density estimator to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" ); if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier usage, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // ---- STILL EXPERIMENTAL and only implemented for BDT's ! // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","FitGA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros //if (!gROOT->IsBatch()) // gROOT->ProcessLine(TString::Format("TMVAGui(\"%s\")", outfileName.Data())); // efficiencies( TString fin = "TMVA.root", Int_t type = 2, Bool_t useTMVAStyle = kTRUE ); }
void testBDT(){ //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); /* TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "testBDT", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // global event weights per tree (see below for setting event-wise weights) //Double_t signalWeight = 0.003582; //Double_t backgroundWeight = 0.0269; Double_t signalWeight = 1; Double_t backgroundWeight = 1; TFile *input_sig = TFile::Open( "signal_exclusif.root" ); TFile *input_wz = TFile::Open( "bruit_w_z.root" ); TTree *signal = (TTree*)input_sig->Get("tree"); TTree *background = (TTree*)input_wz->Get("tree"); // You can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); factory->AddVariable("PT_z" , 'F'); factory->AddVariable("ASYM" , 'F'); factory->AddVariable("PHI_lw_b", 'F'); factory->AddVariable("M_top", 'F'); */ TString outfileName( "bdtTMVA_FCNC_tZ.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "doBDT_FCNC_tZ", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // global event weights per tree (see below for setting event-wise weights) //Double_t signalWeight = 0.003582; //Double_t backgroundWeight = 0.0269; Double_t signalWeight = 1; Double_t backgroundWeight = 1; TFile *input_sig = TFile::Open( "proof.root" ); TFile *input_wz = TFile::Open( "proof.root" ); TTree *signal = (TTree*)input_sig->Get("Ttree_FCNCkut"); TTree *background_WZ = (TTree*)input_wz->Get("Ttree_WZ"); /*TTree *background_ZZ = (TTree*)input_wz->Get("Ttree_ZZ"); TTree *background_WW = (TTree*)input_wz->Get("Ttree_WW"); TTree *background_TTbar = (TTree*)input_wz->Get("Ttree_TTbar"); TTree *background_Zjets = (TTree*)input_wz->Get("Ttree_Zjets"); TTree *background_Wjets = (TTree*)input_wz->Get("Ttree_Wjets"); TTree *background_TtW = (TTree*)input_wz->Get("Ttree_TtW"); TTree *background_TbartW = (TTree*)input_wz->Get("Ttree_TbartW");*/ // You can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background_WZ, backgroundWeight ); /*factory->AddBackgroundTree( background_ZZ, backgroundWeight ); factory->AddBackgroundTree( background_WW, backgroundWeight ); factory->AddBackgroundTree( background_TTbar, backgroundWeight ); factory->AddBackgroundTree( background_Zjets, backgroundWeight ); factory->AddBackgroundTree( background_Wjets, backgroundWeight ); factory->AddBackgroundTree( background_TtW, backgroundWeight ); factory->AddBackgroundTree( background_TbartW, backgroundWeight );*/ factory->AddVariable("tree_topMass", 'F'); factory->AddVariable("tree_deltaPhilb", 'F'); factory->AddVariable("tree_asym", 'F'); factory->AddVariable("tree_Zpt", 'F'); // to set weights. The variable must exist in the tree // for signal : factory->SetSignalWeightExpression ("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("weight1*weight2"); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5"; factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=100:nEventsMin=100:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void TMVAClassification( TString fname = "./tmva_class_example.root") { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // to get access to the GUI and all tmva macros TString tmva_dir(TString(gRootDir) + "/tmva"); if(gSystem->Getenv("TMVASYS")) tmva_dir = TString(gSystem->Getenv("TMVASYS")); gROOT->SetMacroPath(tmva_dir + "/test/:" + gROOT->GetMacroPath() ); gROOT->ProcessLine(".L TMVAGui.C"); // Default MVA methods to be trained + tested std::map<std::string,int> Use; Use["KNN"] = 1; // k-nearest neighbour method // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] factory->AddVariable( "pt_eH", 'D' ); factory->AddVariable( "max(pt_jet_eH,pt_eH)", 'D' ); factory->AddVariable( "njets", 'I' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables /// factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' ); /// factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' ); // Read training and test data // (it is also possible to use ASCII format as input -> see TMVA Users Guide) TFile *input(0); if (gSystem->AccessPathName( fname )){ // file does not exist in local directory gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root"); fname = "./tmva_class_example.root"; }else{ input= TFile::Open( fname ); } if (!input) { std::cout << "ERROR: could not open data file " << fname << std::endl; exit(1); } std::cout << "--- TMVAClassification : Using input file: " << input->GetName() << std::endl; // --- Register the training and test trees TTree *inputTree = (TTree*)input->Get("FakeTreeSig"); TTree *background = (TTree*)input->Get("FakeTreeBG"); // global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; // cuts for signal and background //~ TCut signalCut = "selected==1 && id_iso_eleH==1"; //~ TCut backgroundCut = "selected==1 && id_iso_eleH==0"; //~ //~ std::cout << " THe signal cut is " << signalCut.GetTitle() << " bg cut is " << backgroundCut.GetTitle() << std::endl; Int_t num_pass = inputTree->GetEntries(); Int_t num_fail = background->GetEntries(); std::cout << num_pass << " " << num_fail << std::endl; // You can add an arbitrary number of signal or background trees factory->AddSignalTree ( inputTree, 1.0 ); factory->AddBackgroundTree( background, 1.0 ); factory->SetWeightExpression( "weight" ); //factory->SetInputTrees( inputTree, signalCut, backgroundCut ); // To give different trees for training and testing, do as follows: // factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ); // factory->AddSignalTree( signalTestTree, signalTestWeight, "Test" ); // Use the following code instead of the above two or four lines to add signal and background // training and test events "by hand" // NOTE that in this case one should not give expressions (such as "var1+var2") in the input // variable definition, but simply compute the expression before adding the event // // // --- begin ---------------------------------------------------------- // std::vector<Double_t> vars( 4 ); // vector has size of number of input variables // Float_t treevars[4], weight; // // // Signal // for (UInt_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (UInt_t i=0; i<signal->GetEntries(); i++) { // signal->GetEntry(i); // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < signal->GetEntries()/2.0) factory->AddSignalTrainingEvent( vars, signalWeight ); // else factory->AddSignalTestEvent ( vars, signalWeight ); // } // // // Background (has event weights) // background->SetBranchAddress( "weight", &weight ); // for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (UInt_t i=0; i<background->GetEntries(); i++) { // background->GetEntry(i); // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight*weight ); // else factory->AddBackgroundTestEvent ( vars, backgroundWeight*weight ); // } // --- end ------------------------------------------------------------ // // --- end of tree registration // Set individual event weights (the variables must exist in the original TTree) // for signal : factory->SetSignalWeightExpression ("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("weight1*weight2"); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = "selected==1"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = "selected==1"; // for example: TCut mycutb = "abs(var1)<0.5"; // Tell the factory how to use the training and testing events // // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); std::stringstream indexes; indexes.str(""); indexes << "nTrain_Signal=" << num_pass << ":nTrain_Background=" << num_fail << ":SplitMode=Random:NormMode=None:!V"; std::string input_opt=indexes.str(); std::cout << "Options are " << input_opt << std::endl; factory->PrepareTrainingAndTestTree( mycuts, mycutb, input_opt); //"nTrain_Signal="+num_pass+":nTrain_Background="+num_fail+":SplitMode=Random:NormMode=None:!V" ); // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=50:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // For an example of the category classifier usage, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","GA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events //factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs // factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
TString autoencoder (std::string inputFileName) { std::string tmstr (now ()); TString tmstmp (tmstr.c_str ()); std::cout << "==> Start Autoencoder " << std::endl; std::cout << "-------------------- open input file ---------------- " << std::endl; TString fname = pathToData + TString (inputFileName.c_str ()) + TString (".root"); TFile *input = TFile::Open( fname ); std::cout << "-------------------- get tree ---------------- " << std::endl; TTree *tree = (TTree*)input->Get("data"); TString outfileName( "TMVAAutoEnc__" ); outfileName += TString (inputFileName.c_str ()) + TString ("__") + tmstmp + TString (".root"); std::cout << "-------------------- open output file ---------------- " << std::endl; TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); std::cout << "-------------------- prepare factory ---------------- " << std::endl; TMVA::Factory *factory = new TMVA::Factory( "TMVAAutoencoder", outputFile, "AnalysisType=Regression:Color:DrawProgressBar" ); std::cout << "-------------------- add variables ---------------- " << std::endl; for (auto varname : variableNames+additionalVariableNames) { factory->AddVariable (varname.c_str (), 'F'); factory->AddTarget (varname.c_str (), 'F'); } std::cout << "-------------------- add tree ---------------- " << std::endl; // global event weights per tree (see below for setting event-wise weights) Double_t regWeight = 1.0; factory->AddRegressionTree (tree, regWeight); std::cout << "-------------------- prepare ---------------- " << std::endl; TCut mycut = ""; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1"; factory->PrepareTrainingAndTestTree( mycut, "nTrain_Regression=0:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents:!V" ); /* // This would set individual event weights (the variables defined in the */ /* // expression need to exist in the original TTree) */ /* factory->SetWeightExpression( "var1", "Regression" ); */ if (true) { TString layoutString ("Layout=TANH|100,TANH|20,TANH|40,LINEAR"); TString training0 ("LearningRate=1e-5,Momentum=0.5,Repetitions=1,ConvergenceSteps=500,BatchSize=50,TestRepetitions=7,WeightDecay=0.01,Regularization=NONE,DropConfig=0.5+0.5+0.5+0.5,DropRepetitions=2"); TString training1 ("LearningRate=1e-5,Momentum=0.9,Repetitions=1,ConvergenceSteps=500,BatchSize=30,TestRepetitions=7,WeightDecay=0.01,Regularization=L2,DropConfig=0.1+0.1+0.1,DropRepetitions=1"); TString training2 ("LearningRate=1e-4,Momentum=0.3,Repetitions=1,ConvergenceSteps=10,BatchSize=40,TestRepetitions=7,WeightDecay=0.1,Regularization=L2"); TString training3 ("LearningRate=1e-5,Momentum=0.1,Repetitions=1,ConvergenceSteps=10,BatchSize=10,TestRepetitions=7,WeightDecay=0.001,Regularization=NONE"); TString trainingStrategyString ("TrainingStrategy="); trainingStrategyString += training0 + "|" + training1 + "|" + training2 ; //+ "|" + training3; // TString trainingStrategyString ("TrainingStrategy=LearningRate=1e-1,Momentum=0.3,Repetitions=3,ConvergenceSteps=20,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,L1=false,DropFraction=0.0,DropRepetitions=5"); TString nnOptions ("!H:V:ErrorStrategy=SUMOFSQUARES:VarTransform=N:WeightInitialization=XAVIERUNIFORM"); // TString nnOptions ("!H:V:VarTransform=Normalize:ErrorStrategy=CHECKGRADIENTS"); nnOptions.Append (":"); nnOptions.Append (layoutString); nnOptions.Append (":"); nnOptions.Append (trainingStrategyString); factory->BookMethod( TMVA::Types::kNN, TString("NN_")+tmstmp, nnOptions ); // NN } // -------------------------------------------------------------------------------------------------- factory->TrainAllMethods(); factory->TestAllMethods(); factory->EvaluateAllMethods(); outputFile->Close(); // TMVA::TMVARegGui (outfileName); delete factory; return TString("NN_")+tmstmp; }