void TMVAClassification() {
   TMVA::Tools::Instance();

   std::cout << "==> Start TMVAClassification" << std::endl;

   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "AnalysisType=Classification" );

   factory->AddVariable("LifeTime",       'F');
   factory->AddVariable("FlightDistance", 'F');
   factory->AddVariable("pt",             'F');

   TString fname = "../tau_data/training.root";
   TFile *input = TFile::Open( fname );
   TTree *tree  = (TTree*)input->Get("data");

   factory->AddTree(tree, "Signal",     1., "signal == 1", "Training");
   factory->AddTree(tree, "Signal",     1., "signal == 1", "Test");
   factory->AddTree(tree, "Background", 1., "signal == 0", "Training");
   factory->AddTree(tree, "Background", 1., "signal == 0", "Test");

   // gradient boosting training
   factory->BookMethod(TMVA::Types::kBDT, "GBDT",
                       "NTrees=40:BoostType=Grad:Shrinkage=0.01:MaxDepth=7:UseNvars=6:nCuts=20:MinNodeSize=10");

   factory->TrainAllMethods();

   input->Close();
   outputFile->Close();

   delete factory;
}
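// A minimal application sketch for the GBDT trained above; it is not part of the
// original macro. The weight-file path is an assumption: with this old-style Factory
// API, TMVA writes weights/<JobName>_<MethodName>.weights.xml (newer releases use
// dataset/weights/). The candidate values below are dummies.
void TMVAApplicationSketch() {
   TMVA::Tools::Instance();

   Float_t lifeTime, flightDistance, pt;
   TMVA::Reader reader("!Color:!Silent");

   // Variables must be registered with the same names and order as in training.
   reader.AddVariable("LifeTime",       &lifeTime);
   reader.AddVariable("FlightDistance", &flightDistance);
   reader.AddVariable("pt",             &pt);

   reader.BookMVA("GBDT", "weights/TMVAClassification_GBDT.weights.xml"); // assumed path

   lifeTime = 0.001f; flightDistance = 10.f; pt = 2000.f; // dummy candidate
   std::cout << "GBDT response: " << reader.EvaluateMVA("GBDT") << std::endl;
}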
void TMVAClassification( TString myMethodList = "" )
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc.
   // If you use your private .rootrc, or run from a different directory, please copy the
   // corresponding lines from .rootrc.

   // Methods to be processed can be given as an argument; use format:
   //
   //     mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   // If you like to use a method via the plugin mechanism, we recommend using
   //
   //     mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
   //
   // (an example is given for using the BDT as plugin (see below), but of course
   // the real application is when you write your own method based on the plugin mechanism)
   //---------------------------------------------------------------

   // This loads the library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Cut optimisation
   Use["Cuts"]            = 1;
   Use["CutsD"]           = 1;
   Use["CutsPCA"]         = 0;
   Use["CutsGA"]          = 0;
   Use["CutsSA"]          = 0;
   //
   // --- 1-dimensional likelihood ("naive Bayes estimator")
   Use["Likelihood"]      = 1;
   Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"]   = 1; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"]   = 0;
   Use["LikelihoodMIX"]   = 0;
   //
   // --- Multidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]           = 1;
   Use["PDERSD"]          = 0;
   Use["PDERSPCA"]        = 0;
   Use["PDEFoam"]         = 0;
   Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
   Use["KNN"]             = 1; // k-nearest neighbour method
   //
   // --- Linear Discriminant Analysis
   Use["LD"]              = 1; // Linear Discriminant identical to Fisher
   Use["Fisher"]          = 0;
   Use["FisherG"]         = 0;
   Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
   Use["HMatrix"]         = 0;
   //
   // --- Function Discriminant analysis
   Use["FDA_GA"]          = 1; // minimisation of user-defined function using Genetic Algorithm
   Use["FDA_SA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   Use["FDA_MCMT"]        = 0;
   //
   // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
   Use["MLP"]             = 0; // Recommended ANN
   Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
   Use["MLPBNN"]          = 1; // Recommended ANN with BFGS training method and Bayesian regulator
   Use["CFMlpANN"]        = 0; // Deprecated ANN from ALEPH
   Use["TMlpANN"]         = 0; // ROOT's own ANN
   //
   // --- Support Vector Machine
   Use["SVM"]             = 1;
   //
   // --- Boosted Decision Trees
   Use["BDT"]             = 1; // uses Adaptive Boost
   Use["BDTG"]            = 0; // uses Gradient Boost
   Use["BDTB"]            = 0; // uses Bagging
   Use["BDTD"]            = 0; // decorrelation + Adaptive Boost
   Use["BDTF"]            = 0; // allow usage of the Fisher discriminant for node splitting
   //
   // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
   Use["RuleFit"]         = 1;
   // ---------------------------------------------------------------

   (TMVA::gConfig().GetVariablePlotting()).fMaxNumOfAllowedVariablesForScatterPlots = 20;

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;
      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);
         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod
                      << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // --------------------------------------------------------------------------------------------------
   // --- Here the preparation phase begins

   //FILEOUT
   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName( "TMVA_2GeV_barrel2.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory is
   // the only TMVA object you have to interact with.
   //
   // The first argument is the base of the name of all the
   // weight files in the directory weight/
   //
   // The second argument is the output file for the training results.
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   // TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
   //                                             "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );
   TMVA::Factory* factory = new TMVA::Factory( "TMVAMulticlass_2GeV_barrel2", outputFile,
                                               "!V:!Silent:Color:!DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=multiclass" );

   TFile* file = TFile::Open("/scratchfs/higgs/yudan/SingleParticle/Reco//all_2GeV/PID_2GeV_E30L_E10mm_H48L_H10mm_2GeV.root");
   if (!file->IsOpen()) std::cout << "could not open file" << std::endl;
   TTree* treePi = (TTree*)file->Get("Pion");
   if (treePi == 0) std::cout << "could not get tree Pion" << std::endl;
   TTree* treeMu = (TTree*)file->Get("Muon");
   if (treeMu == 0) std::cout << "could not get tree Muon" << std::endl;
   TTree* treeE = (TTree*)file->Get("Electron");
   if (treeE == 0) std::cout << "could not get tree Electron" << std::endl;

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training.
   // Note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   // TCut mycuts = "NTrk==1 && MCPP[2]/MCPEn<0.7";
   // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";

   factory->AddTree( treePi, TString("Pi"), 1., "abs(cosTheta)>0.3&&abs(cosTheta)<0.55" );
   factory->AddTree( treeMu, TString("Mu"), 1., "abs(cosTheta)>0.3&&abs(cosTheta)<0.55" );
   factory->AddTree( treeE,  TString("E"),  1., "abs(cosTheta)>0.3&&abs(cosTheta)<0.55" );

   factory->AddVariable("EcalNHit",   "EcalNHit",   "units", 'I');
   factory->AddVariable("HcalNHit",   "HcalNHit",   "units", 'I');
   factory->AddVariable("NLEcal",     "NLEcal",     "units", 'I');
   factory->AddVariable("NLHcal",     "NLHcal",     "units", 'I');
   factory->AddVariable("maxDisHtoL", "maxDisHtoL", "units", 'F');
   factory->AddVariable("avDisHtoL",  "avDisHtoL",  "units", 'F');
   factory->AddVariable("EE := EcalEn/EClu", "EcalEn", "units", 'F');
   factory->AddVariable("graDepth",   "graDepth",   "units", 'F');
   factory->AddVariable("cluDepth",   "cluDepth",   "units", 'F');
   factory->AddVariable("minDepth",   "minDepth",   "units", 'F');
   factory->AddVariable("MaxDisHel",  "MaxDisHel",  "units", 'F');
   factory->AddVariable("FD_all",     "FD_all",     "units", 'F');
   factory->AddVariable("FD_ECAL",    "FD_ECAL",    "units", 'F');
   factory->AddVariable("FD_HCAL",    "FD_HCAL",    "units", 'F');
   factory->AddVariable("E_10 := EEClu_L10/EcalEn", "EEClu_L10", "units", 'F');
   factory->AddVariable("E_R := EEClu_R/EcalEn",    "EEClu_R",   "units", 'F');
   factory->AddVariable("E_r := EEClu_r/EcalEn",    "EEClu_r",   "units", 'F');
   factory->AddVariable("rms_Hcal",   "rms_Hcal",   "units", 'F');
   factory->AddVariable("av_NHH",     "av_NHH",     "units", 'F');
   factory->AddVariable("AL_Ecal",    "AL_Ecal",    "units", 'I');
   factory->AddVariable("FD_ECALF10", "FD_ECALF10", "units", 'F');
   factory->AddVariable("FD_ECALL20", "FD_ECALL20", "units", 'F');
   factory->AddVariable("NH_ECALF10", "NH_ECALF10", "units", 'I');
   factory->AddVariable("dEdx",       "dEdx",       "units", 'F');

   std::cout << "--- TMVAClassification : Using input file: " << file->GetName() << std::endl;

   // --- Register the training and test trees

   // Global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;

   // You can add an arbitrary number of signal or background trees.
   // To give different trees for training and testing, do as follows:
   //    factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
   //    factory->AddSignalTree( signalTestTree,     signalTestWeight,  "Test" );

   // Use the following code instead of the above two or four lines to add signal and background
   // training and test events "by hand".
   // NOTE that in this case one should not give expressions (such as "var1+var2") in the input
   // variable definition, but simply compute the expression before adding the event.
   //
   // --- begin ----------------------------------------------------------
   // std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
   // Float_t  treevars[4], weight;
   //
   // // Signal
   // for (UInt_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   // for (UInt_t i=0; i<signal->GetEntries(); i++) {
   //    signal->GetEntry(i);
   //    for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //    // add training and test events; here: first half is training, second is testing
   //    // note that the weight can also be event-wise
   //    if (i < signal->GetEntries()/2.0) factory->AddSignalTrainingEvent( vars, signalWeight );
   //    else                              factory->AddSignalTestEvent    ( vars, signalWeight );
   // }
   //
   // // Background (has event weights)
   // background->SetBranchAddress( "weight", &weight );
   // for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   // for (UInt_t i=0; i<background->GetEntries(); i++) {
   //    background->GetEntry(i);
   //    for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //    // add training and test events; here: first half is training, second is testing
   //    // note that the weight can also be event-wise
   //    if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight*weight );
   //    else                                factory->AddBackgroundTestEvent    ( vars, backgroundWeight*weight );
   // }
   // --- end ------------------------------------------------------------
   //
   // --- end of tree registration

   // Set individual event weights (the variables must exist in the original TTree)
   //    for signal    : factory->SetSignalWeightExpression    ("weight1*weight2");
   //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
   // factory->SetBackgroundWeightExpression( "weight" );

   // Apply additional cuts on the signal and background samples (can be different)

   // Tell the factory how to use the training and testing events.
   //
   // If no numbers of events are given, half of the events in the tree are used
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut,
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
   factory->PrepareTrainingAndTestTree( "", "SplitMode=Random:NormMode=NumEvents:!V" );

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // It is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1:...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

   if (Use["CutsSA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                           "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   // Likelihood ("naive Bayes estimator")
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // Decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   // PCA-transformed likelihood
   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" );

   // Use a kernel density estimator to approximate the PDFs
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" );

   // Use a variable-dependent mix of splines and kernel density estimator
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" );

   // Test the multi-dimensional probability density estimator.
   // Here are the options strings for the MinMax and RMS methods, respectively:
   //    "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3"
   //    "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3"
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["PDEFoamBoost"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
                           "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // H-Matrix (chi-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" );

   // Linear discriminant (same as Fisher discriminant)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD",
                           "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher discriminant (same as LD)
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher",
                           "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher",
                           "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see CutsSA options)
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see CutsSA options)
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP",
                           "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS",
                           "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN",
                           "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with Bayesian regulators

   // CF (Clermont-Ferrand) ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN",
                           "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:...

   // Tmlp (ROOT) ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN",
                           "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:...

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDTG"]) // Gradient Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.50:nCuts=20:NNodesMax=8" );
                           // "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:MaxDepth=2" );

   if (Use["BDT"]) // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );

   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );

   if (Use["BDTF"]) // Allow using the Fisher discriminant in node splitting for (strongly) linearly correlated variables
      factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher",
                           "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------
   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events
   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","FitGA");

   // --------------------------------------------------------------------------------------------------
   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   // if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
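// How the trained multiclass response would be read back: a hypothetical sketch, not
// part of the original macro. A real application must register every training variable
// above (all 24, same names and order); only two are shown here for brevity, and both
// the weight-file path and the method tag ("BDTG") are assumptions.
void PIDApplicationSketch() {
   TMVA::Tools::Instance();

   Float_t ecalNHit = 0.f, hcalNHit = 0.f; // ...plus the remaining training variables
   TMVA::Reader reader("!Color:!Silent");
   reader.AddVariable("EcalNHit", &ecalNHit);
   reader.AddVariable("HcalNHit", &hcalNHit);
   // reader.AddVariable(...); // register the rest exactly as in training

   reader.BookMVA("BDTG", "weights/TMVAMulticlass_2GeV_barrel2_BDTG.weights.xml"); // assumed path

   // Classes were registered as "Pi", "Mu", "E", so the response vector follows that order.
   const std::vector<Float_t>& p = reader.EvaluateMulticlass("BDTG");
   std::cout << "P(Pi)=" << p[0] << "  P(Mu)=" << p[1] << "  P(E)=" << p[2] << std::endl;
}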
int main(int argc, char** argv)
{
   if (argc != 2) {
      std::cerr << ">>>>> analysis.cpp::usage: " << argv[0] << " configFileName" << std::endl;
      return 1;
   }

   // Parse the config file
   parseConfigFile (argv[1]);

   std::string treeName       = gConfigParser -> readStringOption("Input::treeName");
   std::string fileSamples    = gConfigParser -> readStringOption("Input::fileSamples");
   std::string inputDirectory = gConfigParser -> readStringOption("Input::inputDirectory");

   std::string inputBeginningFile = "out_NtupleProducer_";
   try {
      inputBeginningFile = gConfigParser -> readStringOption("Input::inputBeginningFile");
   }
   catch (char const* exceptionString) {
      std::cerr << " exception = " << exceptionString << std::endl;
   }
   std::cout << ">>>>> Input::inputBeginningFile  " << inputBeginningFile << std::endl;

   double LUMI = gConfigParser -> readDoubleOption("Options::Lumi");

   std::vector<std::string> SignalName;
   SignalName = gConfigParser -> readStringListOption("Options::SignalName");
   for (int iSignalSample=0; iSignalSample<SignalName.size(); iSignalSample++) {
      std::cout << " Signal[" << iSignalSample << "] = " << SignalName.at(iSignalSample) << std::endl;
   }

   std::string nameWeight = "1";
   try {
      nameWeight = gConfigParser -> readStringOption("Options::nameWeight");
   }
   catch (char const* exceptionString) {
      std::cerr << " exception = " << exceptionString << std::endl;
   }
   std::cout << ">>>>> Input::nameWeight  " << nameWeight << std::endl;

   TTree *treeJetLepVect[200];

   char *nameSample[1000];
   char *nameHumanReadable[1000];
   char *xsectionName[1000];

   char nameFileIn[1000];
   sprintf(nameFileIn,"%s",fileSamples.c_str());

   int numberOfSamples = ReadFile(nameFileIn, nameSample, nameHumanReadable, xsectionName);

   double Normalization[1000];
   double xsection[1000];

   for (int iSample=0; iSample<numberOfSamples; iSample++) {
      xsection[iSample] = atof(xsectionName[iSample]);
   }

   for (int iSample=0; iSample<numberOfSamples; iSample++) {
      char nameFile[20000];
      sprintf(nameFile,"%s/%s%s.root",inputDirectory.c_str(),inputBeginningFile.c_str(),nameSample[iSample]);
      TFile* f = new TFile(nameFile, "READ");
      treeJetLepVect[iSample] = (TTree*) f->Get(treeName.c_str());
      char nameTreeJetLep[100];
      sprintf(nameTreeJetLep,"treeJetLep_%d",iSample);
      treeJetLepVect[iSample]->SetName(nameTreeJetLep);
      double XSection;
      XSection = xsection[iSample];
      Normalization[iSample] = XSection * LUMI / 1000.;
   }

   //==== cut
   std::string CutFile = gConfigParser -> readStringOption("Selections::CutFile");
   std::vector<std::string> vCut;
   std::cout << " nCuts = " << ReadFileCut(CutFile, vCut) << std::endl;

   std::string Cut;
   if (vCut.size() != 0) {
      Cut = vCut.at(0);
   }
   else {
      Cut = "1";
   }

   //==== HiggsMass
   std::string HiggsMass = gConfigParser -> readStringOption("Options::HiggsMass");

   //==== list of methods
   std::vector<std::string> vectorMyMethodList = gConfigParser -> readStringListOption("Options::MVAmethods");
   TString myMethodList;
   for (int iMVA = 0; iMVA < vectorMyMethodList.size(); iMVA++) {
      if (iMVA == 0) myMethodList = Form ("%s",vectorMyMethodList.at(iMVA).c_str());
      else           myMethodList = Form ("%s,%s",myMethodList.Data(),vectorMyMethodList.at(iMVA).c_str());
   }

   //==== output
   TString outfileName = gConfigParser -> readStringOption("Output::outFileName");

   // This loads the library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;
   Use["MLP"]     = 1;
   Use["BDTG"]    = 1;
   Use["FDA_GA"]  = 0;
   Use["PDEFoam"] = 0;

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;
      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);
         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return 0;
         }
         Use[regMethod] = 1;
      }
   }

   // --------------------------------------------------------------------------------------------------
   // --- Here the preparation phase begins

   // Create a new root output file
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // TMVA::Factory *factory = new TMVA::Factory( "TMVAMulticlass", outputFile,
   //                                             "AnalysisType=multiclass:!V:!Silent:Transformations=I;D" );
   TMVA::Factory *factory = new TMVA::Factory( "TMVAMulticlass", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=multiclass" );

   factory->AddVariable( "jetpt1"       , 'F');
   factory->AddVariable( "jetpt2"       , 'F');
   factory->AddVariable( "mjj"          , 'F');
   factory->AddVariable( "detajj"       , 'F');
   factory->AddVariable( "dphilljetjet" , 'F');
   factory->AddVariable( "pt1"          , 'F');
   factory->AddVariable( "pt2"          , 'F');
   factory->AddVariable( "mll"          , 'F');
   factory->AddVariable( "dphill"       , 'F');
   factory->AddVariable( "mth"          , 'F');
   factory->AddVariable( "dphillmet"    , 'F');
   factory->AddVariable( "mpmet"        , 'F');

   factory->AddSpectator( "channel" , 'F');

   for (int iSample=0; iSample<numberOfSamples; iSample++) {
      int numEnt = treeJetLepVect[iSample]->GetEntries(Cut.c_str());
      std::cout << " Sample = " << nameSample[iSample] << " ~ " << nameHumanReadable[iSample] << " --> " << numEnt << std::endl;
      if (numEnt != 0) {
         if      (iSample == 0) factory->AddTree( treeJetLepVect[iSample], "Signal",     Normalization[iSample] );
         else if (iSample == 1) factory->AddTree( treeJetLepVect[iSample], "Background", Normalization[iSample] );
         else                   factory->AddTree( treeJetLepVect[iSample], TString(nameHumanReadable[iSample]), Normalization[iSample] );
         // factory->AddTree( treeJetLepVect[iSample], TString(nameHumanReadable[iSample]), Normalization[iSample] );
         // factory->AddTree( treeJetLepVect[iSample], TString(nameHumanReadable[iSample]), Normalization[iSample], nameWeight.c_str() );
         // factory->AddTree( treeJetLepVect[iSample], TString(nameHumanReadable[iSample]) );
      }
   }

   // for (int iSample=0; iSample<numberOfSamples; iSample++){
   //    int numEnt = treeJetLepVect[iSample]->GetEntries(Cut.c_str());
   //    std::cout << " Sample = " << nameSample[iSample] << " ~ " << nameHumanReadable[iSample] << " --> " << numEnt << std::endl;
   //    if (numEnt != 0) {
   //       bool isSig = false;
   //       for (std::vector<std::string>::const_iterator itSig = SignalName.begin(); itSig != SignalName.end(); itSig++){
   //          if (nameHumanReadable[iSample] == *itSig) isSig = true;
   //       }
   //       if (isSig) {
   //          factory->AddTree( treeJetLepVect[iSample], TString("Signal"), Normalization[iSample] ); //---> there must be one tree called Signal!
   //       }
   //       else {
   //          factory->AddTree( treeJetLepVect[iSample], TString(nameHumanReadable[iSample]), Normalization[iSample] );
   //       }
   //    }
   // }
   //
   // for (int iSample=0; iSample<numberOfSamples; iSample++){
   //    int numEnt = treeJetLepVect[iSample]->GetEntries(Cut.c_str());
   //    std::cout << " Sample = " << nameSample[iSample] << " ~ " << nameHumanReadable[iSample] << " --> " << numEnt << std::endl;
   //    if (numEnt != 0) {
   //       bool isSig = false;
   //       for (std::vector<std::string>::const_iterator itSig = SignalName.begin(); itSig != SignalName.end(); itSig++){
   //          if (nameHumanReadable[iSample] == *itSig) isSig = true;
   //       }
   //       if (isSig) {
   //          // factory->AddTree( treeJetLepVect[iSample], TString("Signal"), Normalization[iSample] ); //---> there must be one tree called Signal!
   //       }
   //       else {
   //          factory->AddTree( treeJetLepVect[iSample], TString(nameHumanReadable[iSample]), Normalization[iSample] );
   //       }
   //    }
   // }

   std::cerr << " >>> trees registered" << std::endl;

   TCut mycuts = Cut.c_str();

   // factory->SetWeightExpression          ( nameWeight.c_str() );
   // factory->SetBackgroundWeightExpression( nameWeight.c_str() );
   // factory->SetSignalWeightExpression    ( nameWeight.c_str() );

   std::cerr << " >>> weight expressions set" << std::endl;

   factory->PrepareTrainingAndTestTree( mycuts, "SplitMode=Random:NormMode=None:!V" );
   // factory->PrepareTrainingAndTestTree( "", "SplitMode=Random:NormMode=None:!V" );

   std::cerr << " >>> training/test trees prepared" << std::endl;

   // gradient boosted decision trees
   // if (Use["BDTG"]) factory->BookMethod( TMVA::Types::kBDT, "BDTG",
   //                                       "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.50:nCuts=20:NNodesMax=8" );
   if (Use["BDTG"])
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=600:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.50:nCuts=20:NNodesMax=8" );

   // neural network
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP",
                           "!H:!V:NeuronType=tanh:NCycles=1000:HiddenLayers=N+5,5:TestRate=5:EstimatorType=MSE" );

   // functional discriminant with GA minimizer
   if (Use["FDA_GA"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   // PDE-Foam approach
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "!H:!V:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   //==== Optimize parameters in MVA methods
   // factory->OptimizeAllMethods();
   // factory->OptimizeAllMethods("ROCIntegral","Scan");

   //==== Train MVAs using the set of training events ====
   factory->TrainAllMethods();

   //==== Evaluate all MVAs using the set of test events ====
   factory->TestAllMethods();

   //==== Evaluate and compare performance of all configured MVAs ====
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   //==== change position of weights file
   std::string toDo;

   toDo = "rm -r Weights-MVA-MultiClass/weights_" + HiggsMass + "_testVariables";
   std::cerr << "toDo = " << toDo << std::endl;
   system (toDo.c_str());

   toDo = "mv weights Weights-MVA-MultiClass/weights_" + HiggsMass + "_testVariables";
   std::cerr << "toDo = " << toDo << std::endl;
   system (toDo.c_str());

   // Launch the GUI for the root macros
   // if (!gROOT->IsBatch()) TMVAGui( outfileName );

   return 0;
}
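// For reference, the Section::option lookups above suggest a config file along the
// following lines. This is a hypothetical sketch: the exact syntax depends on the
// ConfigParser implementation (assumed here to be the usual "[Section]" plus
// "option = value" layout), and every value below is a placeholder.
//
//    [Input]
//    treeName           = latino                 # name of the TTree in each sample file
//    fileSamples        = samples.txt            # sample list read by ReadFile
//    inputDirectory     = ./ntuples
//    inputBeginningFile = out_NtupleProducer_    # optional; this is the default
//
//    [Selections]
//    CutFile            = cuts.txt               # first line is used as the selection
//
//    [Options]
//    Lumi               = 1000.                  # normalization: xsec * Lumi / 1000.
//    SignalName         = ggH                    # list of signal sample names
//    nameWeight         = 1                      # optional; event-weight expression
//    HiggsMass          = 125
//    MVAmethods         = BDTG, MLP              # joined into myMethodList
//
//    [Output]
//    outFileName        = TMVAMulticlass.root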
void TMVA_stop( TString signal_name = "T2tt", int train_region = 1, float x_parameter = 0.25)
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc.
   // If you use your private .rootrc, or run from a different directory, please copy the
   // corresponding lines from .rootrc.

   // Methods to be processed can be given as an argument; use format:
   //
   //     mylinux~> root -l TMVA_stop.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   // If you like to use a method via the plugin mechanism, we recommend using
   //
   //     mylinux~> root -l TMVA_stop.C\(\"P_myMethod\"\)
   //
   // (an example is given for using the BDT as plugin (see below), but of course
   // the real application is when you write your own method based on the plugin mechanism)

   //-----------------------------------------------------
   // define event selection (store in TCut sel)
   //-----------------------------------------------------

   TCut njets4   ("mini_njets>=4");
   TCut met100   ("mini_met>=100");
   TCut mt120    ("mini_mt>=120");
   TCut nb1      ("mini_nb>=1");
   TCut isotrk   ("mini_passisotrk==1");
   TCut lep_pt30 ("mini_nlep>=1 && mini_lep1pt>30.0");
   TCut sig      ("mini_sig==1");

   TCut sel0 = njets4 + met100 + mt120 + nb1 + isotrk + lep_pt30 + sig;

   cout << "Using selection    : " << sel0.GetTitle() << endl;
   cout << "Doing signal point : " << train_region << endl;

   //-----------------------------------------------------
   // choose which variables to include in MVA training
   //-----------------------------------------------------

   std::map<std::string,int> mvaVar;
   mvaVar[ "met" ]               = 1;
   mvaVar[ "lep1pt" ]            = 0;
   mvaVar[ "mt2w" ]              = 1;
   mvaVar[ "htratiom" ]          = 1;
   mvaVar[ "chi2" ]              = 1;
   mvaVar[ "dphimjmin" ]         = 1;
   mvaVar[ "pt_b" ]              = 0;
   mvaVar[ "nb" ]                = 0;
   mvaVar[ "pt_J1" ]             = 0;
   mvaVar[ "pt_J2" ]             = 0;
   mvaVar[ "rand" ]              = 0;
   mvaVar[ "mt" ]                = 0;
   mvaVar[ "mt2bl" ]             = 0;
   mvaVar[ "mt2b" ]              = 0;
   mvaVar[ "lep1eta" ]           = 0;
   mvaVar[ "thrjetlm" ]          = 0;
   mvaVar[ "apljetlm" ]          = 0;
   mvaVar[ "sphjetlm" ]          = 0;
   mvaVar[ "cirjetlm" ]          = 0;
   mvaVar[ "chi2min" ]           = 0;
   mvaVar[ "chi2min_mt2b" ]      = 0;
   mvaVar[ "chi2min_mt2bl" ]     = 0;
   mvaVar[ "chi2min_mt2w" ]      = 0;
   mvaVar[ "mt2bmin" ]           = 0;
   mvaVar[ "mt2blmin" ]          = 0;
   mvaVar[ "mt2wmin_chi2" ]      = 0;
   mvaVar[ "mt2bmin_chi2" ]      = 0;
   mvaVar[ "mt2blmin_chi2" ]     = 0;
   mvaVar[ "mt2wmin_chi2prob" ]  = 0;
   mvaVar[ "mt2bmin_chi2prob" ]  = 0;
   mvaVar[ "mt2blmin_chi2prob" ] = 0;
   mvaVar[ "htratiol" ]          = 0;
   mvaVar[ "dphimj1" ]           = 0;
   mvaVar[ "dphimj2" ]           = 0;
   mvaVar[ "metsig" ]            = 0;

   //---------------------------------
   // choose bkg samples to include
   //---------------------------------

   cout << "Background trees: " << endl;

   int n_backgrounds = 8;
   TString backgrounds[] = { "ttdl_powheg", "ttsl_powheg", "w1to4jets", "tW_lep",
                             "triboson", "diboson", "ttV", "DY1to4Jtot" };
   TString bkgPath = "/nfs-3/userdata/stop/Train/V00-02-18__V00-03-00_4jetsMET100_bkg/";

   TChain* chBackground = new TChain("t");
   for (int i = 0; i < n_backgrounds; i++) {
      TString backgroundChain = bkgPath + "/" + backgrounds[i] + ".root";
      cout << "  " << backgroundChain << endl;
      chBackground->Add(backgroundChain);
   }

   //---------------------------------
   // choose signal sample to include
   //---------------------------------

   cout << "Signal trees: " << endl;

   TString s_train_region = "";
   s_train_region += train_region;
   TString s_x_parameter = "";
   s_x_parameter = Form("%.2f",x_parameter);

   TString signalPath    = "/nfs-3/userdata/stop/Train/";
   TString signalVersion = "V00-02-18__V00-03-00_4jetsMET100_";

   TChain *chSignal = new TChain("t");
   TString base_name = signalPath + "/" + signalVersion + signal_name + "/" + signal_name + "_" + s_train_region;
   if (signal_name == "T2bw") base_name = base_name + "_" + s_x_parameter;
   TString signalChain = base_name + ".root";
   cout << "  " << signalChain << endl;
   chSignal->Add(signalChain);

   //-----------------------------------------------------
   // choose backgrounds to include for multiple outputs
   //-----------------------------------------------------

   // bool doMultipleOutputs = false;
   // TChain *chww = new TChain("Events");
   // chww->Add(Form("%s/WWTo2L2Nu_PU_testFinal_baby.root",babyPath));
   // chww->Add(Form("%s/GluGluToWWTo4L_PU_testFinal_baby.root",babyPath));
   // TChain *chwjets = new TChain("Events");
   // chwjets->Add(Form("%s/WJetsToLNu_PU_testFinal_baby.root",babyPath));
   // TChain *chtt = new TChain("Events");
   // chtt->Add(Form("%s/TTJets_PU_testFinal_baby.root",babyPath));

   // std::map<std::string,int> includeBkg;
   // includeBkg["ww"]    = 1;
   // includeBkg["wjets"] = 0;
   // includeBkg["tt"]    = 0;

   //---------------------------------------------------------------

   // This loads the library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Cut optimisation
   Use["Cuts"]            = 0;
   Use["CutsD"]           = 0;
   Use["CutsPCA"]         = 0;
   Use["CutsGA"]          = 0;
   Use["CutsSA"]          = 0;
   //
   // --- 1-dimensional likelihood ("naive Bayes estimator")
   Use["Likelihood"]      = 0;
   Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"]   = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"]   = 0;
   Use["LikelihoodMIX"]   = 0;
   //
   // --- Multidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]           = 0;
   Use["PDERSD"]          = 0;
   Use["PDERSPCA"]        = 0;
   Use["PDEFoam"]         = 0;
   Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
   Use["KNN"]             = 0; // k-nearest neighbour method
   //
   // --- Linear Discriminant Analysis
   Use["LD"]              = 0; // Linear Discriminant identical to Fisher
   Use["Fisher"]          = 0;
   Use["FisherG"]         = 0;
   Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
   Use["HMatrix"]         = 0;
   //
   // --- Function Discriminant analysis
   Use["FDA_GA"]          = 0; // minimisation of user-defined function using Genetic Algorithm
   Use["FDA_SA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   Use["FDA_MCMT"]        = 0;
   //
   // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
   Use["MLP"]             = 0; // Recommended ANN
   Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
   Use["MLPBNN"]          = 0; // Recommended ANN with BFGS training method and Bayesian regulator
   Use["CFMlpANN"]        = 0; // Deprecated ANN from ALEPH
   Use["TMlpANN"]         = 0; // ROOT's own ANN
   //
   // --- Support Vector Machine
   Use["SVM"]             = 0;
   //
   // --- Boosted Decision Trees
   Use["BDT"]             = 1; // uses Adaptive Boost
   Use["BDT1"]            = 0; // uses Adaptive Boost
   Use["BDTG"]            = 0; // uses Gradient Boost
   Use["BDTB"]            = 0; // uses Bagging
   Use["BDTD"]            = 0; // decorrelation + Adaptive Boost
   //
   // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
   Use["RuleFit"]         = 0;
   //
   // --- multi-output MVA's
   Use["multi_BDTG"]      = 0;
   Use["multi_MLP"]       = 0;
   Use["multi_FDA_GA"]    = 0;
   //
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   // --- Here the preparation phase begins

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName = "TMVA_" + signal_name + "_" + s_train_region;
   if (signal_name == "T2bw") outfileName = outfileName + "_" + s_x_parameter;
   outfileName += ".root";
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   TString classification_name = "classification_" + signal_name + "_" + s_train_region;
   if (signal_name == "T2bw") classification_name = classification_name + "_" + s_x_parameter;

   /*
   TString multioutfileName( "TMVA_HWW_multi.root" );
   TFile*  multioutputFile;
   if( doMultipleOutputs )
      multioutputFile = TFile::Open( multioutfileName, "RECREATE" );
   */

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory is
   // the only TMVA object you have to interact with.
   //
   // The first argument is the base of the name of all the
   // weight files in the directory weight/
   //
   // The second argument is the output file for the training results.
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( classification_name, outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

   /*
   TMVA::Factory *multifactory;
   if( doMultipleOutputs )
      multifactory = new TMVA::Factory( "TMVAMulticlass", multioutputFile,
                                        "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=multiclass" );
   */

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training.
   // Note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   // factory->AddVariable( "myvar1 := var1+var2", 'F' );
   // factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' );
   // factory->AddVariable( "var3", "Variable 3", "units", 'F' );
   // factory->AddVariable( "var4", "Variable 4", "units", 'F' );

   //--------------------------------------------------------
   // choose which variables to include in training
   //--------------------------------------------------------

   if( mvaVar[ "met" ]      == 1 ) factory->AddVariable( "mini_met"      , "E_{T}^{miss}" , "GeV", 'F' );
   if( mvaVar[ "mt" ]       == 1 ) factory->AddVariable( "mini_mt"       , "M_{T}"        , "GeV", 'F' );
   if( mvaVar[ "mt2w" ]     == 1 ) factory->AddVariable( "mini_mt2w"     , "MT2W"         , "GeV", 'F' );
   if( mvaVar[ "mt2bl" ]    == 1 ) factory->AddVariable( "mini_mt2bl"    , "MT2bl"        , "GeV", 'F' );
   if( mvaVar[ "mt2b" ]     == 1 ) factory->AddVariable( "mini_mt2b"     , "MT2b"         , "GeV", 'F' );
   if( mvaVar[ "chi2" ]     == 1 ) factory->AddVariable( "mini_chi2"     , "chi2"         , ""   , 'F' );
   if( mvaVar[ "lep1pt" ]   == 1 ) factory->AddVariable( "mini_lep1pt"   , "lepton pt"    , ""   , 'F' );
   if( mvaVar[ "lep1eta" ]  == 1 ) factory->AddVariable( "mini_lep1eta"  , "lepton eta"   , ""   , 'F' );
   if( mvaVar[ "thrjetlm" ] == 1 ) factory->AddVariable( "mini_thrjetlm" , "thrust"       , ""   , 'F' );
   if( mvaVar[ "apljetlm" ] == 1 ) factory->AddVariable( "mini_apljetlm" , "aplanarity"   , ""   , 'F' );
   if( mvaVar[ "sphjetlm" ] == 1 ) factory->AddVariable( "mini_sphjetlm" , "sphericity"   , ""   , 'F' );
   if( mvaVar[ "cirjetlm" ] == 1 ) factory->AddVariable( "mini_cirjetlm" , "circularity"  , ""   , 'F' );
   if( mvaVar[ "chi2min" ]  == 1 ) factory->AddVariable( "mini_min(chi2min,100)" , "#chi^{2}_{min}" , "" , 'F' );
   if( mvaVar[ "chi2minprob" ]       == 1 ) factory->AddVariable( "mini_chi2minprob"  , "Prob(#chi^{2}_{min})"        , "", 'F' );
   if( mvaVar[ "chi2min_mt2b" ]      == 1 ) factory->AddVariable( "mini_chi2min_mt2b" , "MT2b(#chi^{2}_{min})"        , "", 'F' );
   if( mvaVar[ "chi2min_mt2bl" ]     == 1 ) factory->AddVariable( "mini_chi2min_mt2bl", "MT2bl(#chi^{2}_{min})"       , "", 'F' );
   if( mvaVar[ "chi2min_mt2w" ]      == 1 ) factory->AddVariable( "mini_chi2min_mt2w" , "MT2W(#chi^{2}_{min})"        , "", 'F' );
   if( mvaVar[ "mt2bmin" ]           == 1 ) factory->AddVariable( "mini_mt2bmin"      , "MT2b_{min}"                  , "", 'F' );
   if( mvaVar[ "mt2blmin" ]          == 1 ) factory->AddVariable( "mini_mt2blmin"     , "MT2bl_{min}"                 , "", 'F' );
   if( mvaVar[ "mt2wmin" ]           == 1 ) factory->AddVariable( "mini_mt2wmin"      , "MT2W_{min}"                  , "", 'F' );
   if( mvaVar[ "mt2bmin_chi2" ]      == 1 ) factory->AddVariable( "min(mt2bmin_chi2,100)"  , "#chi^{2}(MT2b_{min})"   , "", 'F' );
   if( mvaVar[ "mt2blmin_chi2" ]     == 1 ) factory->AddVariable( "min(mt2blmin_chi2,100)" , "#chi^{2}(MT2bl_{min})"  , "", 'F' );
   if( mvaVar[ "mt2wmin_chi2" ]      == 1 ) factory->AddVariable( "min(mt2wmin_chi2,100)"  , "#chi^{2}(MT2W_{min})"   , "", 'F' );
   if( mvaVar[ "mt2bmin_chi2prob" ]  == 1 ) factory->AddVariable( "mt2bmin_chi2prob"  , "Prob(#chi^{2}(MT2b_{min}))"  , "", 'F' );
   if( mvaVar[ "mt2blmin_chi2prob" ] == 1 ) factory->AddVariable( "mt2blmin_chi2prob" , "Prob(#chi^{2}(MT2bl_{min}))" , "", 'F' );
   if( mvaVar[ "mt2wmin_chi2prob" ]  == 1 ) factory->AddVariable( "mt2wmin_chi2prob"  , "Prob(#chi^{2}(MT2W_{min}))"  , "", 'F' );
   if( mvaVar[ "htratiol" ]          == 1 ) factory->AddVariable( "mini_htssl/(mini_htosl+mini_htssl)" , "H_{T}^{SSL}/H_{T}" , "", 'F' );
   if( mvaVar[ "htratiom" ]          == 1 ) factory->AddVariable( "mini_htssm/(mini_htosm+mini_htssm)" , "H_{T}^{SSM}/H_{T}" , "", 'F' );
   if( mvaVar[ "dphimj1" ]           == 1 ) factory->AddVariable( "mini_dphimj1"      , "#Delta#phi(j1,E_{T}^{miss})" , "", 'F' );
   if( mvaVar[ "dphimj2" ]           == 1 ) factory->AddVariable( "mini_dphimj2"      , "#Delta#phi(j2,E_{T}^{miss})" , "", 'F' );
   if( mvaVar[ "dphimjmin" ]         == 1 ) factory->AddVariable( "mini_dphimjmin"    , "min(#Delta#phi(j_{1,2},E_{T}^{miss}))", "", 'F' );
   if( mvaVar[ "rand" ]              == 1 ) factory->AddVariable( "mini_rand"         , "random(0,1)"                 , "", 'F' );
   if( mvaVar[ "metsig" ]            == 1 ) factory->AddVariable( "met/sqrt(htosl+htssl)" , "E_{T}^{miss}/#sqrt{H_{T}}" , "#sqrt{GeV}", 'F' );
   if( mvaVar[ "pt_b" ]              == 1 ) factory->AddVariable( "mini_pt_b"         , "P_T(b) GeV"                  , 'F' );
   if( mvaVar[ "nb" ]                == 1 ) factory->AddVariable( "mini_nb"           , "N(b)"                        , 'F' );
   if( mvaVar[ "pt_J1" ]             == 1 ) factory->AddVariable( "pt_J1"             , "P_T(J1) GeV"                 , 'F' );
   if( mvaVar[ "pt_J2" ]             == 1 ) factory->AddVariable( "pt_J2"             , "P_T(J2) GeV"                 , 'F' );

   /*
   if( doMultipleOutputs ){
      if (mvaVar["lephard_pt"])       multifactory->AddVariable( "lephard_pt"      , "1st lepton pt"    , "GeV", 'F' );
      if (mvaVar["lepsoft_pt"])       multifactory->AddVariable( "lepsoft_pt"      , "2nd lepton pt"    , "GeV", 'F' );
      if (mvaVar["dil_dphi"])         multifactory->AddVariable( "dil_dphi"        , "dphi(ll)"         , ""   , 'F' );
      if (mvaVar["dil_mass"])         multifactory->AddVariable( "dil_mass"        , "M(ll)"            , "GeV", 'F' );
      if (mvaVar["event_type"])       multifactory->AddVariable( "event_type"      , "Dil Flavor Type"  , ""   , 'F' );
      if (mvaVar["met_projpt"])       multifactory->AddVariable( "met_projpt"      , "Proj. MET"        , "GeV", 'F' );
      if (mvaVar["met_pt"])           multifactory->AddVariable( "met_pt"          , "MET"              , "GeV", 'F' );
      if (mvaVar["mt_lephardmet"])    multifactory->AddVariable( "mt_lephardmet"   , "MT(lep1,MET)"     , "GeV", 'F' );
      if (mvaVar["mt_lepsoftmet"])    multifactory->AddVariable( "mt_lepsoftmet"   , "MT(lep2,MET)"     , "GeV", 'F' );
      if (mvaVar["mthiggs"])          multifactory->AddVariable( "mthiggs"         , "MT(Higgs)"        , "GeV", 'F' );
      if (mvaVar["dphi_lephardmet"])  multifactory->AddVariable( "dphi_lephardmet" , "dphi(lep1,MET)"   , "GeV", 'F' );
      if (mvaVar["dphi_lepsoftmet"])  multifactory->AddVariable( "dphi_lepsoftmet" , "dphi(lep2,MET)"   , "GeV", 'F' );
      if (mvaVar["lepsoft_fbrem"])    multifactory->AddVariable( "lepsoft_fbrem"   , "2nd lepton f_{brem}", "", 'F' );
      if (mvaVar["lepsoft_eOverPIn"]) multifactory->AddVariable( "lepsoft_eOverPIn", "2nd lepton E/p"   , ""  , 'F' );
      if (mvaVar["lepsoft_qdphi"])    multifactory->AddVariable( "lepsoft_q * lepsoft_dPhiIn", "2nd lepton q#times#Delta#phi", "", 'F' );
   }
   */

   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables.
   // factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' );
   // factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' );

   // TTree* signalTrainingTree = (TTree*) chSignalTrain;
   // TTree* signalTestTree     = (TTree*) chSignalTest;
   //
   // TTree* bkgTrainingTree    = (TTree*) chBkgTrain;
   // TTree* bkgTestTree        = (TTree*) chBkgTest;

   // std::cout << "--- TMVAClassification : Using bkg input files: -------------------" << std::endl;
   //
   // TObjArray *listOfBkgFiles = chbackground->GetListOfFiles();
   // TIter bkgFileIter(listOfBkgFiles);
   // TChainElement* currentBkgFile = 0;
   //
   // while((currentBkgFile = (TChainElement*)bkgFileIter.Next())) {
   //    std::cout << currentBkgFile->GetTitle() << std::endl;
   // }
   //
   // std::cout << "--- TMVAClassification : Using sig input files: -------------------" << std::endl;
   //
   // TObjArray *listOfSigFiles = chsignal->GetListOfFiles();
   // TIter sigFileIter(listOfSigFiles);
   // TChainElement* currentSigFile = 0;
   //
   // while((currentSigFile = (TChainElement*)sigFileIter.Next())) {
   //    std::cout << currentSigFile->GetTitle() << std::endl;
   // }

   // global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;

   // You can add an arbitrary number of signal or background trees
   // factory->AddSignalTree    ( chSignal,     signalWeight );
   // factory->AddBackgroundTree( chBackground, backgroundWeight );

   factory->AddTree(chSignal,     "Signal",     signalWeight,     sel0+"mini_rand < 0.5",  "train");
   factory->AddTree(chSignal,     "Signal",     signalWeight,     sel0+"mini_rand >= 0.5", "test");
   factory->AddTree(chBackground, "Background", backgroundWeight, sel0+"mini_rand < 0.5",  "train");
   factory->AddTree(chBackground, "Background", backgroundWeight, sel0+"mini_rand >= 0.5", "test");

   // To give different trees for training and testing, do as follows:
   //    factory->AddSignalTree( signalTrainingTree, signalWeight, "Training" );
   //    factory->AddSignalTree( signalTestTree,     signalWeight, "Test" );
   //    factory->AddBackgroundTree( bkgTrainingTree, backgroundWeight, "Training" );
   //    factory->AddBackgroundTree( bkgTestTree,     backgroundWeight, "Test" );

   // Use the following code instead of the above two or four lines to add signal and background
   // training and test events "by hand".
   // NOTE that in this case one should not give expressions (such as "var1+var2") in the input
   // variable definition, but simply compute the expression before adding the event.
   //
   // --- begin ----------------------------------------------------------
   // std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
   // Float_t  treevars[4], weight;
   //
   // // Signal
   // for (UInt_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   // for (UInt_t i=0; i<signal->GetEntries(); i++) {
   //    signal->GetEntry(i);
   //    for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //    // add training and test events; here: first half is training, second is testing
   //    // note that the weight can also be event-wise
   //    if (i < signal->GetEntries()/2.0) factory->AddSignalTrainingEvent( vars, signalWeight );
   //    else                              factory->AddSignalTestEvent    ( vars, signalWeight );
   // }
   //
   // // Background (has event weights)
   // background->SetBranchAddress( "weight", &weight );
   // for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   // for (UInt_t i=0; i<background->GetEntries(); i++) {
   //    background->GetEntry(i);
   //    for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //    // add training and test events; here: first half is training, second is testing
   //    // note that the weight can also be event-wise
   //    if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight*weight );
   //    else                                factory->AddBackgroundTestEvent    ( vars, backgroundWeight*weight );
   // }
   // --- end ------------------------------------------------------------
   //
   // --- end of tree registration

   // Set individual event weights (the variables must exist in the original TTree)
   factory->SetSignalWeightExpression    ("mini_weight");
   factory->SetBackgroundWeightExpression("mini_weight");

   /*
   if( doMultipleOutputs ){
      multifactory->AddTree(signal,"Signal");
      multifactory->SetSignalWeightExpression    ("event_scale1fb");
      multifactory->SetBackgroundWeightExpression("event_scale1fb");
      multifactory->SetWeightExpression("event_scale1fb");
      if( includeBkg["ww"] ){
         TTree* ww = (TTree*) chww;
         multifactory->AddTree(ww,"WW");
         cout << "Added WW to multi-MVA" << endl;
      }
      if( includeBkg["wjets"] ){
         TTree* wjets = (TTree*) chwjets;
         multifactory->AddTree(wjets,"WJets");
         cout << "Added W+jets to multi-MVA" << endl;
      }
      if( includeBkg["tt"] ){
         TTree* tt = (TTree*) chtt;
         multifactory->AddTree(tt,"tt");
         cout << "Added ttbar to multi-MVA" << endl;
      }
   }
   */

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = sel0; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = sel0; // for example: TCut mycutb = "abs(var1)<0.5";

   // Tell the factory how to use the training and testing events.
   //
   // If no numbers of events are given, half of the events in the tree are used
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut,
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );

   // Use random splitting:
   // factory->PrepareTrainingAndTestTree( mycuts, mycutb,
   //                                      "nTrain_Signal=100000:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
   factory->PrepareTrainingAndTestTree( "", "", "nTrain_Signal=0:nTrain_Background=0:NormMode=None:!V" );
   // if( doMultipleOutputs ){
   //    multifactory->PrepareTrainingAndTestTree( mycuts, mycutb,
   //                                              "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
   // }

   // Use alternate splitting
   // (this is preferable since it's easier to track which events were used for training,
   //  but the job crashes! need to fix this...)
   // factory->PrepareTrainingAndTestTree( mycuts, mycutb,
   //                                      "nTrain_Signal=0:nTrain_Background=0:SplitMode=Alternate:NormMode=NumEvents:!V" );

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // It is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1:...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

   if (Use["CutsSA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                           "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   // Likelihood ("naive Bayes estimator")
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // Decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   // PCA-transformed likelihood
   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" );

   // Use a kernel density estimator to approximate the PDFs
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" );

   // Use a variable-dependent mix of splines and kernel density estimator
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" );

   // Test the multi-dimensional probability density estimator.
   // Here are the options strings for the MinMax and RMS methods, respectively:
   //    "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3"
   //    "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3"
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["PDEFoamBoost"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
                           "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // H-Matrix (chi-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" );

   // Linear discriminant (same as Fisher discriminant)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD",
                           "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher discriminant (same as LD)
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher",
                           "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher",
                           "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see CutsSA options)
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see CutsSA options)
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP",
                           "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );
      // factory->BookMethod( TMVA::Types::kMLP, "MLP",
      //                      "!H:!V:NeuronType=tanh:VarTransform=N:NCycles=1000:HiddenLayers=N+N:TestRate=5:!UseRegulator:LearningRate=0.2:DecayRate=0.001:BPMode=batch:BatchSize=500" );

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS",
                           "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN",
                           "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with Bayesian regulators

   // CF (Clermont-Ferrand) ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN",
                           "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:...

   // Tmlp (ROOT) ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN",
                           "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:...

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDTG"]) // Gradient Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5" );

   if (Use["BDT"]) // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   if (Use["BDT1"]) // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT1",
                           "!H:!V:NTrees=200:nEventsMin=300:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=4:PruneMethod=NoPruning" );

   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // if( doMultipleOutputs ){
   //    if (Use["multi_BDTG"]) // gradient boosted decision trees
   //       multifactory->BookMethod( TMVA::Types::kBDT, "BDTG",
"!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.50:nCuts=20:NNodesMax=8"); // if (Use["multi_MLP"]) // neural network // multifactory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:NCycles=1000:HiddenLayers=N+5,5:TestRate=5:EstimatorType=MSE"); // if (Use["multi_FDA_GA"]) // functional discriminant with GA minimizer // multifactory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); // } // For an example of the category classifier usage, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","GA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // if( doMultipleOutputs ){ // // Train nulti-MVAs using the set of training events // multifactory->TrainAllMethods(); // // ---- Evaluate all multi-MVAs using the set of test events // multifactory->TestAllMethods(); // // ----- Evaluate and compare performance of all configured multi-MVAs // multifactory->EvaluateAllMethods(); // } // -------------------------------------------------------------- // Save the output outputFile->Close(); //if( doMultipleOutputs ) multioutputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void TMVAMulticlass(){ TString outfileName = "TMVAMulticlass.root"; TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "TMVAMulticlass", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=multiclass" ); factory->AddVariable( "var0", 'F' ); factory->AddVariable( "var1", 'F' ); TFile *input(0); TString fname = "./data.root"; if (!gSystem->AccessPathName( fname )) { // first we try to find data.root in the local directory std::cout << "--- TMVAMulticlass : Accessing " << fname << std::endl; input = TFile::Open( fname ); } else { gROOT->LoadMacro( "./createData.C"); create_multiclassdata(20000); cout << " created data.root for tests of the multiclass features"<<endl; input = TFile::Open( fname ); } if (!input) { std::cout << "ERROR: could not open data file" << std::endl; exit(1); } TTree *tree = (TTree*)input->Get("TreeR"); gROOT->cd( outfileName+TString(":/") ); factory->AddTree ( tree, "Signal1", 1. , "cls==0" ); factory->AddTree ( tree, "Signal2", 1. , "cls==1" ); factory->AddTree ( tree, "Background", 1., "cls==2" ); factory->PrepareTrainingAndTestTree( "", "SplitMode=Random:NormMode=NumEvents:!V" ); factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5"); factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:VarTransform=N:NCycles=100:HiddenLayers=N+5,3:TestRate=5"); // testing vartransforms factory->BookMethod( TMVA::Types::kMLP, "MLP2", "!H:!V:NeuronType=tanh:NCycles=100:HiddenLayers=N+5,3:TestRate=5"); factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x0*x1:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAMulticlass is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
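// Multiclass application sketch (illustrative addition, with assumed file names): for an
// AnalysisType=multiclass training, TMVA::Reader::EvaluateMulticlass returns one response
// value per class, ordered as the classes were registered above (Signal1, Signal2, Background).
void TMVAMulticlassApplicationSketch()
{
   Float_t var0, var1;
   TMVA::Reader reader( "!Color:!Silent" );
   reader.AddVariable( "var0", &var0 );
   reader.AddVariable( "var1", &var1 );
   // weight-file name assumes the Factory job name "TMVAMulticlass" used above
   reader.BookMVA( "BDT method", "weights/TMVAMulticlass_BDT.weights.xml" );
   var0 = 0.3; var1 = -0.7; // dummy input values
   const std::vector<Float_t>& response = reader.EvaluateMulticlass( "BDT method" );
   std::cout << "Signal1: " << response[0] << " Signal2: " << response[1]
             << " Background: " << response[2] << std::endl;
}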
int TMVAKaggleHiggs ( TString myMethodList = "" ) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Cut optimisation Use["Cuts"] = 0; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // // --- 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 0; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // --- Multidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 0; Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting Use["KNN"] = 0; // k-nearest neighbour method // // --- Linear Discriminant Analysis Use["LD"] = 0; // Linear Discriminant identical to Fisher Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 0; // minimisation of user-defined function using Genetic Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 0; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 0; // Recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // Deprecated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN Use["NN"] = 1; // improved implementation of a NN // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 1; // uses Adaptive Boost Use["BDTG"] = 0; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of fisher discriminant for node splitting // // --- Friedman's RuleFit method, i.e., an optimised series of cuts ("rules") Use["RuleFit"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAKaggleHiggs" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. 
Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return 1; } Use[regMethod] = 1; } } // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Read training and test data // (it is also possible to use ASCII format as input -> see TMVA Users Guide) TString fname = "./training.root"; if (gSystem->AccessPathName( fname )) // file does not exist in local directory gSystem->Exec("curl -O http://root.cern.ch/files/tmva_class_example.root"); TFile *input = TFile::Open( fname ); std::cout << "--- TMVAClassification : Using input file: " << input->GetName() << std::endl; // --- Register the training and test trees TTree *tree = (TTree*)input->Get("data"); // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] // factory->AddVariable( "myvar1 := var1+var2", 'F' ); // factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' ); // factory->AddVariable( "var3", "Variable 3", "units", 'F' ); // factory->AddVariable( "var4", "Variable 4", "units", 'F' ); TString limit ("-900.0"); TString replacementValue ("0.0"); std::vector<std::string> vars = {"DER_mass_MMC","DER_mass_transverse_met_lep","DER_mass_vis","DER_pt_h","DER_deltaeta_jet_jet","DER_mass_jet_jet","DER_prodeta_jet_jet","DER_deltar_tau_lep","DER_pt_tot","DER_sum_pt","DER_pt_ratio_lep_tau","DER_met_phi_centrality","DER_lep_eta_centrality","PRI_tau_pt","PRI_tau_eta","PRI_tau_phi","PRI_lep_pt","PRI_lep_eta","PRI_lep_phi","PRI_met","PRI_met_phi","PRI_met_sumet","PRI_jet_num","PRI_jet_leading_pt","PRI_jet_leading_eta","PRI_jet_leading_phi","PRI_jet_subleading_pt","PRI_jet_subleading_eta","PRI_jet_subleading_phi","PRI_jet_all_pt"}; for (std::vector<std::string>::iterator it = vars.begin (), itEnd = vars.end (); it != itEnd; ++it) { std::string s = *it; TString current; current.Form ("%s:=(%s<%s?%s:%s)",s.c_str (), s.c_str (), limit.Data (), replacementValue.Data (), s.c_str ()); factory->AddVariable (current, 'F'); } // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. 
This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables // factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' ); // factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' ); // global event weights per tree (see below for setting event-wise weights) Double_t weight = 1.0; // Double_t backgroundWeight = 1.0; // You can add an arbitrary number of signal or background trees // factory->AddBackgroundTree( background, backgroundWeight ); factory->AddTree(tree, "Signal", 1., "Label == 1"); factory->AddTree(tree, "Background", 1., "Label == 0"); // To give different trees for training and testing, do as follows: // factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ); // factory->AddSignalTree( signalTestTree, signalTestWeight, "Test" ); // Use the following code instead of the above two or four lines to add signal and background // training and test events "by hand" // NOTE that in this case one should not give expressions (such as "var1+var2") in the input // variable definition, but simply compute the expression before adding the event // // // --- begin ---------------------------------------------------------- // std::vector<Double_t> vars( 4 ); // vector has size of number of input variables // Float_t treevars[4], weight; // // // Signal // for (UInt_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (UInt_t i=0; i<signal->GetEntries(); i++) { // signal->GetEntry(i); // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < signal->GetEntries()/2.0) factory->AddSignalTrainingEvent( vars, signalWeight ); // else factory->AddSignalTestEvent ( vars, signalWeight ); // } // // // Background (has event weights) // background->SetBranchAddress( "weight", &weight ); // for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (UInt_t i=0; i<background->GetEntries(); i++) { // background->GetEntry(i); // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight*weight ); // else factory->AddBackgroundTestEvent ( vars, backgroundWeight*weight ); // } // --- end ------------------------------------------------------------ // // --- end of tree registration // Set individual event weights (the variables must exist in the original TTree) // for signal : factory->SetSignalWeightExpression ("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("weight1*weight2"); factory->SetSignalWeightExpression( "Weight" ); factory->SetBackgroundWeightExpression( "Weight" ); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts = "Label==1"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycutb = "Label==0"; // for example: TCut mycutb = "abs(var1)<0.5"; // Tell the factory how to use the training and testing events // // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" 
); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=0:nTest_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // factory->PrepareTrainingAndTestTree( mycuts, mycutb, // "nTrain_Signal=5000:nTrain_Background=5000:nTest_Signal=5000:nTest_Background=5000:SplitMode=Random:NormMode=NumEvents:!V" ); // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood ("naive Bayes estimator") if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density estimator to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // 
"!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", 
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // improved neural network implementation if (Use["NN"]) { // TString layoutString ("Layout=TANH|(N+100)*2,LINEAR"); // TString layoutString ("Layout=SOFTSIGN|100,SOFTSIGN|50,SOFTSIGN|20,LINEAR"); // TString layoutString ("Layout=RELU|300,RELU|100,RELU|30,RELU|10,LINEAR"); // TString layoutString ("Layout=SOFTSIGN|50,SOFTSIGN|30,SOFTSIGN|20,SOFTSIGN|10,LINEAR"); // TString layoutString ("Layout=TANH|50,TANH|30,TANH|20,TANH|10,LINEAR"); // TString layoutString ("Layout=SOFTSIGN|50,SOFTSIGN|20,LINEAR"); TString layoutString ("Layout=SOFTSIGN|70,SOFTSIGN|30,LINEAR"); std::vector<TString> strategy; strategy.push_back (TString ("LearningRate=1e-2,Momentum=0.9,Repetitions=1,ConvergenceSteps=70,BatchSize=120,TestRepetitions=7,WeightDecay=0.001,Regularization=NONE,DropConfig=0.5+0.5+0.5+0.5,DropRepetitions=1,Multithreading=True")); strategy.push_back (TString ("LearningRate=1e-4,Momentum=0.5,Repetitions=1,ConvergenceSteps=70,BatchSize=80,TestRepetitions=7,WeightDecay=0.001,Regularization=L2,Multithreading=True,DropConfig=0.1+0.1+0.1+0.1,DropRepetitions=1")); strategy.push_back (TString ("LearningRate=1e-5,Momentum=0.3,Repetitions=1,ConvergenceSteps=70,BatchSize=60,TestRepetitions=7,WeightDecay=0.0001,Regularization=L2,Multithreading=True")); strategy.push_back (TString ("LearningRate=1e-6,Momentum=0.0,Repetitions=1,ConvergenceSteps=70,BatchSize=40,TestRepetitions=7,WeightDecay=0.0001,Regularization=NONE,Multithreading=True")); // strategy.push_back (TString ("LearningRate=1e-6,Momentum=0.0,Repetitions=1,ConvergenceSteps=50,BatchSize=30,TestRepetitions=7,WeightDecay=0.0001,Regularization=NONE,Multithreading=True")); TString trainingStrategyString ("TrainingStrategy="); for (std::vector<TString>::const_iterator it = strategy.begin (), itEnd = strategy.end (); it != itEnd; ++it) { if (it != strategy.begin ()) trainingStrategyString += "|"; trainingStrategyString += *it; } // TString nnOptions ("!H:V:VarTransform=Normalize:ErrorStrategy=CROSSENTROPY"); TString nnOptions ("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=G:WeightInitialization=XAVIERUNIFORM"); // TString nnOptions 
("!H:V:VarTransform=Normalize:ErrorStrategy=CHECKGRADIENTS"); nnOptions.Append (":"); nnOptions.Append (layoutString); nnOptions.Append (":"); nnOptions.Append (trainingStrategyString); factory->BookMethod( TMVA::Types::kNN, "NN", nnOptions ); // NN } // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" ); if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier usage, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // ---- STILL EXPERIMENTAL and only implemented for BDT's ! // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","FitGA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVA::TMVAGui( outfileName ); return 0; }
void TMVARegression( int optimIndex, int Cat=0, TString myMethodList = "" ) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVARegression.C\(\"myMethod1,myMethod2,myMethod3\"\) // //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Multidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDEFoam"] = 0; Use["KNN"] = 0; // // --- Linear Discriminant Analysis Use["LD"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; // // --- Neural Network Use["MLP"] = 0; // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 0; Use["BDTG"] = 1; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVARegression" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a new root output file TString outfileName( Form("TMVAoutput/TMVAReg_%i.root",optimIndex) ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory will // then run the performance analysis for you. // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" 
(not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( Form("TMVARegression_%i_Cat%i",optimIndex,Cat), outputFile, "!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=Regression" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] factory->AddVariable( "jet_eta", "jet_eta", "units", 'F' ); factory->AddVariable( "jet_emfrac", "jet_emfrac", "units", 'F' ); factory->AddVariable( "jet_hadfrac", "jet_hadfrac", "units", 'F' ); factory->AddVariable( "jet_nconstituents", "jet_nconst", "units", 'F' ); factory->AddVariable( "jet_vtx3dL", "jet_vtx3dL", "units", 'F' ); factory->AddVariable( "MET", "MET", "units", 'F' ); factory->AddVariable( "jet_dPhiMETJet", "jet_dPhiMETJet", "units", 'F' ); //factory->AddVariable( "hJet_vtxPt", "hJet_vtxPt", "units", 'F' ); //factory->AddVariable( "hJet_JECUnc", "hJet_JECUnc", "units", 'F' ); //factory->AddVariable( "hJet_ptLeadTrack", "hJet_ptLeadTrack", "units", 'F' ); //factory->AddVariable( "hJet_SoftLeptPtCut:=(hJet_SoftLeptIdlooseMu>0. || hJet_SoftLeptId95>0.) ? (hJet_SoftLeptPt) : (-99)", "hJet_SoftLeptPt", "units", 'F' ); //factory->AddVariable( "hJet_En", "hJet_En", "units", 'F' ); //factory->AddVariable( "hJet_Et", "hJet_Et", "units", 'F' ); //factory->AddVariable( "hJet_Mt", "hJet_Mt", "units", 'F' ); //factory->AddVariable( "hJet_nch", "hJet_nch", "units", 'F' ); //factory->AddVariable( "hJet_vtx3deL", "hJet_vtx3deL", "units", 'F' ); //factory->AddVariable( "hJet_vtxMass", "hJet_vtxMass", "units", 'F' ); //factory->AddVariable( "hJet_ptRaw", "hJet_ptRaw", "units", 'F' ); //factory->AddVariable( "hJet_EnRaw", "hJet_EnRaw", "units", 'F' ); //factory->AddVariable( "hJet_SoftLeptptRelCut:=(hJet_SoftLeptIdlooseMu>0. || hJet_SoftLeptId95>0.) ? (hJet_SoftLeptptRel) : (-99)", "hJet_SoftLeptptRel", "units", 'F' ); //factory->AddVariable( "hJet_SoftLeptdRCut:=(hJet_SoftLeptIdlooseMu>0. || hJet_SoftLeptId95>0.) ? (hJet_SoftLeptdR) : (-99)", "hJet_SoftLeptdR", "units", 'F' ); //factory->AddVariable( "rho25", "rho25", "units", 'F' ); //factory->AddVariable( "dPhiMETJet", "dPhiMETJet", "units", 'F' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. 
This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables // Add the variable carrying the regression target //factory->AddTarget( "jet_genPt" ); factory->AddTarget( "jet_genJetPt/jet_pt" ); // It is also possible to declare additional targets for multi-dimensional regression, i.e.: // -- factory->AddTarget( "fvalue2" ); // BUT: this is currently ONLY implemented for MLP // Read training and test data (see TMVAClassification for reading ASCII files) // load the signal and background event samples from ROOT trees /* TFile *input(0); TString fname = "./tmva_reg_example.root"; if (!gSystem->AccessPathName( fname )) input = TFile::Open( fname ); // check if file in local directory exists else input = TFile::Open( "http://root.cern.ch/files/tmva_reg_example.root" ); // if not: download from ROOT server if (!input) { std::cout << "ERROR: could not open data file" << std::endl; exit(1); } std::cout << "--- TMVARegression : Using input file: " << input->GetName() << std::endl; // --- Register the regression tree TTree *regTree = (TTree*)input->Get("TreeR"); */ TChain chainTraining("Events"); chainTraining.Add("TrainingFiles/training.root"); TTree *regTreeTraining = &chainTraining; // a TChain is a TTree; taking its address (rather than casting the object) is valid C++ TChain chainTesting("Events"); chainTesting.Add("TrainingFiles/testing.root"); TTree *regTreeTesting = &chainTesting; // global event weights per tree (see below for setting event-wise weights) Double_t regWeight = 1.0; // You can add an arbitrary number of regression trees factory->AddTree( regTreeTraining, "Regression", regWeight, "", "training" ); factory->AddTree( regTreeTesting, "Regression", regWeight, "", "test" ); // Apply additional cuts on the signal and background samples (can be different) TCut mycut = "hJet_pt[0]>20. && hJet_pt[1]>20. && fabs(hJet_eta[0])<2.5 && fabs(hJet_eta[1])<2.5 && hJet_csv[0]>0. && hJet_csv[1]>0. && hJet_ptLeadTrack[0]<1500. && hJet_ptLeadTrack[1]<1500. && hJet_genJetPt[0]>0. && hJet_genJetPt[1]>0. && hJet_puJetIdL[0]>0.0 && hJet_puJetIdL[1]>0.0"; TCut testingCut = "hJet_pt[0]>20. && hJet_pt[1]>20. && abs(hJet_eta[0])<2.5 && abs(hJet_eta[1])<2.5"; TCut mjjCut = "sqrt(pow(hJet_pt[0]*cos(hJet_phi[0])+hJet_pt[1]*cos(hJet_phi[1]),2)+pow(hJet_pt[0]*sin(hJet_phi[0])+hJet_pt[1]*sin(hJet_phi[1]),2)) < 110"; if(Cat==1) mjjCut = "sqrt(pow(hJet_pt[0]*cos(hJet_phi[0])+hJet_pt[1]*cos(hJet_phi[1]),2)+pow(hJet_pt[0]*sin(hJet_phi[0])+hJet_pt[1]*sin(hJet_phi[1]),2)) > 110"; TCut jetPtCut="jet_pt>90"; if(Cat==1) jetPtCut="jet_pt>90"; //TCut trainingCut = "hJet_pt[0]>20. && hJet_pt[1]>20. && abs(hJet_eta[0])<2.5 && abs(hJet_eta[1])<2.5 && hJet_genJetPt[0]>0. && hJet_genJetPt[1]>0. && hJet_csv[0]>0.0 && hJet_csv[1]>0.0 && hJet_ptLeadTrack[0]<1500. && hJet_ptLeadTrack[1]<1500."; TCut trainingCut = "jet_pt>20. && abs(jet_eta)<2.5 && jet_genJetPt>0. 
&& jet_dRJetGenJet < 0.4 && (jet_partonID)==5"; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1"; // tell the factory to use all remaining events in the trees after training for testing: // factory->PrepareTrainingAndTestTree(mycut, "nTrain_Regression=600000:nTest_Regression=600000:SplitMode=Random:NormMode=NumEvents:!V"); //factory->PrepareTrainingAndTestTree(mycut, "nTrain_Regression=337500:nTest_Regression=337500:SplitMode=Random:NormMode=NumEvents:!V"); // factory->PrepareTrainingAndTestTree(mycut, "nTrain_Regression=158393:nTest_Regression=158393:SplitMode=Random:NormMode=NumEvents:!V"); //factory->PrepareTrainingAndTestTree(mycut, "nTrain_Regression=14197:nTest_Regression=14197:SplitMode=Random:NormMode=NumEvents:!V"); factory->PrepareTrainingAndTestTree(trainingCut+jetPtCut, "!V"); // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // ---- Book MVA methods // // please look up the various method configuration options in the corresponding cxx files, eg: // src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1:...", where [2] is the third input variable // PDE - RS method if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" ); // And the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // Linear discriminant if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "!H:!V:VarTransform=None" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. 
the formula of this example is good for parabolas factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); // Neural network (MLP) if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" ); // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); //100, 5 // Boosted Decision Tree //100,5 nCuts=-1 if (Use["BDT"]) factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=100:nEventsMin=4:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=-1:PruneMethod=CostComplexity:PruneStrength=30" ); const bool doScan1=0; int ntreesArray[8] = {100,200,300,400,500,600,700,800}; float shrinkageArray[3] = {0.1,0.2,0.3}; float gradbaggingfracArray[3] = {0.7,0.8,0.9}; int maxdepthArray[3] = {2,3,4}; int nnodesmaxArray[3] = {5,10,15}; if(!doScan1) shrinkageArray[2]=1.0; int nnodesmaxIndex=-1,maxdepthIndex=-1,gradbaggingfracIndex=-1,shrinkageIndex=-1,ntreesIndex=-1; if(doScan1){ nnodesmaxIndex = optimIndex/216; maxdepthIndex = (optimIndex-nnodesmaxIndex*216)/72; gradbaggingfracIndex = (optimIndex-nnodesmaxIndex*216-maxdepthIndex*72)/24; shrinkageIndex = (optimIndex-nnodesmaxIndex*216-maxdepthIndex*72-gradbaggingfracIndex*24)/8; ntreesIndex = (optimIndex-nnodesmaxIndex*216-maxdepthIndex*72-gradbaggingfracIndex*24-shrinkageIndex*8); } else{ nnodesmaxIndex = optimIndex/72; maxdepthIndex = (optimIndex-nnodesmaxIndex*72)/24; shrinkageIndex = (optimIndex-nnodesmaxIndex*72-maxdepthIndex*24)/8; ntreesIndex = (optimIndex-nnodesmaxIndex*72-maxdepthIndex*24-shrinkageIndex*8); } if (Use["BDTG"]){ if(doScan1) factory->BookMethod( TMVA::Types::kBDT, "BDTG", Form("!H:!V:NTrees=%i:BoostType=Grad:Shrinkage=%.1f:UseBaggedGrad:GradBaggingFraction=%.1f:nCuts=200:MaxDepth=%i:NNodesMax=%i",ntreesArray[ntreesIndex],shrinkageArray[shrinkageIndex],gradbaggingfracArray[gradbaggingfracIndex],maxdepthArray[maxdepthIndex],nnodesmaxArray[nnodesmaxIndex]) ); else factory->BookMethod( TMVA::Types::kBDT, "BDTG", Form("!H:!V:IgnoreNegWeights:NTrees=%i:BoostType=Grad:Shrinkage=%.1f:nCuts=200:MaxDepth=%i:NNodesMax=%i",ntreesArray[ntreesIndex],shrinkageArray[shrinkageIndex],maxdepthArray[maxdepthIndex],nnodesmaxArray[nnodesmaxIndex]) ); } if(doScan1) cout << Form("!H:!V:NTrees=%i:BoostType=Grad:Shrinkage=%.2f:UseBaggedGrad:GradBaggingFraction=%.2f:nCuts=200:MaxDepth=%i:NNodesMax=%i",ntreesArray[ntreesIndex],shrinkageArray[shrinkageIndex],gradbaggingfracArray[gradbaggingfracIndex],maxdepthArray[maxdepthIndex],nnodesmaxArray[nnodesmaxIndex])<<endl; else cout << 
Form("!H:!V:IgnoreNegWeights:NTrees=%i::BoostType=Grad:Shrinkage=%.1f:nCuts=200:MaxDepth=%i:NNodesMax=%i",ntreesArray[ntreesIndex],shrinkageArray[shrinkageIndex],maxdepthArray[maxdepthIndex],nnodesmaxArray[nnodesmaxIndex]) << endl; // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVARegression is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVARegGui( outfileName ); }
int main(int argc, char * argv[]) { //Processing input options int c; std::string outFname; outFname = std::string("QualityNaF.root"); // Open input files, get the trees TChain *mc = InputFileReader("FileListNtuples_ext.txt","parametri_geo"); // Preparing options for the TMVA::Factory std::string options( "!V:" "!Silent:" "Color:" "DrawProgressBar:" "Transformations=I;D;P;G,D:" "AnalysisType=Classification" ); //Creating the factory TFile * ldFile = new TFile(outFname.c_str(),"RECREATE"); TMVA::Factory * factory = new TMVA::Factory("QualityNaF", ldFile, options.c_str()); //Preparing variables //general /*factory->AddVariable("Chisquare", 'F'); factory->AddVariable("Layernonusati", 'I'); factory->AddVariable("NTofUsed", 'I'); factory->AddVariable("diffR", 'F'); factory->AddVariable("TOF_Up_Down", 'F');*/ //Tof //factory->AddVariable("TOFchisq_s", 'F'); //factory->AddVariable("TOFchisq_t", 'F'); //RICH factory->AddVariable("Richtotused", 'F'); factory->AddVariable("RichPhEl", 'F'); factory->AddVariable("RICHprob", 'F'); factory->AddVariable("RICHcollovertotal"); factory->AddVariable("RICHLipBetaConsistency"); factory->AddVariable("RICHTOFBetaConsistency"); factory->AddVariable("RICHChargeConsistency"); factory->AddVariable("RICHPmts"); factory->AddVariable("RICHgetExpected"); factory->AddVariable("tot_hyp_p_uncorr"); factory->AddVariable("Bad_ClusteringRICH"); factory->AddVariable("NSecondariesRICHrich"); //factory->AddVariable("HitHValldir"); //factory->AddVariable("HitHVallrefl"); //factory->AddVariable("HVBranchCheck:= (HitHValldir - HitHVoutdir) - (HitHVallrefl - HitHVoutrefl)"); factory->AddVariable("HitHVoutdir"); factory->AddVariable("HitHVoutrefl"); //Spectator Variables factory->AddSpectator("R", 'F'); factory->AddSpectator("BetaRICH_new", 'F'); //Preselection cuts std::string PreSelection = "qL1>0&&(joinCutmask&187)==187&&qL1<1.75&&R>0"; std::string ChargeCut = "qUtof>0.8&&qUtof<1.3&&qLtof>0.8&&qLtof<1.3"; std::string VelocityCut = /*"Beta<0.8";*/"((joinCutmask>>11))==1024&&BetaRICH_new>0&&BetaRICH_new<0.975"; std::string signalCut = /*"(R/Beta)*(1-Beta^2)^0.5>1.65&&GenMass>1&&GenMass<2";*/"(R/BetaRICH_new)*(1-BetaRICH_new^2)^0.5>0.5&&(R/BetaRICH_new)*(1-BetaRICH_new^2)^0.5<1.5"; std::string bkgndCut = /*"(R/Beta)*(1-Beta^2)^0.5>1.65&&GenMass>0&&GenMass<1";*/"(R/BetaRICH_new)*(1-BetaRICH_new^2)^0.5>3"; factory->AddTree(mc,"Signal" ,1,(PreSelection +"&&"+ ChargeCut + "&&" + VelocityCut + "&&"+ signalCut).c_str()); factory->AddTree(mc,"Background",1,(PreSelection +"&&"+ ChargeCut + "&&" + VelocityCut + "&&"+ bkgndCut).c_str()); // Preparing std::string preselection = ""; std::string inputparams( "SplitMode=Random:" "NormMode=NumEvents:" "!V" ); factory->PrepareTrainingAndTestTree(preselection.c_str(),inputparams.c_str()); // Training std::string trainparams ="!H:!V:MaxDepth=3"; factory->BookMethod(TMVA::Types::kBDT, "BDT", trainparams.c_str()); trainparams ="!H:!V"; factory->BookMethod(TMVA::Types::kLikelihood, "Likelihood", trainparams.c_str()); trainparams ="!H:!V:VarTransform=Decorrelate"; //factory->BookMethod(TMVA::Types::kLikelihood, "LikelihoodD", trainparams.c_str()); trainparams ="!H:!V"; //factory->BookMethod(TMVA::Types::kCuts, "Cuts", trainparams.c_str()); factory->TrainAllMethods(); factory->TestAllMethods(); factory->EvaluateAllMethods(); }
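// Side note on the cut strings above (an illustrative sketch, not part of the original
// program): instead of concatenating std::strings with "&&", ROOT's TCut composes cuts with
// automatic parenthesisation, which is safer once sub-cuts contain "||":
void AddTreesWithTCutsSketch( TMVA::Factory* factory, TTree* mc )
{
   TCut preSelection = "qL1>0&&(joinCutmask&187)==187&&qL1<1.75&&R>0";
   TCut chargeCut    = "qUtof>0.8&&qUtof<1.3&&qLtof>0.8&&qLtof<1.3";
   // operator&& wraps each operand in parentheses before joining them
   factory->AddTree( mc, "Signal", 1., preSelection && chargeCut );
}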
void TMVAMulticlass( TString myMethodList = "" )
{
   // This loads the library
   TMVA::Tools::Instance();

   // To get access to the GUI and all TMVA macros
   TString tmva_dir(TString(gRootDir) + "/tmva");
   if (gSystem->Getenv("TMVASYS"))
      tmva_dir = TString(gSystem->Getenv("TMVASYS"));
   gROOT->SetMacroPath(tmva_dir + "/test/:" + gROOT->GetMacroPath());
   gROOT->ProcessLine(".L TMVAMultiClassGui.C");

   //---------------------------------------------------------------
   // Default MVA methods to be trained + tested
   std::map<std::string, int> Use;
   Use["MLP"]     = 1;
   Use["BDTG"]    = 1;
   Use["FDA_GA"]  = 0;
   Use["PDEFoam"] = 0;
   //---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAMulticlass" << std::endl;

   if (myMethodList != "") {
      for (std::map<std::string, int>::iterator it = Use.begin(); it != Use.end(); it++)
         it->second = 0;
      std::vector<TString> mlist = TMVA::gTools().SplitString(myMethodList, ',');
      for (UInt_t i = 0; i < mlist.size(); i++) {
         std::string regMethod(mlist[i]);
         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod
                      << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string, int>::iterator it = Use.begin(); it != Use.end(); it++)
               std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // Create a new ROOT output file.
   TString outfileName = "TMVAMulticlass.root";
   TFile* outputFile = TFile::Open(outfileName, "RECREATE");

   TMVA::Factory* factory = new TMVA::Factory("TMVAMulticlass", outputFile,
      "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=multiclass");

   factory->AddVariable("var1", 'F');
   factory->AddVariable("var2", "Variable 2", "", 'F');
   factory->AddVariable("var3", "Variable 3", "units", 'F');
   factory->AddVariable("var4", "Variable 4", "units", 'F');

   TFile* input(0);
   TString fname = "./tmva_example_multiple_background.root";
   if (!gSystem->AccessPathName(fname)) {
      // First we try to find the file in the local directory
      std::cout << "--- TMVAMulticlass : Accessing " << fname << std::endl;
      input = TFile::Open(fname);
   }
   else {
      std::cout << "Creating testdata...." << std::endl;
      gROOT->ProcessLine(".L createData.C+");
      gROOT->ProcessLine("create_MultipleBackground(2000)");
      std::cout << " created tmva_example_multiple_background.root for tests of the multiclass features" << std::endl;
      input = TFile::Open(fname);
   }
   if (!input) {
      std::cout << "ERROR: could not open data file" << std::endl;
      exit(1);
   }

   TTree* signal      = (TTree*)input->Get("TreeS");
   TTree* background0 = (TTree*)input->Get("TreeB0");
   TTree* background1 = (TTree*)input->Get("TreeB1");
   TTree* background2 = (TTree*)input->Get("TreeB2");

   gROOT->cd(outfileName + TString(":/"));
   factory->AddTree(signal,      "Signal");
   factory->AddTree(background0, "bg0");
   factory->AddTree(background1, "bg1");
   factory->AddTree(background2, "bg2");

   factory->PrepareTrainingAndTestTree("", "SplitMode=Random:NormMode=NumEvents:!V");

   if (Use["BDTG"]) // gradient boosted decision trees
      factory->BookMethod(TMVA::Types::kBDT, "BDTG",
         "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.50:nCuts=20:MaxDepth=2");
   if (Use["MLP"]) // neural network
      factory->BookMethod(TMVA::Types::kMLP, "MLP",
         "!H:!V:NeuronType=tanh:NCycles=1000:HiddenLayers=N+5,5:TestRate=5:EstimatorType=MSE");
   if (Use["FDA_GA"]) // functional discriminant with GA minimizer
      factory->BookMethod(TMVA::Types::kFDA, "FDA_GA",
         "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1");
   if (Use["PDEFoam"]) // PDE-Foam approach
      factory->BookMethod(TMVA::Types::kPDEFoam, "PDEFoam",
         "!H:!V:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T");

   // Train MVAs using the set of training events
   factory->TrainAllMethods();
   // Evaluate all MVAs using the set of test events
   factory->TestAllMethods();
   // Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // Save the output
   outputFile->Close();
   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAMulticlass is done!" << std::endl;

   delete factory;

   // Launch the GUI for the ROOT macros
   if (!gROOT->IsBatch()) TMVAMultiClassGui(outfileName);
}
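// A minimal application sketch (not part of the original macro): reading back
// the multiclass response with TMVA::Reader. EvaluateMulticlass() returns one
// value per class; the ordering below (Signal, bg0, bg1, bg2) assumes it
// follows the order in which the classes were registered above, but it is
// safest to check the class list in the weights XML. The weights path assumes
// the pre-DataLoader layout "weights/TMVAMulticlass_BDTG.weights.xml".
#include <iostream>
#include <vector>
#include "TMVA/Reader.h"

void ApplyMulticlassBDTG(float v1, float v2, float v3, float v4)
{
   static float var1, var2, var3, var4;
   static TMVA::Reader* reader = 0;
   if (!reader) {
      reader = new TMVA::Reader("!Color:!Silent");
      reader->AddVariable("var1", &var1);
      reader->AddVariable("var2", &var2);
      reader->AddVariable("var3", &var3);
      reader->AddVariable("var4", &var4);
      reader->BookMVA("BDTG", "weights/TMVAMulticlass_BDTG.weights.xml"); // assumed path
   }
   var1 = v1; var2 = v2; var3 = v3; var4 = v4;
   const std::vector<Float_t>& p = reader->EvaluateMulticlass("BDTG");
   std::cout << "P(Signal)=" << p[0] << "  P(bg0)=" << p[1]
             << "  P(bg1)="  << p[2] << "  P(bg2)=" << p[3] << std::endl;
}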
std::pair<TString, TString> TMVAClassification(TString infilename,
                                               AnalysisType analysisType = AnalysisType::DIRECT,
                                               TString additionalRootFileName = "")
{
   // AnalysisType, now(), pathToData, variableNames, spectatorNames and
   // baseCut are defined earlier in this file.
   TMVA::Tools::Instance();

   std::string tmstr(now());
   TString tmstmp(tmstr.c_str());

   std::cout << "==> Start TMVAClassification" << std::endl;
   std::cout << "-------------------- open input file ---------------- " << std::endl;
   TString fname = infilename; // pathToData + infilename + TString(".root");
   if (analysisType != AnalysisType::TRANSFORMED)
      fname = pathToData + infilename + TString(".root");
   std::cout << "open file " << std::endl << fname.Data() << std::endl;

   std::cout << "-------------------- get tree ---------------- " << std::endl;
   TString treeName = "data";
   if (analysisType == AnalysisType::TRANSFORMED)
      treeName = "transformed";

   std::cout << "-------------------- create tchain with treeName ---------------- " << std::endl;
   std::cout << treeName << std::endl;
   TChain* tree = new TChain(treeName);
   std::cout << "add file" << std::endl;
   std::cout << fname << std::endl;
   tree->Add(fname);

   TChain* treeFriend(NULL);
   if (additionalRootFileName.Length() > 0) {
      std::cout << "-------------------- add additional input file ---------------- " << std::endl;
      std::cout << additionalRootFileName << std::endl;
      treeFriend = new TChain(treeName);
      treeFriend->Add(additionalRootFileName);
      tree->AddFriend(treeFriend, "p");
   }
   // tree->Draw ("mass:prediction");
   // return std::make_pair(TString("hallo"),TString ("nix"));

   TString outfileName;
   if (analysisType == AnalysisType::BACKGROUND) {
      outfileName = TString("BACK_" + infilename) + tmstmp + TString(".root");
   }
   else
      outfileName += TString("TMVA__") + tmstmp + TString(".root");

   std::cout << "-------------------- open output file ---------------- " << std::endl;
   TFile* outputFile = TFile::Open(outfileName, "RECREATE");

   std::cout << "-------------------- prepare factory ---------------- " << std::endl;
   TMVA::Factory* factory = new TMVA::Factory("TMVAClassification", outputFile,
                                              "AnalysisType=Classification:Transformations=I:!V");

   std::cout << "-------------------- add variables ---------------- " << std::endl;
   for (auto varname : variableNames) {
      factory->AddVariable(varname.c_str(), 'F');
   }
   for (auto varname : spectatorNames) {
      factory->AddSpectator(varname.c_str(), 'F');
   }

   std::cout << "-------------------- add trees ---------------- " << std::endl;
   TCut signalCut("signal==1");
   TCut backgroundCut("signal==0");
   if (analysisType == AnalysisType::TRANSFORMED) {
      signalCut     = "(signal_original==1 && signal_in==0)";
      backgroundCut = "(signal_original==0 && signal_in==0)";
   }
   if (analysisType == AnalysisType::BACKGROUND) {
      signalCut     = TString("(signal==0) * (prediction > 0.7)");
      backgroundCut = TString("(signal==0) * (prediction < 0.4)");
   }
   //tree->Draw ("prediction",signalCut);
   //return std::make_pair(TString("hallo"),TString ("nix"));
   factory->AddTree(tree, "Signal",     1.0, baseCut + signalCut,     "TrainingTesting");
   factory->AddTree(tree, "Background", 1.0, baseCut + backgroundCut, "TrainingTesting");

   TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";

   // Set individual event weights (the variables must exist in the original TTree)
   if (analysisType == AnalysisType::BACKGROUND) {
      factory->SetSignalWeightExpression("prediction");
      factory->SetBackgroundWeightExpression("1");
   }

   std::cout << "-------------------- prepare ---------------- " << std::endl;
   factory->PrepareTrainingAndTestTree(mycuts, mycutb,
      "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=0:nTest_Background=0:SplitMode=Random:NormMode=NumEvents:!V");

   TString methodName("");
   if (analysisType == AnalysisType::BACKGROUND)
      methodName = TString("TONBKG_") + tmstmp;

   if (false) { // gradient boosting training
      methodName += TString("GBDT");
      factory->BookMethod(TMVA::Types::kBDT, methodName,
         "NTrees=40:BoostType=Grad:Shrinkage=0.01:MaxDepth=7:UseNvars=6:nCuts=20:MinNodeSize=10");
   }
   if (false) {
      methodName += TString("Likelihood");
      factory->BookMethod(TMVA::Types::kLikelihood, methodName,
         "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50");
   }
   // Note: in the TMVA version this macro targets, Types::kNN together with
   // Layout/TrainingStrategy options selects the developmental deep-network
   // method ("NN", later renamed kDNN), not k-nearest neighbours.
   if (false) { // deep NN with Gaussian-transformed inputs
      TString layoutString("Layout=TANH|100,LINEAR");
      TString training0("LearningRate=1e-1,Momentum=0.0,Repetitions=1,ConvergenceSteps=300,BatchSize=20,TestRepetitions=15,WeightDecay=0.001,Regularization=NONE,DropConfig=0.0+0.5+0.5+0.5,DropRepetitions=1,Multithreading=True");
      TString training1("LearningRate=1e-2,Momentum=0.5,Repetitions=1,ConvergenceSteps=300,BatchSize=30,TestRepetitions=7,WeightDecay=0.001,Regularization=L2,Multithreading=True,DropConfig=0.0+0.1+0.1+0.1,DropRepetitions=1");
      TString training2("LearningRate=1e-2,Momentum=0.3,Repetitions=1,ConvergenceSteps=300,BatchSize=40,TestRepetitions=7,WeightDecay=0.0001,Regularization=L2,Multithreading=True");
      TString training3("LearningRate=1e-3,Momentum=0.1,Repetitions=1,ConvergenceSteps=200,BatchSize=70,TestRepetitions=7,WeightDecay=0.0001,Regularization=NONE,Multithreading=True");
      TString trainingStrategyString("TrainingStrategy=");
      trainingStrategyString += training0 + "|" + training1 + "|" + training2 + "|" + training3;
      TString nnOptions("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=G:WeightInitialization=XAVIERUNIFORM");
      nnOptions.Append(":"); nnOptions.Append(layoutString);
      nnOptions.Append(":"); nnOptions.Append(trainingStrategyString);
      methodName += TString("NNgauss");
      factory->BookMethod(TMVA::Types::kNN, methodName, nnOptions); // NN
   }
   if (false) { // deep NN with normalised inputs
      TString layoutString("Layout=TANH|200,TANH|70,LINEAR");
      TString training0("LearningRate=1e-2,Momentum=0.0,Repetitions=1,ConvergenceSteps=300,BatchSize=20,TestRepetitions=15,WeightDecay=0.001,Regularization=NONE,DropConfig=0.0+0.5+0.5+0.5,DropRepetitions=1,Multithreading=True");
      TString training1("LearningRate=1e-3,Momentum=0.5,Repetitions=1,ConvergenceSteps=300,BatchSize=30,TestRepetitions=7,WeightDecay=0.001,Regularization=L2,Multithreading=True,DropConfig=0.0+0.1+0.1+0.1,DropRepetitions=1");
      TString training2("LearningRate=1e-4,Momentum=0.3,Repetitions=1,ConvergenceSteps=300,BatchSize=40,TestRepetitions=7,WeightDecay=0.0001,Regularization=L2,Multithreading=True");
      TString training3("LearningRate=1e-5,Momentum=0.1,Repetitions=1,ConvergenceSteps=200,BatchSize=70,TestRepetitions=7,WeightDecay=0.0001,Regularization=NONE,Multithreading=True");
      TString trainingStrategyString("TrainingStrategy=");
      trainingStrategyString += training0 + "|" + training1 + "|" + training2 + "|" + training3;
      // trainingStrategyString += training0 + "|" + training2 + "|" + training3;
      // trainingStrategyString += training0 + "|" + training2;
      // TString nnOptions ("!H:V:VarTransform=Normalize:ErrorStrategy=CROSSENTROPY");
      TString nnOptions("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=N:WeightInitialization=XAVIERUNIFORM");
      // TString nnOptions ("!H:V:VarTransform=Normalize:ErrorStrategy=CHECKGRADIENTS");
      nnOptions.Append(":"); nnOptions.Append(layoutString);
      nnOptions.Append(":"); nnOptions.Append(trainingStrategyString);
      methodName += TString("NNnormalized"); // '+=' keeps any TONBKG_ prefix, consistent with the other branches
      factory->BookMethod(TMVA::Types::kNN, methodName, nnOptions); // NN
   }
   if (true) { // deep NN with PCA+Gauss-transformed inputs (the active method)
      TString layoutString("Layout=TANH|100,TANH|50,LINEAR");
      TString training0("LearningRate=1e-2,Momentum=0.0,Repetitions=1,ConvergenceSteps=100,BatchSize=20,TestRepetitions=7,WeightDecay=0.001,Regularization=NONE,DropConfig=0.0+0.5+0.5+0.5,DropRepetitions=1,Multithreading=True");
      TString training1("LearningRate=1e-3,Momentum=0.0,Repetitions=1,ConvergenceSteps=20,BatchSize=30,TestRepetitions=7,WeightDecay=0.001,Regularization=L2,Multithreading=True,DropConfig=0.0+0.1+0.1+0.1,DropRepetitions=1");
      TString training2("LearningRate=1e-4,Momentum=0.0,Repetitions=1,ConvergenceSteps=20,BatchSize=40,TestRepetitions=7,WeightDecay=0.0001,Regularization=L2,Multithreading=True");
      TString training3("LearningRate=1e-5,Momentum=0.0,Repetitions=1,ConvergenceSteps=30,BatchSize=70,TestRepetitions=7,WeightDecay=0.0001,Regularization=NONE,Multithreading=True");
      TString trainingStrategyString("TrainingStrategy=");
      trainingStrategyString += training0 + "|" + training1 + "|" + training2 + "|" + training3;
      TString nnOptions("!H:!V:ErrorStrategy=CROSSENTROPY:VarTransform=P+G:WeightInitialization=XAVIERUNIFORM");
      nnOptions.Append(":"); nnOptions.Append(layoutString);
      nnOptions.Append(":"); nnOptions.Append(trainingStrategyString);
      methodName += TString("NNPG");
      factory->BookMethod(TMVA::Types::kNN, methodName, nnOptions); // NN
   }

   factory->TrainAllMethods();
   // return std::make_pair(TString("hallo"),TString ("nix"));
   factory->TestAllMethods();
   factory->EvaluateAllMethods();

   //input->Close();
   outputFile->Close();
   // TMVA::TMVAGui (outfileName);
   delete factory;
   delete tree;

   switch (analysisType) {
   case AnalysisType::BACKGROUND:
      std::cout << "DONE BACKGROUND" << std::endl;
      break;
   case AnalysisType::DIRECT:
      std::cout << "DONE DIRECT" << std::endl;
      break;
   case AnalysisType::TRANSFORMED:
      std::cout << "DONE TRANSFORMED" << std::endl;
      break;
   };
   std::cout << "classification, return : " << outfileName << " , " << methodName << std::endl;
   return std::make_pair(outfileName, methodName);
}
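// A minimal usage sketch (not part of the original code): the (output file,
// method name) pair returned above can drive a quick inspection of the test
// sample. The "TestTree" name and its classID/method-response branches follow
// the layout that pre-DataLoader TMVA writes into the factory output file;
// if your version differs, check the file contents with f->ls().
#include <utility>
#include "TFile.h"
#include "TTree.h"

void InspectClassification()
{
   std::pair<TString, TString> result = TMVAClassification("training", AnalysisType::DIRECT);

   TFile* f = TFile::Open(result.first);
   if (!f || f->IsZombie()) return;

   TTree* testTree = (TTree*)f->Get("TestTree");
   if (testTree) {
      // classID==0 is signal, classID==1 is background in the TestTree;
      // the classifier response is stored in a branch named after the method.
      testTree->Draw(result.second + ">>hSig", "classID==0");
      testTree->Draw(result.second + ">>hBkg", "classID==1", "same");
   }
   f->Close();
}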