void trainBDT(void) { // Open input file and get tree TFile *infile = new TFile("l3bdt.root"); TTree *l3tree = (TTree*)infile->Get("l3tree"); if(l3tree == NULL){ cout << "Couldn't open \"l3bdt.root\"!" << endl; return; } // Open output root file (for TMVA) TFile *outfile = new TFile("l3BDT_out.root", "RECREATE"); TMVA::Factory *fac = new TMVA::Factory("L3",outfile,""); // Specify input tree that contains both signal and background TCut signalCut("is_good==1"); TCut backgroundCut("is_good==0"); fac->SetInputTrees(l3tree, signalCut, backgroundCut); // Add variables fac->AddVariable("Nstart_counter", 'I'); fac->AddVariable("Ntof", 'I'); fac->AddVariable("Nbcal_points", 'I'); fac->AddVariable("Nbcal_clusters", 'I'); fac->AddVariable("Ebcal_points", 'F'); fac->AddVariable("Ebcal_clusters", 'F'); fac->AddVariable("Nfcal_clusters", 'I'); fac->AddVariable("Efcal_clusters", 'F'); fac->AddVariable("Ntrack_candidates", 'I'); fac->AddVariable("Ptot_candidates", 'F'); TCut preSelectCut(""); fac->PrepareTrainingAndTestTree(preSelectCut,""); fac->BookMethod(TMVA::Types::kBDT, "BDT", ""); fac->TrainAllMethods(); fac->TestAllMethods(); fac->EvaluateAllMethods(); delete fac; outfile->Close(); delete outfile; }
void trainBJetIdMVA(TString SELECTION) { // the training is done using a dedicated tree format TFile *src = TFile::Open("bjetId_"+SELECTION+".root"); TTree *tr = (TTree*)src->Get("jets"); TFile *outf = new TFile("bjetId_"+SELECTION+"_MVA.root","RECREATE"); TCut signalCut = "abs(partonId) == 5"; TCut bkgCut = "abs(partonId) != 5"; TCut preselectionCut = "btagIdx<4 && etaIdx<4 && etaIdx>-1 && ptIdx<4"; int N = 100000; cout<<"NUMBER OF TRAINING EVENTS = "<<N<<endl; TMVA::Factory* factory = new TMVA::Factory("factory_"+SELECTION+"_",outf,"!V:!Silent:Color:DrawProgressBar:Transformations=I;G:AnalysisType=Classification" ); factory->SetInputTrees(tr,signalCut,bkgCut); factory->AddVariable("btagIdx",'I'); factory->AddVariable("etaIdx" ,'I'); factory->AddVariable("btag" ,'F'); factory->AddVariable("eta" ,'F'); char name[1000]; sprintf(name,"nTrain_Signal=%d:nTrain_Background=%d:nTest_Signal=%d:nTest_Background=%d",N,N,N,N); factory->PrepareTrainingAndTestTree(preselectionCut,name); // specify the training methods factory->BookMethod(TMVA::Types::kLikelihood,"Likelihood"); //factory->BookMethod(TMVA::Types::kBDT,"BDT_DEF"); //factory->BookMethod(TMVA::Types::kBDT,"BDT_ADA","NTrees=600:AdaBoostBeta=0.1:nCuts=35"); //factory->BookMethod(TMVA::Types::kBDT,"BDT_GRAD1","NTrees=600:nCuts=40:BoostType=Grad:Shrinkage=0.5"); factory->BookMethod(TMVA::Types::kBDT,"BDT_GRAD2","NTrees=600:nCuts=25:BoostType=Grad:Shrinkage=0.2"); factory->TrainAllMethods(); factory->TestAllMethods(); factory->EvaluateAllMethods(); }
//void TMVAClassification( TString myMethodList = "" ) void Example_Eric( TString myMethodList = "" ) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) // this loads the library TMVA::Tools::Instance(); //--------------------------------------------------------------- // default MVA methods to be trained + tested std::map<std::string,int> Use; Use["Cuts"] =0; Use["CutsD"] =0; Use["CutsPCA"] =0; Use["CutsGA"] =0; Use["CutsSA"] =0; // --- Use["Likelihood"] =0; Use["LikelihoodD"] =0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] =1; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] =0; Use["LikelihoodMIX"] =0; // --- Use["PDERS"] =0; Use["PDERSD"] =0; Use["PDERSPCA"] =0; Use["PDERSkNN"] =0; // depreciated until further notice Use["PDEFoam"] =0; // -- Use["KNN"] =0; // --- Use["HMatrix"] =0; Use["Fisher"] =0; Use["FisherG"] =0; Use["BoostedFisher"] =0; Use["LD"] =0; // --- Use["FDA_GA"] =0; Use["FDA_SA"] =0; Use["FDA_MC"] =0; Use["FDA_MT"] =0; Use["FDA_GAMT"] =0; Use["FDA_MCMT"] =0; // --- Use["MLP"] = 1; // this is the recommended ANN Use["MLPBFGS"] = 0; // recommended ANN with optional training method Use["CFMlpANN"] =0; // *** missing Use["TMlpANN"] =0; // --- Use["SVM"] =1; // --- Use["BDT"] =1; Use["BDTD"] =0; Use["BDTG"] =0; Use["BDTB"] =0; // --- Use["RuleFit"] =1; // --- Use["Plugin"] =0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // Create a new root output file. TString outfileName( "TMVA_Eric2.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory will // then run the performance analysis for you. // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] // factory->AddVariable( "myvar1 := var1+var2", 'F' ); // factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' ); // factory->AddVariable( "var3", "Variable 3", "units", 'F' ); // factory->AddVariable( "var4", "Variable 4", "units", 'F' ); factory->AddVariable( "Mqq := Mqq", 'F' ); factory->AddVariable( "Pt_qq := Pt_qq", 'F' ); factory->AddVariable( "Eta_qq := Eta_qq", 'F' ); factory->AddVariable( "Charge_qq := Charge_qq", 'F' ); factory->AddVariable( "DPhi_ll := DPhi_ll", 'F' ); factory->AddVariable( "DPt_ll := DPt_ll", 'F' ); //factory->AddVariable( "MinDPhi_lMET := MinDPhi_lMET", 'F' ); //factory->AddVariable( "Aplanarity := aplanarity", 'F' ); //factory->AddVariable( "chargeEta := chargeEta", 'F' ); //factory->AddVariable( "MET := Met", 'F' ); //factory->AddVariable( "MtauJet := MtauJet", 'F' ); //factory->AddVariable( "HT := Ht", 'F' ); //factory->AddVariable( "Chi2 := kinFitChi2", 'F' ); //factory->AddVariable( "DeltaPhiTauMET := DeltaPhiTauMet", 'F' ); //factory->AddVariable( "Mt := Mt", 'F' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables // factory->AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' ); // factory->AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' ); // read training and test data if (ReadDataFromAsciiIFormat) { // load the signal and background event samples from ascii files // format in file must be: // var1/F:var2/F:var3/F:var4/F // 0.04551 0.59923 0.32400 -0.19170 // ... TString datFileS = "tmva_example_sig.dat"; TString datFileB = "tmva_example_bkg.dat"; factory->SetInputTrees( datFileS, datFileB ); } else { //TFile* f0 = new TFile("/opt/sbg/data/data1/cms/lebihan/clean_january_2012_2/CMSSW_4_2_8_patch7/src/MiniTreeAnalysis/NTupleAnalysis/macros/TopTauJets/TMVA_sig_newLumi.root"); //TFile* f1 = new TFile("/opt/sbg/data/data1/cms/lebihan/clean_january_2012_2/CMSSW_4_2_8_patch7/src/MiniTreeAnalysis/NTupleAnalysis/macros/TopTauJets/TMVA_bkg_newLumi.root"); TFile* f0 = TFile::Open("/opt/sbg/data/data1/cms/echabert/ttbarMET/ProdAlexMars13/CMSSW_5_3_2_patch4/src/NTuple/NTupleAnalysis/macros/TTbarMET/backup_outputProof10-04-13_16-00-57/proof_ttW.root"); TFile* f1 = TFile::Open("/opt/sbg/data/data1/cms/echabert/ttbarMET/ProdAlexMars13/CMSSW_5_3_2_patch4/src/NTuple/NTupleAnalysis/macros/TTbarMET/backup_outputProof10-04-13_16-00-57/proof_tt-dilepton.root"); TTree *signal = (TTree*)f0->Get("theTree2"); TTree *background = (TTree*)f1->Get("theTree2"); cout<<"trees: "<<signal<<" "<<background<<endl; //Double_t backgroundWeight = 1.0; //Double_t signalWeight = 1.0; Double_t signalWeight = 0.30*20/185338; Double_t backgroundWeight = 222.*0.1*20/9982625; // ====== register trees ==================================================== // // the following method is the prefered one: // you can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); // factory->AddSignalTree ( signal ); //factory->AddBackgroundTree( background ); // To give different trees for training and testing, do as follows: // factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ); // factory->AddSignalTree( signalTestTree, signalTestWeight, "Test" ); // Use the following code instead of the above two or four lines to add signal and background // training and test events "by hand" // NOTE that in this case one should not give expressions (such as "var1+var2") in the input // variable definition, but simply compute the expression before adding the event // // // --- begin ---------------------------------------------------------- // std::vector<Double_t> vars( 4 ); // vector has size of number of input variables // Float_t treevars[4]; // for (Int_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (Int_t i=0; i<signal->GetEntries(); i++) { // signal->GetEntry(i); // for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < signal->GetEntries()/2) factory->AddSignalTrainingEvent( vars, signalWeight ); // else factory->AddSignalTestEvent ( vars, signalWeight ); // } // // for (Int_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (Int_t i=0; i<background->GetEntries(); i++) { // background->GetEntry(i); // for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight ); // else factory->AddBackgroundTestEvent ( vars, backgroundWeight ); // } // // --- end ------------------------------------------------------------ // // ====== end of register trees ============================================== } // This would set individual event weights (the variables defined in the // expression need to exist in the original TTree) // for signal : factory->SetSignalWeightExpression("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("weight1*weight2"); //factory->SetBackgroundWeightExpression("weight_BTAG"); //factory->SetSignalWeightExpression("weight*weight_BTAG"); // Apply additional cuts on the signal and background samples (can be different) // TCut mycuts = "MHt >=0 && MMTauJet >=0 && MM3 >= 0"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; // TCut mycutb = "MHt >=0 && MMTauJet >=0 && MM3 >= 0"; // for example: TCut mycutb = "abs(var1)<0.5"; //TCut mycuts = "Met>=20 "; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; //TCut mycutb = "Met>=20 "; // for example: TCut mycutb = "abs(var1)<0.5"; TCut mycuts; TCut mycutb; // tell the factory to use all remaining events in the trees after training for testing: factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=3000:nTrain_Background=5000:SplitMode=Random:NormMode=NumEvents:!V" ); // If no numbers of events are given, half of the events in the tree are used for training, and // the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); // ---- Book MVA methods // // please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // test the decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // test the new kernel density estimator if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // test the mixed splines and kernel density estimator (depending on which variable) if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSkNN"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSkNN", "!H:!V:VolumeRangeMode=kNN:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:CutNmin=T:Nmin=100:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" ); // Fisher discriminant if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=60:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2"); // Linear discriminant (same as Fisher) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS" ); // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // -------------------------------------------------------------------------------------------------- // As an example how to use the ROOT plugin mechanism, book BDT via // plugin mechanism if (Use["Plugin"]) { // // first the plugin has to be defined, which can happen either through the following line in the local or global .rootrc: // // # plugin handler plugin name(regexp) class to be instanciated library constructor format // Plugin.TMVA@@MethodBase: ^BDT TMVA::MethodBDT TMVA.1 "MethodBDT(TString,TString,DataSet&,TString)" // // or by telling the global plugin manager directly gPluginMgr->AddHandler("TMVA@@MethodBase", "BDT", "TMVA::MethodBDT", "TMVA.1", "MethodBDT(TString,TString,DataSet&,TString)"); factory->BookMethod( TMVA::Types::kPlugins, "BDT", "!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=50" ); } // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethodsForClassification(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void mvaDonut(TString Type = "Dl", int iChannel = 1, TString Sample = "Sig") { TString Channels[] = {"D0","Ds0","Dp","Dsp"}; TString fname = "mva"; if(Sample=="Dss") fname += Sample; fname += Type; fname += Channels[iChannel-1]; TString outfileName = fname; outfileName += ".root"; TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); int isDss = 0; if(Sample=="Dss") isDss=1; TMVA::Factory *factory = new TMVA::Factory( fname, outputFile, Form("!V:!Silent:%sColor", gROOT->IsBatch()?"!":"") ); TChain c("ntp1"); c.Add("~/releases/ntuplePID50/workdir/AWG82/ntuples/small/Add_R24MVA_RunAll.root"); TString sigCuts[] = {"(MCType==1||MCType==3||MCType==5)", "(MCType==2||MCType==4||MCType==6)", "(MCType==7||MCType==9||MCType==11)", "(MCType==8||MCType==10||MCType==12)", "MCType>12"}; TString bkgCuts[2][2] = {{"MCType>6", "(MCType>0&&MCType<7||MCType>12)"}, {"MCType>0&&MCType<13","MCType>0&&MCType<13"}}; TString sigStr = "candLepTru==1&&"; if(isDss) sigStr += "pmisspi0"; else sigStr += "candPMiss"; sigStr += ">0.2&&candType=="; sigStr += iChannel; sigStr += "&&"; if(isDss) sigStr += sigCuts[4]; else sigStr += sigCuts[iChannel-1]; TString bkgStr = "candType=="; bkgStr += iChannel; bkgStr += "&&"; if(isDss) bkgStr += "pmisspi0"; else bkgStr += "candPMiss"; bkgStr += ">0.2&&"; if(Type=="Dl") bkgStr += bkgCuts[isDss][(iChannel-1)/2]; else bkgStr += "MCType==0"; TCut sigCut = "1", bkgCut = "1", mycuts = "", mycutb = ""; sigCut += sigStr; bkgCut += bkgStr; // --- Base --- // int nSig = 9, nDpi0 = 10; // TString sigVari[] = {"candEExtra","candMES","candDmass","candDeltam","candTagChargedMult","candBTagDeltam", // "candBTagDmass","candDeltaE","candCosT"}; // TString Dpi0Vari[] = {"mpi0","candDmass","dmpi0","eextrapi0","ppi0","e1pi0","candCosT","candDeltam", // "candMES","candDeltaE"}; // --- NoDmNoMp0 --- // int nSig = 8, nDpi0 = 9; // TString sigVari[] = {"candEExtra","candMES","candDmass","candDeltam","candTagChargedMult", // "candBTagDmass","candDeltaE","candCosT"}; // TString Dpi0Vari[] = {"candDmass","dmpi0","eextrapi0","ppi0","e1pi0","candCosT","candDeltam", // "candMES","candDeltaE"}; // sigCuts[4] = "MCType>12&&mpi0>.125&&mpi0<.145"; // --- NoMes --- // int nSig = 8, nDpi0 = 9; // TString sigVari[] = {"candEExtra","candDmass","candDeltam","candTagChargedMult","candBTagDeltam", // "candBTagDmass","candDeltaE","candCosT"}; // TString Dpi0Vari[] = {"mpi0","candDmass","dmpi0","eextrapi0","ppi0","e1pi0","candCosT","candDeltam", // "candDeltaE"}; // --- NoMulYesDm --- int nSig = 8, nDpi0 = 11; TString sigVari[] = {"candEExtra","candMES","candDmass","candDeltam","candBTagDeltam", "candBTagDmass","candDeltaE","candCosT"}; TString Dpi0Vari[] = {"mpi0","candDmass","dmpi0","eextrapi0","ppi0","e1pi0","candCosT","candDeltam", "candMES","candDeltaE","candBTagDeltam"}; factory->SetInputTrees(&c, sigCut, bkgCut); if(isDss==0){ for(int vari = 0; vari < nSig; vari++){ if(sigVari[vari]=="candDeltam" && iChannel%2==1) continue; char variChar = 'F'; if(sigVari[vari]=="candTagChargedMult") variChar = 'I'; factory->AddVariable(sigVari[vari], variChar); } } else { for(int vari = 0; vari < nDpi0; vari++){ if(Dpi0Vari[vari]=="candDeltam" && iChannel%2==1) continue; factory->AddVariable(Dpi0Vari[vari], 'F'); } } factory->PrepareTrainingAndTestTree( mycuts, mycutb, "NSigTest=100:NBkgTest=100:SplitMode=Random:NormMode=NumEvents:!V" ); factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=500:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=2.5" ); factory->TrainAllMethods(); factory->TestAllMethods(); factory->EvaluateAllMethods(); // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; delete factory; // Launch the GUI for the root macros //if (!gROOT->IsBatch()) TMVAGui( outfileName ); gROOT->ProcessLine(".q"); }
int main () { TFile * outputfile = TFile::Open ("outputTMVA.root","RECREATE"); TMVA::Factory * TMVAtest = new TMVA::Factory ("TMVAtest", outputfile, "S") ; //PG get the signal and deliver it to the TMVA factory TChain signalTree ("sample") ; signalTree.Add ("data/sig_0.root") ; std::cout << "READ " << signalTree.GetEntries () << " signal events\n" ; TMVAtest->AddSignalTree (&signalTree, 1) ; //PG get the bkg and deliver it to the TMVA factory TChain bkgTree ("sample") ; bkgTree.Add ("data/bkg_0.root") ; std::cout << "READ " << bkgTree.GetEntries () << " bkg events\n" ; TMVAtest->AddBackgroundTree (&bkgTree, 1) ; //PG get the training and test samples and deliver them to the TMVA factory TChain signalTrainTree ("sample") ; signalTrainTree.Add ("data/sig_1.root") ; std::cout << "READ " << signalTrainTree.GetEntries () << " signal train events\n" ; TChain bkgTrainTree ("sample") ; bkgTrainTree.Add ("data/bkg_1.root") ; std::cout << "READ " << bkgTrainTree.GetEntries () << " bkg train events\n" ; TMVAtest->SetInputTrees (signalTrainTree.GetTree (), bkgTrainTree.GetTree (), 1., 1.) ; //PG variables to be used for the selection //PG must be defined in the TTrees TMVAtest->AddVariable ("vars.x", 'F') ; TMVAtest->AddVariable ("vars.y" , 'F') ; int signalNumTrain = signalTrainTree.GetEntries () * 4 / 5 ; int bkgNumTrain = bkgTrainTree.GetEntries () * 4 / 5 ; int signalNumTest = signalTrainTree.GetEntries () - signalNumTrain ; int bkgNumTest = bkgTrainTree.GetEntries () - bkgNumTrain ; char trainOptions[120] ; sprintf (trainOptions,"NSigTrain=%d:NBkgTrain=%d:NSigTest=%d:NBkgTest=%d", signalNumTrain, bkgNumTrain, signalNumTest, bkgNumTest) ; sprintf (trainOptions,"NSigTrain=%d:NBkgTrain=%d:NSigTest=%d:NBkgTest=%d", 0,0,0,0) ; std::cout << "TRAINING CONFIGURATION : " << trainOptions << "\n" ; TMVAtest->PrepareTrainingAndTestTree ("",trainOptions) ; //PG prepare the classifier //PG cut-based, default params TMVAtest->BookMethod (TMVA::Types::kCuts, "Cuts") ; TMVAtest->TrainAllMethods () ; TMVAtest->TestAllMethods () ; TMVAtest->EvaluateAllMethods () ; delete TMVAtest ; delete outputfile ; }
void TMVAClassificationHwwNtuple( TString myMethodList = "" ) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // to get access to the GUI and all tmva macros //TString tmva_dir(TString(gRootDir) + "/tmva"); //if(gSystem->Getenv("TMVASYS")) // tmva_dir = TString(gSystem->Getenv("TMVASYS")); //cout<<"tmva_dir:"<<tmva_dir<<endl; //gROOT->SetMacroPath(tmva_dir + "/test/:" + gROOT->GetMacroPath() ); gROOT->ProcessLine(".L TMVAGui.C"); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Cut optimisation Use["Cuts"] = 1; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // // --- 1-dimensional likelihood ("naive Bayes estimator") Use["Likelihood"] = 0; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // // --- Mutidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDEFoam"] = 0; Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting Use["KNN"] = 0; // k-nearest neighbour method // // --- Linear Discriminant Analysis Use["LD"] = 0; // Linear Discriminant identical to Fisher Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; // uses generalised MVA method boosting Use["HMatrix"] = 0; // // --- Function Discriminant analysis Use["FDA_GA"] = 0; // minimisation of user-defined function using Genetics Algorithm Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 0; // Recommended ANN Use["MLPBFGS"] = 0; // Recommended ANN with optional training method Use["MLPBNN"] = 0; // Recommended ANN with BFGS training method and bayesian regulator Use["CFMlpANN"] = 0; // Depreciated ANN from ALEPH Use["TMlpANN"] = 0; // ROOT's own ANN // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 1; // uses Adaptive Boost Use["BDTG"] = 0; // uses Gradient Boost Use["BDTB"] = 0; // uses Bagging Use["BDTD"] = 0; // decorrelation + Adaptive Boost Use["BDTF"] = 0; // allow usage of fisher discriminant for node splitting // // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules") Use["RuleFit"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; cout<<regMethod<<" is on"<<endl; } } // ------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] //factory->AddVariable( "myvar1 := var1+var2", 'F' ); //factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' ); //factory->AddVariable( "var3", "Variable 3", "units", 'F' ); //factory->AddVariable( "var4", "Variable 4", "units", 'F' ); factory->AddVariable( "pt1", "LeadLepton pt", "", 'F' ); factory->AddVariable( "pt2", "TailLepton pt", "", 'F' ); factory->AddVariable( "pfmetTypeI", "MissingEt", "", 'F' ); factory->AddVariable( "mpmet", "Minimum Proj. Met", "", 'F' ); factory->AddVariable( "dphill", "DeltPhiOfLepLep", "", 'F' ); factory->AddVariable( "mll", "DiLepton Mass", "", 'F' ); factory->AddVariable( "ptll", "DiLepton pt", "", 'F' ); // // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables //factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' ); //factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' ); // factory->AddSpectator( "mWW", "Higgs Mass", "", 'F' ); factory->AddSpectator( "pt1", "LeadLepton pt", "", 'F' ); factory->AddSpectator( "pt2", "TailLepton pt", "", 'F' ); factory->AddSpectator( "pfmetTypeI", "MissingEt", "", 'F' ); factory->AddSpectator( "mpmet", "Minimum Proj. Met", "", 'F' ); factory->AddSpectator( "dphill", "DeltPhiOfLepLep", "", 'F' ); factory->AddSpectator( "mll", "DiLepton Mass", "", 'F' ); factory->AddSpectator( "ptll", "DiLepton pt", "", 'F' ); // Read training and test data // (it is also possible to use ASCII format as input -> see TMVA Users Guide) //TString fname = "./tmva_class_example.root"; //TString fname = "/afs/cern.ch/work/s/salee/private/HWWwidth/HWW/GGVvAnalyzer/MkNtuple/Hw1Int8TeV/MkNtuple.root"; //TString fname = "/terranova_0/HWWwidth/HWW/GGVvAnalyzer/MkNtuple/Hw1Int8TeV/MkNtuple.root"; //if (gSystem->AccessPathName( fname )) // file does not exist in local directory // exit(-1); //gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root"); //TFile *input = TFile::Open( fname ); //TFile *SB_OnPeak = TFile::Open("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_IntOnPeak_8TeV.root"); //TTree *SB_OnPeak_Tree = (TTree*)SB_OnPeak->Get("latino"); TChain *SB_Chain = new TChain("latino"); SB_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_IntOnPeak_8TeV.root"); SB_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_IntShoulder_8TeV.root"); SB_Chain->Add("root://eoscms.cern.ch//eos/cms/store/group/phys_higgs/cmshww/amassiro/HiggsWidth/gg2vv/latinogg2vv_Hw1_IntTail_8TeV.root"); // std::cout << "--- TMVAClassification : Using input file: " << input->GetName() << std::endl; // --- Register the training and test trees //TTree *signal = (TTree*)input->Get("ggTree"); //TTree *background = (TTree*)input->Get("ggTree"); // global event weights per tree (see below for setting event-wise weights) //Double_t signalWeight = 1.0; //Double_t backgroundWeight = 1.0; // You can add an arbitrary number of signal or background trees //factory->AddSignalTree ( signal, signalWeight ); //factory->AddBackgroundTree( background, backgroundWeight ); // Classification training and test data in ROOT tree format with signal and background events being located in the same tree TCut sigCut = "mWW>160 && (channel == 2 || channel == 3)"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut bkgCut = "mWW<160 && (channel == 2 || channel == 3)"; // for example: TCut mycutb = "abs(var1)<0.5"; factory->SetInputTrees(SB_Chain, sigCut, bkgCut); // To give different trees for training and testing, do as follows: // factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ); // factory->AddSignalTree( signalTestTree, signalTestWeight, "Test" ); // Use the following code instead of the above two or four lines to add signal and background // training and test events "by hand" // NOTE that in this case one should not give expressions (such as "var1+var2") in the input // variable definition, but simply compute the expression before adding the event // // // --- begin ---------------------------------------------------------- // std::vector<Double_t> vars( 4 ); // vector has size of number of input variables // Float_t treevars[4], weight; // // // Signal // for (UInt_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (UInt_t i=0; i<signal->GetEntries(); i++) { // signal->GetEntry(i); // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < signal->GetEntries()/2.0) factory->AddSignalTrainingEvent( vars, signalWeight ); // else factory->AddSignalTestEvent ( vars, signalWeight ); // } // // // Background (has event weights) // background->SetBranchAddress( "weight", &weight ); // for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (UInt_t i=0; i<background->GetEntries(); i++) { // background->GetEntry(i); // for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight*weight ); // else factory->AddBackgroundTestEvent ( vars, backgroundWeight*weight ); // } // --- end ------------------------------------------------------------ // // --- end of tree registration // Set individual event weights (the variables must exist in the original TTree) // for signal : factory->SetSignalWeightExpression ("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("weight1*weight2"); //factory->SetBackgroundWeightExpression( "weight" ); // Apply additional cuts on the signal and background samples (can be different) //TCut mycuts = "pt1>70 && pt2>25"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; //TCut mycuts = "pt1>17 && pt2>8"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; //TCut mycutb = "pt1>70 && pt2>25"; // for example: TCut mycutb = "abs(var1)<0.5"; //TCut mycutb = "pt1>17 && pt2>8"; // for example: TCut mycutb = "abs(var1)<0.5"; TCut mycut = "pt1>20 && pt2>10"; // for example: TCut mycutb = "abs(var1)<0.5"; // Tell the factory how to use the training and testing events // // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); factory->PrepareTrainingAndTestTree( mycut, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); //factory->PrepareTrainingAndTestTree( mycuts, mycutb, // "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood ("naive Bayes estimator") if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // Decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); // PCA-transformed likelihood if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // Use a kernel density estimator to approximate the PDFs if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); if (Use["PDEFoamBoost"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" ); // Linear discriminant (same as Fisher discriminant) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ); if (Use["MLPBNN"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:V:NTrees=850:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); //"!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" ); if (Use["BDTF"]) // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher", "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier usage, see: TMVAClassificationCategory // ----------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // ---- STILL EXPERIMENTAL and only implemented for BDT's ! // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","FitGA"); // ----------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void TMVAClassification( std::string selectionName, std::string charge, TString myMethodList = "" ) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) // this loads the library TMVA::Tools::Instance(); //--------------------------------------------------------------- // default MVA methods to be trained + tested std::map<std::string,int> Use; Use["Cuts"] = 1; Use["CutsD"] = 0; Use["CutsPCA"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // --- Use["Likelihood"] = 0; Use["LikelihoodD"] = 0; // the "D" extension indicates decorrelated input variables (see option strings) Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings) Use["LikelihoodKDE"] = 0; Use["LikelihoodMIX"] = 0; // --- Use["PDERS"] = 0; Use["PDERSD"] = 0; Use["PDERSPCA"] = 0; Use["PDERSkNN"] = 0; // depreciated until further notice Use["PDEFoam"] = 0; // -- Use["KNN"] = 0; // --- Use["HMatrix"] = 0; Use["Fisher"] = 0; Use["FisherG"] = 0; Use["BoostedFisher"] = 0; Use["LD"] = 0; // --- Use["FDA_GA"] = 0; Use["FDA_SA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; Use["FDA_MCMT"] = 0; // --- Use["MLP"] = 0; // this is the recommended ANN Use["MLPBFGS"] = 0; // recommended ANN with optional training method Use["CFMlpANN"] = 0; // *** missing Use["TMlpANN"] = 0; // --- Use["SVM"] = 0; // --- Use["BDT"] = 0; Use["BDTD"] = 0; Use["BDTG"] = 0; Use["BDTB"] = 0; // --- Use["RuleFit"] = 0; // --- Use["Plugin"] = 0; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // Create a new root output file. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory will // then run the performance analysis for you. // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] //factory->AddVariable( "MET" , "ME_{T}", "GeV", 'F'); //factory->AddVariable( "TMath::Max(pT1,pT2)" , "Lead Lepton p_{T}", "GeV", 'F'); factory->AddVariable( "HT" , "H_{T}", "GeV", 'F'); //factory->AddVariable( "M3" , "M_{3}", "GeV", 'F'); factory->AddVariable( "TMath::Min(pT1,pT2)" , "Sublead Lepton p_{T}", "GeV", 'F'); //factory->AddVariable( "NbJ" , "N B Jets", "", 'I'); //factory->AddVariable( "NbJmed" , "N B Jets (medium)", "", 'I'); //factory->AddVariable( "NJ" , "N Jets", "", 'I'); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables //factory->AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' ); //factory->AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' ); // read training and test data if (ReadDataFromAsciiIFormat) { // load the signal and background event samples from ascii files // format in file must be: // var1/F:var2/F:var3/F:var4/F // 0.04551 0.59923 0.32400 -0.19170 // ... TString datFileS = "tmva_example_sig.dat"; TString datFileB = "tmva_example_bkg.dat"; factory->SetInputTrees( datFileS, datFileB ); } else { // load the signal and background event samples from ROOT trees TFile *input(0); //TString fname = "../macros/tmva_example.root"; //TString fname = "opt_ttW_Apr10_Iso005_NoZVeto_jet20.root"; TString fname = "opt_ttW_Nov20_muDetIso0p05_elDetIso0p05_jet20_withZveto_optimization.root"; //TString fname = "opt_ttW_" + selectionName + ".root"; if (!gSystem->AccessPathName( fname )) { input = TFile::Open( fname ); // check if file in local directory exists } else { input = TFile::Open( "http://root.cern.ch/files/tmva_class_example.root" ); // if not: download from ROOT server } if (!input) { std::cout << "ERROR: could not open data file" << std::endl; exit(1); } std::cout << "--- TMVAClassification : Using input file: " << input->GetName() << std::endl; TTree* opt_tree = (TTree*)input->Get("tree_opt"); TFile* signalFile = TFile::Open("/shome/mdunser/workspace/CMSSW_5_2_5/src/DiLeptonAnalysis/NTupleProducer/macros/plots/Nov26_muPFIso0p05_elPFIso0p05_jet20_withZveto/TTbarW_Yields.root"); TTree *signal = (TTree*)signalFile->Get("SigEvents"); TChain* background = new TChain("SigEvents"); background->Add("/shome/mdunser/workspace/CMSSW_5_2_5/src/DiLeptonAnalysis/NTupleProducer/macros/plots/Nov26_muPFIso0p05_elPFIso0p05_jet20_withZveto/TTJets_Yields.root"); background->Add("/shome/mdunser/workspace/CMSSW_5_2_5/src/DiLeptonAnalysis/NTupleProducer/macros/plots/Nov26_muPFIso0p05_elPFIso0p05_jet20_withZveto/WZTo3LNu_Yields.root"); //TTree *background = (TTree*)opt_tree->CopyTree("SName==\"TTJets\" || SName==\"DYJets\" || SName==\"WZTo3LNu\""); //TTree *background = (TTree*)opt_tree->CopyTree("SName==\"DYJets\""); // global event weights per tree (see below for setting event-wise weights) Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; // ====== register trees ==================================================== // // the following method is the prefered one: // you can add an arbitrary number of signal or background trees factory->AddSignalTree ( signal, signalWeight ); factory->AddBackgroundTree( background, backgroundWeight ); // To give different trees for training and testing, do as follows: // factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ); // factory->AddSignalTree( signalTestTree, signalTestWeight, "Test" ); // Use the following code instead of the above two or four lines to add signal and background // training and test events "by hand" // NOTE that in this case one should not give expressions (such as "var1+var2") in the input // variable definition, but simply compute the expression before adding the event // // // --- begin ---------------------------------------------------------- // std::vector<Double_t> vars( 4 ); // vector has size of number of input variables // Float_t treevars[4]; // for (Int_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (Int_t i=0; i<signal->GetEntries(); i++) { // signal->GetEntry(i); // for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < signal->GetEntries()/2) factory->AddSignalTrainingEvent( vars, signalWeight ); // else factory->AddSignalTestEvent ( vars, signalWeight ); // } // // for (Int_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) ); // for (Int_t i=0; i<background->GetEntries(); i++) { // background->GetEntry(i); // for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar]; // // add training and test events; here: first half is training, second is testing // // note that the weight can also be event-wise // if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight ); // else factory->AddBackgroundTestEvent ( vars, backgroundWeight ); // } // // --- end ------------------------------------------------------------ // // ====== end of register trees ============================================== } // This would set individual event weights (the variables defined in the // expression need to exist in the original TTree) // for signal : factory->SetSignalWeightExpression("weight1*weight2"); // for background: factory->SetBackgroundWeightExpression("weight1*weight2"); //factory->SetSignalWeightExpression("eventWeight"); factory->SetBackgroundWeightExpression("1./SLumi"); // Apply additional cuts on the signal and background samples (can be different) TCut mycuts; TCut mycutb; if( charge == "plus" ) { mycuts = "Charge==1 && SystFlag==0 && NJ>=3 && pT1>20. && pT2>20. && NbJmed>0 && TLCat==0 && Flavor<3 && PassZVeto==1"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; mycutb = "Charge==1 && SystFlag==0 && NJ>=3 && pT1>20. && pT2>20. && NbJmed>0 && TLCat==0 && Flavor<3 && PassZVeto==1"; // for example: TCut mycutb = "abs(var1)<0.5"; } else if( charge == "minus" ) { mycuts = "Charge==-1 && SystFlag==0 && NJ>=3 && pT1>20. && pT2>20. && NbJmed>0 && TLCat==0 && Flavor<3 && PassZVeto==1"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; mycutb = "Charge==-1 && SystFlag==0 && NJ>=3 && pT1>20. && pT2>20. && NbJmed>0 && TLCat==0 && Flavor<3 && PassZVeto==1"; // for example: TCut mycutb = "abs(var1)<0.5"; } else if( charge == "all" ) { mycuts = " SystFlag==0 && NJ>=3 && pT1>20. && pT2>20. && NbJmed>0 && TLCat==0 && Flavor<3 && PassZVeto==1"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; mycutb = " SystFlag==0 && NJ>=3 && pT1>20. && pT2>20. && NbJmed>0 && TLCat==0 && Flavor<3 && PassZVeto==1"; // for example: TCut mycutb = "abs(var1)<0.5"; } else { std::cout << "only 'plus' and 'minus' and 'all' are allowed for charge." <<std::endl; return; } //if( btagMed_presel_ ) { // mycuts += "NbJmed>0"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; // mycutb += "NbJmed>0"; // for example: TCut mycutb = "abs(var1)<0.5"; //} // tell the factory to use all remaining events in the trees after training for testing: factory->PrepareTrainingAndTestTree( mycuts, mycutb, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); // If no numbers of events are given, half of the events in the tree are used for training, and // the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); // ---- Book MVA methods // // please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // Cut optimisation if (Use["Cuts"]) { std::string bookConditions; bookConditions = "H:!V:FitMethod=MC"; bookConditions += ":VarProp[0]=FMax"; //HT bookConditions += ":VarProp[1]=FMax"; //pt2 bookConditions += ":EffSel:SampleSize=500000000"; //bookConditions += ":EffSel:SampleSize=50000"; factory->BookMethod( TMVA::Types::kCuts, "Cuts", bookConditions.c_str() ); //factory->BookMethod( TMVA::Types::kCuts, "Cuts", // "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); } if (Use["CutsD"]) factory->BookMethod( TMVA::Types::kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ); if (Use["CutsPCA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Likelihood if (Use["Likelihood"]) factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); // test the decorrelated likelihood if (Use["LikelihoodD"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); if (Use["LikelihoodPCA"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); // test the new kernel density estimator if (Use["LikelihoodKDE"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); // test the mixed splines and kernel density estimator (depending on which variable) if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // test the multi-dimensional probability density estimator // here are the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSkNN"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSkNN", "!H:!V:VolumeRangeMode=kNN:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDERSD"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ); if (Use["PDERSPCA"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:CutNmin=T:Nmin=100:Kernel=None:Compress=T" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // H-Matrix (chi2-squared) method if (Use["HMatrix"]) factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" ); // Fisher discriminant if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=60:NsmoothMVAPdf=10" ); // Fisher with Gauss-transformed input variables if (Use["FisherG"]) factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" ); // Composite classifier: ensemble (tree) of boosted Fisher classifiers if (Use["BoostedFisher"]) factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2"); // Linear discriminant (same as Fisher) if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options]) factory->BookMethod( TMVA::Types::kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if (Use["FDA_MCMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=N+5:TestRate=10:EpochMonitoring" ); if (Use["MLPBFGS"]) factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=N+5:TestRate=10:TrainingMethod=BFGS:!EpochMonitoring" ); // CF(Clermont-Ferrand)ANN if (Use["CFMlpANN"]) factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:... // Tmlp(Root)ANN if (Use["TMlpANN"]) factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:... // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDTG"]) // Gradient Boost factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" ); if (Use["BDT"]) // Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTB"]) // Bagging factory->BookMethod( TMVA::Types::kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ); if (Use["BDTD"]) // Decorrelation + Adaptive Boost factory->BookMethod( TMVA::Types::kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ); // RuleFit -- TMVA implementation of Friedman's method if (Use["RuleFit"]) factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // For an example of the category classifier, see: TMVAClassificationCategory // -------------------------------------------------------------------------------------------------- // As an example how to use the ROOT plugin mechanism, book BDT via // plugin mechanism if (Use["Plugin"]) { // // first the plugin has to be defined, which can happen either through the following line in the local or global .rootrc: // // # plugin handler plugin name(regexp) class to be instanciated library constructor format // Plugin.TMVA@@MethodBase: ^BDT TMVA::MethodBDT TMVA.1 "MethodBDT(TString,TString,DataSet&,TString)" // // or by telling the global plugin manager directly gPluginMgr->AddHandler("TMVA@@MethodBase", "BDT", "TMVA::MethodBDT", "TMVA.1", "MethodBDT(TString,TString,DataSet&,TString)"); factory->BookMethod( TMVA::Types::kPlugins, "BDT", "!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=50" ); } // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); if (Use["Cuts"]) { for( unsigned iEff=1; iEff<11; ++iEff ) { TMVA::IMethod* method = (TMVA::IMethod*)factory->GetMethod("Cuts"); TMVA::MethodCuts* cuts = dynamic_cast<TMVA::MethodCuts*>(method); std::string optcutsdir = "optcuts_" + selectionName + "_" + charge; std::string mkdir_command = "mkdir -p " + optcutsdir; system(mkdir_command.c_str()); char cutsFileName[500]; sprintf( cutsFileName, "%s/cuts_Seff%d.txt", optcutsdir.c_str(), 10*iEff ); ofstream ofs(cutsFileName); std::vector<Double_t> cutsMin, cutsMax; cuts->GetCuts((float)iEff*0.10, cutsMin, cutsMax); bool found_pT1 = false; bool found_pT2 = false; bool found_NJ = false; bool found_NbJ = false; bool found_NbJmed = false; for( unsigned iCut=0; iCut<cutsMin.size(); ++iCut) { TString varName = factory->DefaultDataSetInfo().GetVariableInfo(iCut).GetInternalName(); if( varName=="TMath_Min_pT1,pT2_") { ofs << "pT1 " << cutsMin[iCut] << " " << cutsMax[iCut] << std::endl; ofs << "pT2 " << cutsMin[iCut] << " " << cutsMax[iCut] << std::endl; found_pT1 = true; found_pT2 = true; } else { ofs << varName << " " << cutsMin[iCut] << " " << cutsMax[iCut] << std::endl; } if( varName=="pT1" ) found_pT1 = true; if( varName=="pT2" ) found_pT2 = true; if( varName=="NJ" ) found_NJ = true; if( varName=="NbJ" ) found_NbJ = true; if( varName=="NbJmed" ) found_NbJmed = true; } // preselection cuts (if not optimized): if( !found_pT1 ) ofs << "pT1 20. 100000." << std::endl; if( !found_pT2 ) ofs << "pT2 20. 100000." << std::endl; if( !found_NJ ) ofs << "NJ 3 100000." << std::endl; if( !found_NbJ ) ofs << "NbJ 1 100000." << std::endl; if( !found_NbJmed && btagMed_presel_ ) ofs << "NbJmed 1 100000." << std::endl; if( charge=="plus" ) ofs << "Charge 1 10" << std::endl; else if( charge=="minus" ) ofs << "Charge -10 0" << std::endl; ofs.close(); } // for eff } // if cuts // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void TMVAtest1() { // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString TrainName = "TMVA"; TString outfileName( TrainName+".root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( TrainName, outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] factory->AddVariable ("ptphoton", 'F'); // factory->AddVariable ("etaphoton", 'F'); factory->AddVariable ("ptmuon", 'F'); // factory->AddVariable ("etamuon", 'F'); factory->AddVariable ("ptjet", 'F'); // factory->AddVariable ("etajet", 'F'); // factory->AddVariable ("masstop", 'F'); // factory->AddVariable ("mtw", 'F'); factory->AddVariable ("deltaRphotonjet", 'F'); factory->AddVariable ("deltaRphotonmuon", 'F'); // factory->AddVariable ("ht", 'F'); // factory->AddVariable ("photonmuonmass", 'F'); factory->AddVariable ("costopphoton", 'F'); // factory->AddVariable ("topphotonmass", 'F'); //factory->AddVariable ("pttop", 'F'); //factory->AddVariable ("etatop", 'F'); factory->AddVariable ("jetmultiplicity", 'F'); // factory->AddVariable ("bjetmultiplicity", 'F'); factory->AddVariable ("deltaphiphotonmet", 'F'); factory->AddVariable ("cvsdiscriminant", 'F'); // factory->AddVariable ("leptoncharge", 'F'); //Load the signal and background event samples from ROOT trees // TFile *inputSTrain(0); // TFile *inputBTrain(0); TString sigFileTrain = "SIGNALtGu.root"; TString bkgFileTrain = "WPHJET.root"; TString ttbarFileTrain1 ="TTBAR1.root"; TString ttbarFileTrain2 ="TTBAR2.root"; TString ttbarFileTrain3 ="TTBAR3.root"; TString TTGFileTrain ="TTG.root"; TString WWFileTrain ="WW.root"; TString WZFileTrain ="WZ.root"; TString ZZFileTrain ="ZZ.root"; TString ZGAMMAFileTrain ="ZGAMMA.root"; // TString bkgFileTrain = "TTG.root"; TFile *inputSTrain = TFile::Open( sigFileTrain ); if (!inputSTrain) { std::cout << "ERROR: could not open data file" << std::endl; exit(1); } TFile *inputBTrain = TFile::Open( bkgFileTrain ); TFile *inputBttbar1 = TFile::Open( ttbarFileTrain1); TFile *inputBttbar2 = TFile::Open( ttbarFileTrain2); TFile *inputBttbar3 = TFile::Open( ttbarFileTrain3); TFile *inputTTG= TFile::Open(TTGFileTrain ); TFile *inputWW= TFile::Open(WWFileTrain ); TFile *inputWZ= TFile::Open(WZFileTrain ); TFile *inputZZ= TFile::Open(ZZFileTrain ); TFile *inputZGAMMA= TFile::Open( ZGAMMAFileTrain); if (!inputBTrain) { std::cout << "ERROR: could not open data file" << std::endl; exit(1); } std::cout << "--- TMVAnalysis : Accessing Signal Train: " << sigFileTrain << std::endl; std::cout << "--- TMVAnalysis : Accessing Background Train: " << bkgFileTrain << std::endl; TTree *signalTrain = (TTree*)inputSTrain->FindObjectAny("atq"); TTree *backgroundTrain = (TTree*)inputBTrain->FindObjectAny("atq"); TTree *ttbarbackground1 = (TTree*)inputBttbar1->FindObjectAny("atq"); TTree *ttbarbackground2 = (TTree*)inputBttbar2->FindObjectAny("atq"); TTree *ttbarbackground3 = (TTree*)inputBttbar3->FindObjectAny("atq"); TTree *TTGbackground = (TTree*)inputTTG->FindObjectAny("atq"); TTree *WWbackground = (TTree*)inputWW->FindObjectAny("atq"); TTree *WZbackground = (TTree*)inputWZ->FindObjectAny("atq"); TTree *ZZbackground = (TTree*)inputZZ->FindObjectAny("atq"); TTree *ZGAMMAbackground = (TTree*)inputZGAMMA->FindObjectAny("atq"); // TTree *signalTrain = (TTree*)inputSTrain->FindObjectAny("insidetopmass"); // TTree *backgroundTrain = (TTree*)inputBTrain->FindObjectAny("insidetopmass"); // factory->AddSignalTree( signalTrain, 1); //cout<<"reza1"; // factory->AddBackgroundTree( backgroundTrain, 1, ); //cout<<"reza2"; factory->SetInputTrees(signalTrain,backgroundTrain,1,1); factory->AddBackgroundTree( ttbarbackground1, 1.1); factory->AddBackgroundTree( ttbarbackground2, 1.1); // factory->AddBackgroundTree( ttbarbackground3, 0.3); factory->AddBackgroundTree( TTGbackground, 1); factory->AddBackgroundTree( WWbackground, 1); factory->AddBackgroundTree(WZbackground , 1); factory->AddBackgroundTree(ZZbackground , 1); factory->AddBackgroundTree(ZGAMMAbackground , 1); // Set xs-weight // factory->SetSignalWeightExpression ("weight"); //factory->SetBackgroundWeightExpression("weight"); // Apply additional cuts on the signal and background samples (can be different) TCut mycut = ""; // TCut mycutb = ""; Int_t nSignalTrain = signalTrain->GetEntries(); Int_t nBackTrain = backgroundTrain->GetEntries(); factory->PrepareTrainingAndTestTree( "","", "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=0:nTest_Background=0:SplitMode=Random:SplitSeed=88!V" ); //factory->PrepareTrainingAndTestTree( "", "", // "nTrain_Signal=100:nTrain_Background=100:nTest_Signal=100:nTest_Background=100:!V" ); // factory->PrepareTrainingAndTestTree( mycuts, mycutb, //":NSigTrain=:NBkgTrain=:NSigTest=:NBkgTest=:SplitMode=Alternate:!V" ); //":nTrain_Signal=10000:nTest_Signal=2260:nTrain_Background=100000:nTest_Background=100000:SplitMode=Alternate:!V" ); // ":nTrain_Signal=nSignalTrain:nTrain_Background=nBackTrain:!V"); //":nTrain_Signal=nSignalTrain:nTest_Signal=nSignalTest:nTrain_Background=nBackTrain:nTest_Background=nBackTest:SplitMode=Block:!V"); //":SplitMode=Alternate:!V"); //":nTrain_Signal=:nTest_Signal=:nTrain_Background=:nTest_Background=:SplitMode=Block:!V"); //":SplitMode=Alternate:!V" ); // ---- Book MVA methods // factory->BookMethod( TMVA::Types::kBDT, "BDT", // "!H:!V:NTrees=300:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=-1:NNodesMax=5:UseNvars=4:PruneStrength=5:PruneMethod=CostComplexity:MaxDepth=4" ); //factory->BookMethod( TMVA::Types::kBDT, "BDT", // "!H:!V:NTrees=400:BoostType=AdaBoost:AdaBoostBeta=0.5:MaxDepth=3:SeparationType=GiniIndex:PruneMethod=NoPruning" ); factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=400:BoostType=AdaBoost:AdaBoostBeta=0.8:MaxDepth=3:SeparationType=GiniIndex:PruneMethod=NoPruning" ); // factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5"); // factory->BookMethod(TMVA::Types::kFisher, "Fisher", "H:!V:Fisher"); // ---- Now you can tell the factory to train, test, and evaluate the MVAs // factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); // factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001" ); // factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit","H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ); // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros // if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
int main () { TFile * outputfile = TFile::Open ("outputTMVA.root","RECREATE"); TMVA::Factory * TMVAtest = new TMVA::Factory ("TMVAtest", outputfile, "S") ; //PG get the signal and deliver it to the TMVA factory TChain signalTree ("shrinked") ; signalTree.Add ("data/H160_VBF.root") ; std::cout << "READ " << signalTree.GetEntries () << " signal events\n" ; TMVAtest->AddSignalTree (&signalTree, 1) ; //PG get the bkg and deliver it to the TMVA factory TChain bkgTree ("shrinked") ; bkgTree.Add ("data/H160_NOVBF.root") ; std::cout << "READ " << bkgTree.GetEntries () << " bkg events\n" ; TMVAtest->AddBackgroundTree (&bkgTree, 1) ; //PG get the training and test samples and deliver them to the TMVA factory TChain signalTrainTree ("shrinked") ; signalTrainTree.Add ("data/H150_VBF.root") ; std::cout << "READ " << signalTrainTree.GetEntries () << " signal train events\n" ; TChain bkgTrainTree ("shrinked") ; bkgTrainTree.Add ("data/H150_NOVBF.root") ; std::cout << "READ " << bkgTrainTree.GetEntries () << " bkg train events\n" ; // PG this is useless!! TMVAtest->SetInputTrees (signalTrainTree.GetTree (), bkgTrainTree.GetTree (), 1., 1.) ; //PG variables to be used for the selection //PG must be defined in the TTrees TMVAtest->AddVariable ("vars.Deta", 'F') ; TMVAtest->AddVariable ("vars.Dphi" , 'F') ; TMVAtest->AddVariable ("vars.Minv" , 'F') ; int signalNumTrain = signalTrainTree.GetEntries () * 4 / 5 ; int bkgNumTrain = bkgTrainTree.GetEntries () * 4 / 5 ; int signalNumTest = signalTrainTree.GetEntries () - signalNumTrain ; int bkgNumTest = bkgTrainTree.GetEntries () - bkgNumTrain ; char trainOptions[120] ; sprintf (trainOptions,"NSigTrain=%d:NBkgTrain=%d:NSigTest=%d:NBkgTest=%d", signalNumTrain, bkgNumTrain, signalNumTest, bkgNumTest) ; sprintf (trainOptions,"NSigTrain=%d:NBkgTrain=%d:NSigTest=%d:NBkgTest=%d", 0,0,0,0) ; std::cout << "TRAINING CONFIGURATION : " << trainOptions << "\n" ; TMVAtest->PrepareTrainingAndTestTree ("",trainOptions) ; //PG prepare the classifiers //PG ------------------------ //PG cut-based, default params TMVAtest->BookMethod (TMVA::Types::kCuts, "Cuts") ; //PG multi layer perceptron TMVAtest->BookMethod (TMVA::Types::kMLP, "MLP", "H:!V:!Normalise:NeuronType=tanh:NCycles=200:HiddenLayers=N+1,N:TestRate=5"); TMVAtest->TrainAllMethods () ; TMVAtest->TestAllMethods () ; TMVAtest->EvaluateAllMethods () ; delete TMVAtest ; delete outputfile ; }