void regressphi() { TMVA::Tools::Instance(); std::cout << "==> Start TMVAClassification" << std::endl; // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "mva", outputFile, "!V:!Silent:Color:DrawProgressBar" ); factory->AddVariable( "npv" , 'F' ); factory->AddVariable( "u" , 'F' ); factory->AddVariable( "uphi" , 'F' ); factory->AddVariable( "chsumet/sumet" , 'F' ); factory->AddVariable( "tku" , 'F' ); factory->AddVariable( "tkuphi" , 'F' ); factory->AddVariable( "nopusumet/sumet" , 'F' ); factory->AddVariable( "nopuu" , 'F' ); factory->AddVariable( "nopuuphi" , 'F' ); factory->AddVariable( "pusumet/sumet" , 'F' ); factory->AddVariable( "pumet" , 'F' ); factory->AddVariable( "pumetphi" , 'F' ); factory->AddVariable( "pucsumet/sumet" , 'F' ); factory->AddVariable( "pucu" , 'F' ); factory->AddVariable( "pucuphi" , 'F' ); factory->AddVariable( "jspt_1" , 'F' ); factory->AddVariable( "jseta_1" , 'F' ); factory->AddVariable( "jsphi_1" , 'F' ); factory->AddVariable( "jspt_2" , 'F' ); factory->AddVariable( "jseta_2" , 'F' ); factory->AddVariable( "jsphi_2" , 'F' ); factory->AddVariable( "nalljet" , 'I' ); factory->AddVariable( "njet" , 'I' ); factory->AddTarget( "rphi_z-uphi+ 2.*TMath::Pi()*(rphi_z-uphi < -TMath::Pi()) - 2.*TMath::Pi()*(rphi_z-uphi > TMath::Pi()) " ); TString lName = "../Jets/r11-dimu_nochs_v2.root"; TFile *lInput = TFile::Open(lName); TTree *lRegress = (TTree*)lInput ->Get("Flat"); Double_t lRWeight = 1.0; factory->AddRegressionTree( lRegress , lRWeight ); TCut lCut = "nbtag == 0"; //Cut to remove real MET //(rpt_z < 40 || (rpt_z > 40 && rpt_z+u1 < 40)) && nbtag == 0 "; ==> stronger cut to remove Real MET factory->PrepareTrainingAndTestTree( lCut, "nTrain_Regression=0:nTest_Regression=0:SplitMode=Block:NormMode=NumEvents:!V" ); // Boosted Decision Trees factory->BookMethod( TMVA::Types::kBDT, "RecoilPhiRegress_data_clean2_njet", "!H:!V:VarTransform=None:nEventsMin=200:NTrees=100:BoostType=Grad:Shrinkage=0.1:MaxDepth=100:NNodesMax=100000:UseYesNoLeaf=F:nCuts=2000");//MaxDepth=100 factory->TrainAllMethods(); factory->TestAllMethods(); factory->EvaluateAllMethods(); outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; //if (!gROOT->IsBatch()) TMVAGui( outfileName ); }
void REG::Loop() { if (fChain == 0) return; Long64_t nentries = fChain->GetEntriesFast(); Long64_t nbytes = 0, nb = 0; TMVA::Tools::Instance(); std::cout << std::endl; std::cout << "==> Start TMVARegression" << std::endl; TString outfileName( "TMVAReg.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( "TMVARegression", outputFile, "!V:!Silent:Color:DrawProgressBar" ); factory->AddVariable( "l1Pt", "Variable 1", "units", 'F' ); factory->AddVariable( "l1Eta", "Variable 2", "units", 'F' ); factory->AddVariable( "l1Phi", "Variable 1", "units", 'F' ); // factory->AddVariable( "RhoL1", "Variable 2", "units", 'F' ); factory->AddTarget( "ratio" ); factory->AddRegressionTree(fChain, 1.0 ); TCut mycut = ""; factory->PrepareTrainingAndTestTree( mycut, "nTrain_Regression=10000:nTest_Regression=0:SplitMode=Block:NormMode=NumEvents:!V" ); factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=100:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" ); factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events // factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs // factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVARegression is done!" << std::endl; delete factory; }
int main(int argc, char**argv){ if(argc != 2){ std::cerr << " >>>>> analysis.cpp::usage: " << argv[0] << " configFileName" << std::endl ; return 1; } parseConfigFile (argv[1]) ; // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVARegression.C\(\"myMethod1,myMethod2,myMethod3\"\) // //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); std::vector<std::string> UseMethodName; UseMethodName = gConfigParser -> readStringListOption("Input::UseMethodName"); std::cout << " >>>>> Input::UseMethodName size = " << UseMethodName.size() << std::endl; std::cout << " >>>>> >>>>> "; for (unsigned int iCat = 0; iCat < UseMethodName.size(); iCat++){ std::cout << " " << UseMethodName.at(iCat) << ", "; } std::cout << std::endl; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVARegression" << std::endl; std::map<std::string,int> Use; for(std::vector<std::string>::iterator it=UseMethodName.begin(); it!=UseMethodName.end(); ++it) Use[*it]=0; std::string UseMethodFlag; try{ UseMethodFlag = gConfigParser -> readStringOption("Input::UseMethodFlag"); std::cout<< UseMethodFlag<<std::endl; std::vector<TString> mlist = gTools().SplitString( UseMethodFlag, '/' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return -1; } Use[regMethod] = 1; std::cout << "regMethod= " << regMethod<< " 1 "<<std::endl; } } catch (char * exception){ std::cerr << " exception = Use All method " << std::endl; for(std::vector<std::string>::iterator it=UseMethodName.begin() ;it!=UseMethodName.end(); it++) Use[*it]=1; } // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a new root output file std::string outputFileName = gConfigParser -> readStringOption("Output::outputFileName"); std::cout<<" Output Data File = "<<outputFileName<<std::endl; TFile* outputFile = TFile::Open( outputFileName.c_str(), "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory will // then run the performance analysis for you. // The first argument is the base of the name of all the // weightfiles in the directory weight/ // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string // Read training and test data (see TMVAClassification for reading ASCII files) // load the signal and background event samples from ROOT trees std::string inputFileList = gConfigParser -> readStringOption("Input::inputFileList"); std::string treeNameDATA = gConfigParser -> readStringOption("Input::treeNameDATA"); std::cout<<" Input Data List = "<<inputFileList<<std::endl; TChain* treeDATA = new TChain(treeNameDATA.c_str()); FillChain(*treeDATA,inputFileList.c_str()); TMVA::Factory *factory = new TMVA::Factory( "TMVARegression", outputFile, "!V:!Silent:Color:DrawProgressBar" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] std::string RegionOfTraining = gConfigParser -> readStringOption("Input::RegionOfTraining"); std::cout<<" RegionOfTraining = "<<RegionOfTraining<<std::endl; if(RegionOfTraining=="EB"){ factory->AddVariable( "ele1_scE/ele1_scERaw" , 'F'); factory->AddVariable( "ele1_eRegrInput_nPV" , 'F'); factory->AddVariable( "ele1_eRegrInput_r9" , 'F'); factory->AddVariable( "ele1_fbrem" , 'F'); factory->AddVariable( "ele1_eta" , 'F'); factory->AddVariable( "ele1_DphiIn" , 'F'); factory->AddVariable( "ele1_DetaIn" , 'F'); factory->AddVariable( "ele1_sigmaIetaIeta" , 'F'); factory->AddVariable( "ele1_eRegrInput_etaW" , 'F'); factory->AddVariable( "ele1_eRegrInput_phiW" , 'F'); factory->AddVariable( "ele1_eRegrInput_bCE_Over_sCE", 'F'); factory->AddVariable( "ele1_eRegrInput_sigietaieta_bC1" , 'F'); factory->AddVariable( "ele1_eRegrInput_sigiphiiphi_bC1" , 'F'); factory->AddVariable( "ele1_eRegrInput_sigietaiphi_bC1" , 'F'); factory->AddVariable( "ele1_eRegrInput_e3x3_Over_bCE" , 'F'); factory->AddVariable( "ele1_eRegrInput_Deta_bC_sC" , 'F'); factory->AddVariable( "ele1_eRegrInput_Dphi_bC_sC" , 'F'); factory->AddVariable( "ele1_eRegrInput_bEMax_Over_bCE" , 'F'); factory->AddVariable( "ele1_dxy_PV" , 'F'); factory->AddVariable( "ele1_dz_PV" , 'F'); factory->AddVariable( "ele1_sigmaP/ele1_tkP" , 'F'); factory->AddVariable( "ele1_eRegrInput_bCELow_Over_sCE", 'F'); factory->AddVariable( "ele1_eRegrInput_e3x3_Over_bCELow" , 'F'); factory->AddVariable( "ele1_eRegrInput_Deta_bCLow_sC" , 'F'); factory->AddVariable( "ele1_eRegrInput_Dphi_bCLow_sC" , 'F'); factory->AddVariable( "ele1_eRegrInput_seedbC_etacry" , 'F'); factory->AddVariable( "ele1_eRegrInput_seedbC_phicry" , 'F'); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables // factory->AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' ); // factory->AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' ); // Add the variable carrying the regression target // factory->AddTarget("ele1_scE/ele1_E_true" ); factory->AddTarget("ele1_tkP/ele1_E_true" ); // It is also possible to declare additional targets for multi-dimensional regression, ie: // -- factory->AddTarget( "fvalue2" ); // BUT: this is currently ONLY implemented for MLP // global event weights per tree (see below for setting event-wise weights) Double_t regWeight = 1.0; // You can add an arbitrary number of regression trees factory->AddRegressionTree( treeDATA, regWeight ); // This would set individual event weights (the variables defined in the // expression need to exist in the original TTree) // factory->SetWeightExpression( "var1", "Regression" ); // TCut mycut = "ele1_isEB==1 && ele1_sigmaP/ele1_tkP<0.4 && ele1_fbrem>0 && abs(ele1_dxy_PV)<0.05 && abs(ele1_dz_PV)<0.05 && ele1_eRegrInput_etaW > 0.006 && ele1_eRegrInput_phiW<0.08 && ele1_eRegrInput_sigietaieta_bC1>0.006 && ele1_eRegrInput_sigiphiiphi_bC1>0.008 && abs(ele1_eRegrInput_Deta_bC_sC)<0.004 && abs(ele1_eRegrInput_Dphi_bC_sC)<0.04 && abs(ele1_eRegrInput_seedbC_etacry)<0.6 && abs(ele1_eRegrInput_seedbC_phicry)<0.6 && ele1_scE/ele1_scERaw<1.2 && (ele1_scE/ele1_E_true)<1.4 && (ele1_scE/ele1_E_true)>0.3"; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycut = "ele1_isEB==1 && ele1_sigmaP/ele1_tkP<0.4 && ele1_fbrem>0 && abs(ele1_dxy_PV)<0.05 && abs(ele1_dz_PV)<0.05 && ele1_eRegrInput_etaW > 0.006 && ele1_eRegrInput_phiW<0.08 && ele1_eRegrInput_sigietaieta_bC1>0.006 && ele1_eRegrInput_sigiphiiphi_bC1>0.008 && abs(ele1_eRegrInput_Deta_bC_sC)<0.004 && abs(ele1_eRegrInput_Dphi_bC_sC)<0.04 && abs(ele1_eRegrInput_seedbC_etacry)<0.6 && abs(ele1_eRegrInput_seedbC_phicry)<0.6 && ele1_scE/ele1_scERaw<1.2 && ele1_tkP/ele1_E_true<1.8 && ele1_tkP/ele1_E_true>0.2"; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1"; // tell the factory to use all remaining events in the trees after training for testing: factory->PrepareTrainingAndTestTree( mycut, "nTrain_Regression=2500000:nTest_Regression=2500000:SplitMode=Random:NormMode=NumEvents:!V" ); TString Name = Form("weight_%s_%s_P_W",RegionOfTraining.c_str(),UseMethodFlag.c_str()); (TMVA::gConfig().GetIONames()).fWeightFileDir = Name; } if(RegionOfTraining=="EE"){ factory->AddVariable( "ele1_scE/ele1_scERaw" , 'F'); factory->AddVariable( "ele1_eRegrInput_nPV",'F'); factory->AddVariable( "ele1_eRegrInput_r9",'F'); factory->AddVariable( "ele1_fbrem",'F'); factory->AddVariable( "ele1_eta",'F'); factory->AddVariable( "ele1_DphiIn",'F'); factory->AddVariable( "ele1_DetaIn",'F'); factory->AddVariable( "ele1_sigmaIetaIeta",'F'); factory->AddVariable( "ele1_eRegrInput_etaW",'F'); factory->AddVariable( "ele1_eRegrInput_phiW",'F'); factory->AddVariable( "ele1_dxy_PV",'F'); factory->AddVariable( "ele1_dz_PV",'F'); factory->AddVariable( "ele1_sigmaP/ele1_tkP",'F'); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables // factory->AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' ); // factory->AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' ); // Add the variable carrying the regression target // factory->AddTarget("ele1_scE/ele1_E_true" ); factory->AddTarget("ele1_tkP/ele1_E_true" ); // It is also possible to declare additional targets for multi-dimensional regression, ie: // -- factory->AddTarget( "fvalue2" ); // BUT: this is currently ONLY implemented for MLP // global event weights per tree (see below for setting event-wise weights) Double_t regWeight = 1.0; // You can add an arbitrary number of regression trees factory->AddRegressionTree( treeDATA, regWeight ); // This would set individual event weights (the variables defined in the // expression need to exist in the original TTree) // factory->SetWeightExpression( "var1", "Regression" ); // TCut mycut = "ele1_isEB==0 && ele1_sigmaP/ele1_tkP<0.4 && ele1_fbrem>0 && abs(ele1_dxy_PV)<0.05 && abs(ele1_dz_PV)<0.05 &&(ele1_scE/ele1_E_true)<1.4 && (ele1_scE/ele1_E_true)>0.3"; TCut mycut = "ele1_isEB==0 && ele1_sigmaP/ele1_tkP<0.4 && ele1_fbrem>0 && abs(ele1_dxy_PV)<0.05 && abs(ele1_dz_PV)<0.05 && (ele1_tkP/ele1_E_true)<1.6"; // for example: TCut mycut = "abs(var1)<0.5 && // tell the factory to use all remaining events in the trees after training for testing: factory->PrepareTrainingAndTestTree( mycut, "nTrain_Regression=3000000:nTest_Regression=3000000:SplitMode=Random:NormMode=NumEvents:!V" ); TString Name = Form("weight_%s_%s_P_W",RegionOfTraining.c_str(),UseMethodFlag.c_str()); (TMVA::gConfig().GetIONames()).fWeightFileDir = Name; } // Apply additional cuts on the signal and background samples (can be different) // // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // ---- Book MVA methods // // please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // PDE - RS method if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:Normthree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" ); // And the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.3:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // Linear discriminant if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD","!H:!V:VarTransform=G,U,D" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. the formula of this example is good for parabolas factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); // Neural network (MLP) if (Use["MLP"]) // factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" ); // factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=200:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" ); // factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=400:HiddenLayers=N+10:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15" ); // factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=N:NeuronType=tanh:NCycles=200:HiddenLayers=N+10:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15" ); // factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=G,N:NeuronType=tanh:NCycles=200:HiddenLayers=N+5:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15" ); factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:NCycles=250:HiddenLayers=N+5:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:TestRate=10"); // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=N" ); // factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=N,G" ); // Boosted Decision Trees if (Use["BDT"]) // factory->BookMethod( TMVA::Types::kBDT, "BDT","!H:!V:NTrees=100:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" ); // factory->BookMethod( TMVA::Types::kBDT, "BDT","!H:!V:NTrees=200:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:PruneMethod=CostComplexity:PruneStrength=30" ); // factory->BookMethod( TMVA::Types::kBDT, "BDT","!H:!V:NTrees=300:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:PruneMethod=CostComplexity:PruneStrength=30" ); // factory->BookMethod( TMVA::Types::kBDT, "BDT","!H:!V:NTrees=100:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:PruneMethod=CostComplexity:PruneStrength=30" ); factory->BookMethod( TMVA::Types::kBDT, "BDT","!H:!V:NTrees=100:nEventsMin=20:BoostType=AdaBoostR2:SeparationType=RegressionVariance:PruneMethod=CostComplexity:PruneStrength=30"); if (Use["BDTG"]) // factory->BookMethod( TMVA::Types::kBDT, "BDTG","!H:!V:NTrees=2000::BoostType=Grad:Shrinkage=0.1:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:MaxDepth=3:NNodesMax=15" ); factory->BookMethod( TMVA::Types::kBDT, "BDTG","!H:!V:NTrees=1000::BoostType=Grad:Shrinkage=0.1:UseBaggedGrad:GradBaggingFraction=0.5:MaxDepth=5:NNodesMax=25:PruneMethod=CostComplexity:PruneStrength=30"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVARegression is done!" << std::endl; delete factory; // Launch the GUI for the root macros // if (!gROOT->IsBatch()) TMVARegGui( outputFileName.c_str() ); return 0; }
void BJetRegression( TString myMethodList = "" ) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVARegression.C\(\"myMethod1,myMethod2,myMethod3\"\) // //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // Default MVA methods to be trained + tested std::map<std::string,int> Use; // --- Mutidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDEFoam"] = 1; Use["KNN"] = 1; // // --- Linear Discriminant Analysis Use["LD"] = 1; // // --- Function Discriminant analysis Use["FDA_GA"] = 1; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; // // --- Neural Network Use["MLP"] = 1; // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 0; Use["BDTG"] = 1; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVARegression" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a new root output file TString outfileName( "TMVAReg_CSVJ1.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory will // then run the performance analysis for you. // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVARegression", outputFile, "!V:!Silent:Color:DrawProgressBar" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] // std::string inputVariables[] = {"CSVJ1PtUncorr", "CSVJ1Pt", "CSVJ1Et", "CSVJ1Mt", "CSVJ1ptLeadTrk", // "CSVJ1Vtx3dL", "CSVJ1Vtx3deL", "CSVJ1vtxMass", "CSVJ1VtxPt", // "CSVJ1SoftLeptPtRel", "CSVJ1SoftLeptPt", // "CSVJ1SoftLeptdR" , "CSVJ1Ntot"}; std::string inputVariables[] = {"jetPtUncorr", "jetPt", "jetEt", "jetMt", "jetptLeadTrk", "jetVtx3dL", "jetVtx3deL", "jetvtxMass", "jetVtxPt", "jetSoftLeptPtRel", "jetSoftLeptPt", "jetSoftLeptdR" , "jetNtot", "jetJECUnc"}; // for(int ivar = 0; ivar < 14; ivar++){ factory->AddVariable( inputVariables[ivar], inputVariables[ivar], "units", 'F' ); } // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables // factory->AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' ); // factory->AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' ); // Add the variable carrying the regression target // factory->AddTarget( "matchGenJet1Pt" ); factory->AddTarget( "matchGenJetPt" ); // It is also possible to declare additional targets for multi-dimensional regression, ie: // -- factory->AddTarget( "fvalue2" ); // BUT: this is currently ONLY implemented for MLP // Read training and test data (see TMVAClassification for reading ASCII files) // load the signal and background event samples from ROOT trees TFile *input(0); TString fname = "/scratch/zmao/regression/allSample_both_isobTag.root"; if (!gSystem->AccessPathName( fname )) input = TFile::Open( fname ); // check if file in local directory exists else input = TFile::Open( "http://root.cern.ch/files/tmva_reg_example.root" ); // if not: download from ROOT server if (!input) { std::cout << "ERROR: could not open data file" << std::endl; exit(1); } std::cout << "--- TMVARegression : Using input file: " << input->GetName() << std::endl; // --- Register the regression tree TTree *regTree = (TTree*)input->Get("eventTree"); // global event weights per tree (see below for setting event-wise weights) Double_t regWeight = 1.0; // You can add an arbitrary number of regression trees factory->AddRegressionTree( regTree, regWeight ); // This would set individual event weights (the variables defined in the // expression need to exist in the original TTree) // factory->SetWeightExpression( "triggerEff", "Regression" ); // Apply additional cuts on the signal and background samples (can be different) TCut mycut = ""; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1"; // tell the factory to use all remaining events in the trees after training for testing: factory->PrepareTrainingAndTestTree( mycut, "nTrain_Regression=0:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents:!V" ); // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // ---- Book MVA methods // // please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // PDE - RS method if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" ); // And the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // Linear discriminant if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "!H:!V:VarTransform=None" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. the formula of this example is good for parabolas factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); // Neural network (MLP) if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" ); // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDT"]) factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=100:MinNodeSize=1.0%:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" ); if (Use["BDTG"]) factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=2000::BoostType=Grad:Shrinkage=0.1:UseBaggedBoost:BaggedSampleFraction=0.7:nCuts=200:MaxDepth=3:NNodesMax=15" ); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVARegression is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVARegGui( outfileName ); }
//////////////////////////////////////////////////////////////////////////////// /// Main /// //////////////////////////////////////////////////////////////////////////////// void TrainRegressionFJ(TString myMethodList="") { gROOT->SetBatch(1); gROOT->LoadMacro("HelperFunctions.h" ); // make functions visible to TTreeFormula if (!TString(gROOT->GetVersion()).Contains("5.34")) { std::cout << "INCORRECT ROOT VERSION! Please use 5.34:" << std::endl; std::cout << "source /uscmst1/prod/sw/cms/slc5_amd64_gcc462/lcg/root/5.34.02-cms/bin/thisroot.csh" << std::endl; std::cout << "Return without doing anything." << std::endl; return; } //TString curDynamicPath( gSystem->GetDynamicPath() ); //gSystem->SetDynamicPath( "../lib:" + curDynamicPath ); //TString curIncludePath(gSystem->GetIncludePath()); //gSystem->SetIncludePath( " -I../include " + curIncludePath ); // Load the library TMVA::Tools::Instance(); //-------------------------------------------------------------------------- // Default MVA methods to be trained + tested std::map<std::string, int> Use; // --- Mutidimensional likelihood and Nearest-Neighbour methods Use["PDERS"] = 0; Use["PDEFoam"] = 1; Use["KNN"] = 1; // // --- Linear Discriminant Analysis Use["LD"] = 1; // // --- Function Discriminant analysis Use["FDA_GA"] = 1; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; // // --- Neural Network Use["MLP"] = 1; // // --- Support Vector Machine Use["SVM"] = 0; // // --- Boosted Decision Trees Use["BDT"] = 1; Use["BDT1"] = 0; Use["BDTG"] = 0; Use["BDTG1"] = 0; //-------------------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVARegression" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } //-------------------------------------------------------------------------- // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName( "TMVARegFJ.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory will // then run the performance analysis for you. // // The first argument is the base of the name of all the // weightfiles in the directory weights/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVARegressionFJ", outputFile, "!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=Regression" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; const std::vector<std::string> & inputExpressions = GetInputExpressionsFJReg(); const std::vector<std::string> & inputExpressionLabels = GetInputExpressionLabelsFJReg(); assert(inputExpressions.size() == inputExpressionLabels.size()); // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] //factory->AddVariable( "var1", "Variable 1", "units", 'F' ); //factory->AddVariable( "var2", "Variable 2", "units", 'F' ); for (UInt_t iexpr=0; iexpr!=inputExpressions.size(); iexpr++){ Label label = MakeLabel(inputExpressionLabels.at(iexpr)); TString expr = inputExpressions.at(iexpr); factory->AddVariable(expr, label.xlabel, label.unit, label.type); } // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables //factory->AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' ); //factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' ); // Add the variable carrying the regression target //factory->AddTarget( "fvalue" ); factory->AddTarget( "fathFilterJets_genPt" ); // It is also possible to declare additional targets for multi-dimensional regression, ie: // -- factory->AddTarget( "fvalue2" ); // BUT: this is currently ONLY implemented for MLP //-------------------------------------------------------------------------- // Read training and test data TFile *input(0); TString dirname = "skim_ZnnH_regression_fj/"; TString prefix = "skim_"; TString suffix = ".root"; TTree *regTrainTree(0), *regTestTree(0); std::vector<std::string> processes; processes.push_back("ZnnH110"); processes.push_back("ZnnH115"); processes.push_back("ZnnH120"); processes.push_back("ZnnH125"); processes.push_back("ZnnH130"); processes.push_back("ZnnH135"); processes.push_back("ZnnH140"); processes.push_back("ZnnH145"); processes.push_back("ZnnH150"); #ifdef USE_WH processes.push_back("WlnH110"); processes.push_back("WlnH115"); processes.push_back("WlnH120"); processes.push_back("WlnH125"); processes.push_back("WlnH130"); processes.push_back("WlnH135"); processes.push_back("WlnH140"); processes.push_back("WlnH145"); processes.push_back("WlnH150"); #endif std::vector<TFile *> files; for (UInt_t i=0; i<processes.size(); i++){ std::string process = processes.at(i); input = (TFile*) TFile::Open(dirname + prefix + process + suffix, "READ"); if (!input) { std::cout << "ERROR: Could not open input file." << std::endl; exit(1); } std::cout << "--- TMVARegression : Using input file: " << input->GetName() << std::endl; files.push_back(input); // --- Register the regression tree regTrainTree = (TTree*) input->Get("tree_train"); regTestTree = (TTree*) input->Get("tree_test"); // Global event weights per tree (see below for setting event-wise weights) Double_t regWeight = 1.0; // You can add an arbitrary number of regression trees factory->AddRegressionTree(regTrainTree, regWeight, TMVA::Types::kTraining); factory->AddRegressionTree(regTestTree , regWeight, TMVA::Types::kTesting ); } // Set individual event weights (the variables must exist in the original TTree) //factory->SetWeightExpression( "var1", "Regression" ); // Apply additional cuts on the signal and background samples (can be different) TCut mycut = "fathFilterJets_genPt>10 && fathFilterJets_pt>15 && abs(fathFilterJets_eta)<2.5"; // this is to avoid 3rd filter jet without gen match //TCut mycut = "hJet_genPt[0] > 0. && hJet_genPt[1] > 0. && hJet_csv[0] > 0. && hJet_csv[1] > 0. && hJet_pt[0] > 20. && hJet_pt[1] > 20. && abs(hJet_eta[0])<2.5 && abs(hJet_eta[1])<2.5"; // Tell the factory to use all remaining events in the trees after training for testing: factory->PrepareTrainingAndTestTree( mycut, "V:nTrain_Regression=0:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents" ); // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=Random:!V" ); // --- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable // PDE - RS method if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" ); // And the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // Linear discriminant if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "!H:!V:VarTransform=None" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. the formula of this example is good for parabolas factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); // Neural network (MLP) if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" ); // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDT"]) factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:V:NTrees=100:nEventsMin=30:NodePurityLimit=0.5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" ); //"!H:V:NTrees=60:nEventsMin=20:NodePurityLimit=0.5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30:DoBoostMonitor" ); if (Use["BDT1"]) factory->BookMethod( TMVA::Types::kBDT, "BDT1", "!H:V:NTrees=100:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" ); if (Use["BDTG"]) factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:V:NTrees=2000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.7:nCuts=200:MaxDepth=3:NNodesMax=15" ); if (Use["BDTG1"]) factory->BookMethod( TMVA::Types::kBDT, "BDTG1", "!H:V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:MaxDepth=3:NNodesMax=15" ); //-------------------------------------------------------------------------- // Train MVAs using the set of training events factory->TrainAllMethods(); // --- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // --- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); //-------------------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVARegression is done!" << std::endl; for (UInt_t i=0; i<files.size(); i++) files.at(i)->Close(); delete outputFile; delete factory; // Launch the GUI for the root macros //gROOT->SetMacroPath( "$ROOTSYS/tmva/macros/" ); //gROOT->Macro( "$ROOTSYS/tmva/macros/TMVAlogon.C" ); //gROOT->LoadMacro( "$ROOTSYS/tmva/macros/TMVAGui.C" ); //if (!gROOT->IsBatch()) TMVARegGui( outfileName ); }
void TMVARegression( TString myMethodList = "" ) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVARegression.C\(\"myMethod1,myMethod2,myMethod3\"\) // // this loads the library TMVA::Tools::Instance(); //--------------------------------------------------------------- // default MVA methods to be trained + tested std::map<std::string,int> Use; Use["PDERS"] = 0; Use["PDERSkNN"] = 0; // depreciated until further notice Use["PDEFoam"] = 0; // preparation for new TMVA version "reader". This method is not available in this version of TMVA // --- Use["KNN"] = 0; // --- Use["LD"] = 0; // --- Use["FDA_GA"] = 0; Use["FDA_MC"] = 0; Use["FDA_MT"] = 0; Use["FDA_GAMT"] = 0; // --- Use["MLP"] = 0; // --- Use["SVM"] = 0; // --- Use["BDT"] = 0; Use["BDTG"] = 1; // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVARegression" << std::endl; if (myMethodList != "") { for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector<TString> mlist = gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i<mlist.size(); i++) { std::string regMethod(mlist[i]); if (Use.find(regMethod) == Use.end()) { std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl; for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // Create a new root output file TString outfileName( "TMVAReg_TarggetAPDPN.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory will // then run the performance analysis for you. // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVARegression", outputFile, "!V:!Silent:Color:DrawProgressBar" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] // factory->AddVariable( "laser.qmax", "Qmax", "ADC", 'D' ); // factory->AddVariable( "laser.tmax", "T_{0}", "ns", 'D' ); // factory->AddVariable( "laser.fwhm", "FWHM","ns", 'D'); // factory->AddVariable( "laser.prepulse","Prepulse indicator","A.U.", 'D' ); // factory->AddVariable( "laser.w10","Width at 10%","ns", 'D' ); factory->AddVariable( "qmax[0]", "Qmax", "ADC", 'D' ); factory->AddVariable( "tmax[0]", "T_{0}", "ns", 'D' ); factory->AddVariable( "l_fwhm[0]", "FWHM","ns", 'D'); factory->AddVariable( "l_prepulse[0]","Prepulse indicator","A.U.", 'D' ); factory->AddVariable( "l_width90[0]","Width at 10%","ns", 'D' ); // You can add so-called "Spectator variables", which are not used in the MVA training, // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // input variables, the response values of all trained MVAs, and the spectator variables // factory->AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' ); // factory->AddSpectator( "spec2:=timeStamp", "Spectator 1", "units", 'F' ); // Add the variable carrying the regression target factory->AddTarget ( "apdpnAB[0]" ); // It is also possible to declare additional targets for multi-dimensional regression, ie: // -- factory->AddTarget( "fvalue2" ); // BUT: this is currently ONLY implemented for MLP // read training and test data (see TMVAClassification for reading ASCII files) // load the signal and background event samples from ROOT trees // TFile *input(0); // TString fname = "./stabTreeFed630.root"; // if (!gSystem->AccessPathName( fname )) { // input = TFile::Open( fname ); // check if file in local directory exists // } // if (!input) { // std::cout << "ERROR: could not open data file" << std::endl; // exit(1); // } // std::cout << "--- TMVARegression : Using input file: " << input->GetName() << std::endl; TChain * ntu = new TChain("x"); ntu->Add("/data2/EcalLaserMonitoringData/ntuples_2011_158851_178888/ntu_data_0015*.root"); ntu->Add("/data2/EcalLaserMonitoringData/ntuples_2011_158851_178888/ntu_data_0016*.root"); //TTree *regTree = (TTree*)input->Get("ntu"); // global event weights per tree (see below for setting event-wise weights) Double_t regWeight = 1.0; // ====== register trees ==================================================== // // the following method is the prefered one: // you can add an arbitrary number of regression trees //factory->AddRegressionTree( regTree, regWeight ); factory->AddRegressionTree( ntu, regWeight ); // Alternative call: // -- factory->AddRegressionTree( regTree, regWeight ); // This would set individual event weights (the variables defined in the // expression need to exist in the original TTree) // factory->SetWeightExpression( "fwhm-26", "Regression" ); // Apply additional cuts on the signal and background samples (can be different) // TCut mycut = "abs(0.5*(apd0+apd1)-1)<0.015 && timeStamp<1299.8e6"; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1"; cout << "Defining cuts..." << endl; TCut mycut = "fed[0] == 605 && harness == 6 && field > 0.1 && run < 161200 && apdpnAB[0] > 0"; // Count selected events and use them all TCanvas cdummy("cdummy","cdummy"); cdummy.cd(); Int_t selectedEvts = ntu->Draw("l_fwhm[0]",mycut,"N"); cdummy.Delete(); cout << selectedEvts << endl; char trainPara[132]; sprintf(trainPara,"nTrain_Regression=%d:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents:!V",selectedEvts-10); // tell the factory to use all remaining events in the trees after training for testing: factory->PrepareTrainingAndTestTree( mycut, trainPara ); // If no numbers of events are given, half of the events in the tree are used for training, and // the other half for testing: // factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" ); // To also specify the number of testing events, use: // factory->PrepareTrainingAndTestTree( mycut, // "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); // ---- Book MVA methods // // please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable cout << "Booking MVA methods" << endl; // PDE - RS method if (Use["PDERS"]) factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" ); // And the options strings for the MinMax and RMS methods, respectively: // "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); // "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if (Use["PDERSkNN"]) // depreciated until further notice factory->BookMethod( TMVA::Types::kPDERS, "PDERSkNN", "!H:!V:VolumeRangeMode=kNN:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:CutNmin=T:Nmin=10:VarTransform=None" ); // K-Nearest Neighbour classifier (KNN) if (Use["KNN"]) factory->BookMethod( TMVA::Types::kKNN, "KNN", "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ); // Linear discriminant if (Use["LD"]) factory->BookMethod( TMVA::Types::kLD, "LD", "!H:!V:VarTransform=N" ); // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if (Use["FDA_MC"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MC", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" ); if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. the formula of this example is good for parabolas factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" ); if (Use["FDA_MT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_MT", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if (Use["FDA_GAMT"]) factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT", "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); // Neural network (MLP) if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=5000:HiddenLayers=N+5,N+2:TestRate=6:TrainingMethod=BP:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15" ); // Support Vector Machine if (Use["SVM"]) factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ); // Boosted Decision Trees if (Use["BDT"]) factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=100:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" ); if (Use["BDTG"]) factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=200::BoostType=Grad:Shrinkage=1.0:UseBaggedGrad:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" ); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVARegression is done!" << std::endl; delete factory; // Launch the GUI for the root macros //if (!gROOT->IsBatch()) TMVARegGui( outfileName ); }
TString autoencoder (std::string inputFileName) { std::string tmstr (now ()); TString tmstmp (tmstr.c_str ()); std::cout << "==> Start Autoencoder " << std::endl; std::cout << "-------------------- open input file ---------------- " << std::endl; TString fname = pathToData + TString (inputFileName.c_str ()) + TString (".root"); TFile *input = TFile::Open( fname ); std::cout << "-------------------- get tree ---------------- " << std::endl; TTree *tree = (TTree*)input->Get("data"); TString outfileName( "TMVAAutoEnc__" ); outfileName += TString (inputFileName.c_str ()) + TString ("__") + tmstmp + TString (".root"); std::cout << "-------------------- open output file ---------------- " << std::endl; TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); std::cout << "-------------------- prepare factory ---------------- " << std::endl; TMVA::Factory *factory = new TMVA::Factory( "TMVAAutoencoder", outputFile, "AnalysisType=Regression:Color:DrawProgressBar" ); std::cout << "-------------------- add variables ---------------- " << std::endl; for (auto varname : variableNames+additionalVariableNames) { factory->AddVariable (varname.c_str (), 'F'); factory->AddTarget (varname.c_str (), 'F'); } std::cout << "-------------------- add tree ---------------- " << std::endl; // global event weights per tree (see below for setting event-wise weights) Double_t regWeight = 1.0; factory->AddRegressionTree (tree, regWeight); std::cout << "-------------------- prepare ---------------- " << std::endl; TCut mycut = ""; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1"; factory->PrepareTrainingAndTestTree( mycut, "nTrain_Regression=0:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents:!V" ); /* // This would set individual event weights (the variables defined in the */ /* // expression need to exist in the original TTree) */ /* factory->SetWeightExpression( "var1", "Regression" ); */ if (true) { TString layoutString ("Layout=TANH|100,TANH|20,TANH|40,LINEAR"); TString training0 ("LearningRate=1e-5,Momentum=0.5,Repetitions=1,ConvergenceSteps=500,BatchSize=50,TestRepetitions=7,WeightDecay=0.01,Regularization=NONE,DropConfig=0.5+0.5+0.5+0.5,DropRepetitions=2"); TString training1 ("LearningRate=1e-5,Momentum=0.9,Repetitions=1,ConvergenceSteps=500,BatchSize=30,TestRepetitions=7,WeightDecay=0.01,Regularization=L2,DropConfig=0.1+0.1+0.1,DropRepetitions=1"); TString training2 ("LearningRate=1e-4,Momentum=0.3,Repetitions=1,ConvergenceSteps=10,BatchSize=40,TestRepetitions=7,WeightDecay=0.1,Regularization=L2"); TString training3 ("LearningRate=1e-5,Momentum=0.1,Repetitions=1,ConvergenceSteps=10,BatchSize=10,TestRepetitions=7,WeightDecay=0.001,Regularization=NONE"); TString trainingStrategyString ("TrainingStrategy="); trainingStrategyString += training0 + "|" + training1 + "|" + training2 ; //+ "|" + training3; // TString trainingStrategyString ("TrainingStrategy=LearningRate=1e-1,Momentum=0.3,Repetitions=3,ConvergenceSteps=20,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,L1=false,DropFraction=0.0,DropRepetitions=5"); TString nnOptions ("!H:V:ErrorStrategy=SUMOFSQUARES:VarTransform=N:WeightInitialization=XAVIERUNIFORM"); // TString nnOptions ("!H:V:VarTransform=Normalize:ErrorStrategy=CHECKGRADIENTS"); nnOptions.Append (":"); nnOptions.Append (layoutString); nnOptions.Append (":"); nnOptions.Append (trainingStrategyString); factory->BookMethod( TMVA::Types::kNN, TString("NN_")+tmstmp, nnOptions ); // NN } // -------------------------------------------------------------------------------------------------- factory->TrainAllMethods(); factory->TestAllMethods(); factory->EvaluateAllMethods(); outputFile->Close(); // TMVA::TMVARegGui (outfileName); delete factory; return TString("NN_")+tmstmp; }