Example #1
void regressphi() {
   TMVA::Tools::Instance();
   std::cout << "==> Start TMVARegression" << std::endl;
  
   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
   TMVA::Factory *factory = new TMVA::Factory( "mva", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar" );
   factory->AddVariable( "npv"                , 'F' ); 
   factory->AddVariable( "u"                  , 'F' ); 
   factory->AddVariable( "uphi"               , 'F' );
   factory->AddVariable( "chsumet/sumet"      , 'F' ); 
   factory->AddVariable( "tku"                , 'F' );
   factory->AddVariable( "tkuphi"             , 'F' );
   factory->AddVariable( "nopusumet/sumet"    , 'F' );
   factory->AddVariable( "nopuu"              , 'F' );
   factory->AddVariable( "nopuuphi"           , 'F' );
   factory->AddVariable( "pusumet/sumet"      , 'F' );
   factory->AddVariable( "pumet"              , 'F' );
   factory->AddVariable( "pumetphi"           , 'F' );
   factory->AddVariable( "pucsumet/sumet"     , 'F' );
   factory->AddVariable( "pucu"               , 'F' );
   factory->AddVariable( "pucuphi"            , 'F' );
   factory->AddVariable( "jspt_1"             , 'F' );
   factory->AddVariable( "jseta_1"            , 'F' );
   factory->AddVariable( "jsphi_1"            , 'F' );
   factory->AddVariable( "jspt_2"             , 'F' );
   factory->AddVariable( "jseta_2"            , 'F' );
   factory->AddVariable( "jsphi_2"            , 'F' );
   factory->AddVariable( "nalljet"            , 'I' );
   factory->AddVariable( "njet"               , 'I' );
    
   factory->AddTarget( "rphi_z-uphi+ 2.*TMath::Pi()*(rphi_z-uphi < -TMath::Pi()) - 2.*TMath::Pi()*(rphi_z-uphi > TMath::Pi())  " ); 
   TString  lName       = "../Jets/r11-dimu_nochs_v2.root";
   TFile   *lInput      = TFile::Open(lName);
   TTree   *lRegress    = (TTree*)lInput    ->Get("Flat");

   Double_t lRWeight = 1.0;
   factory->AddRegressionTree( lRegress   , lRWeight );
   TCut lCut = "nbtag == 0"; // Cut to remove real MET
   // Stronger cut to remove real MET:
   //   "(rpt_z < 40 || (rpt_z > 40 && rpt_z+u1 < 40)) && nbtag == 0"

   factory->PrepareTrainingAndTestTree( lCut,
					"nTrain_Regression=0:nTest_Regression=0:SplitMode=Block:NormMode=NumEvents:!V" );

   // Boosted Decision Trees
   factory->BookMethod( TMVA::Types::kBDT, "RecoilPhiRegress_data_clean2_njet",
			"!H:!V:VarTransform=None:nEventsMin=200:NTrees=100:BoostType=Grad:Shrinkage=0.1:MaxDepth=100:NNodesMax=100000:UseYesNoLeaf=F:nCuts=2000");//MaxDepth=100

   factory->TrainAllMethods();
   factory->TestAllMethods();
   factory->EvaluateAllMethods();
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVARegression is done!" << std::endl;
   delete factory;
   //if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
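
// For completeness: a minimal sketch (not part of the original macro) of how the
// trained weights could be applied with TMVA::Reader. The weight-file path below
// follows TMVA's usual "<jobName>_<methodName>.weights.xml" convention and is an
// assumption; the Reader must be given every training variable, in the same order.
void applyRegressPhi() {
   TMVA::Reader reader("!Color:!Silent");
   const char* vars[] = { "npv", "u", "uphi", "chsumet/sumet", "tku", "tkuphi",
                          "nopusumet/sumet", "nopuu", "nopuuphi", "pusumet/sumet",
                          "pumet", "pumetphi", "pucsumet/sumet", "pucu", "pucuphi",
                          "jspt_1", "jseta_1", "jsphi_1", "jspt_2", "jseta_2",
                          "jsphi_2", "nalljet", "njet" };
   const int nVars = sizeof(vars)/sizeof(vars[0]);
   Float_t val[nVars];  // TMVA stores all inputs as floats, including the 'I' ones
   for (int i = 0; i < nVars; i++) reader.AddVariable(vars[i], &val[i]);
   reader.BookMVA("BDT", "weights/mva_RecoilPhiRegress_data_clean2_njet.weights.xml");
   // ... fill val[] from the current event (computing the ratio variables), then:
   Float_t phiCorrection = reader.EvaluateRegression("BDT")[0];
   std::cout << "regressed phi correction: " << phiCorrection << std::endl;
}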
Example #2
void REG::Loop() {

  if (fChain == 0) return;


  Long64_t nentries = fChain->GetEntriesFast();
  
  Long64_t nbytes = 0, nb = 0;
  TMVA::Tools::Instance();
  std::cout << std::endl;
  std::cout << "==> Start TMVARegression" << std::endl;
  TString outfileName( "TMVAReg.root" );
  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

  TMVA::Factory *factory = new TMVA::Factory( "TMVARegression", outputFile, 
					      "!V:!Silent:Color:DrawProgressBar" );

  factory->AddVariable( "l1Pt", "Variable 1", "units", 'F' );
  factory->AddVariable( "l1Eta", "Variable 2", "units", 'F' );
  factory->AddVariable( "l1Phi", "Variable 1", "units", 'F' );
//  factory->AddVariable( "RhoL1", "Variable 2", "units", 'F' );

  
  factory->AddTarget( "ratio" ); 
  factory->AddRegressionTree(fChain, 1.0 );
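  // fChain is the input TTree wired up by the (presumably MakeClass-generated)
  // REG skeleton in which this Loop() lives; AddRegressionTree only needs a
  // TTree pointer, so the chain can be reused directly as the regression source.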
  TCut mycut = "";
  factory->PrepareTrainingAndTestTree( mycut, 
				       "nTrain_Regression=10000:nTest_Regression=0:SplitMode=Block:NormMode=NumEvents:!V" );

  factory->BookMethod( TMVA::Types::kBDT, "BDT",
		       "!H:!V:NTrees=100:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" );
  factory->TrainAllMethods();
  // ---- Evaluate all MVAs using the set of test events
  //   factory->TestAllMethods();

  // ----- Evaluate and compare performance of all configured MVAs
  //   factory->EvaluateAllMethods();

   // --------------------------------------------------------------
   
   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVARegression is done!" << std::endl;      

   delete factory;
}
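
// main() below relies on two external helpers that are not part of this listing:
// a config parser (parseConfigFile/gConfigParser) and FillChain. A minimal sketch
// of FillChain, assuming the input list is a plain-text file with one ROOT file
// path per line (the real helper may differ):
#include <fstream>
#include <string>
void FillChain(TChain& chain, const char* inputFileList) {
   std::ifstream list(inputFileList);
   std::string file;
   while (std::getline(list, file))
      if (!file.empty()) chain.Add(file.c_str());   // register each listed ROOT file
}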
int main(int argc, char**argv){
 
 if(argc != 2){
   std::cerr << " >>>>> analysis.cpp::usage: " << argv[0] << " configFileName" << std::endl;
   return 1;
 }

 parseConfigFile (argv[1]) ;

 // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
 // if you use your private .rootrc, or run from a different directory, please copy the 
 // corresponding lines from .rootrc
 // methods to be processed can be given as an argument; use format:
 //
 // mylinux~> root -l TMVARegression.C\(\"myMethod1,myMethod2,myMethod3\"\)
 //
 //---------------------------------------------------------------
 // This loads the library
 TMVA::Tools::Instance();

 std::vector<std::string> UseMethodName;
 UseMethodName = gConfigParser -> readStringListOption("Input::UseMethodName");

 std::cout << " >>>>> Input::UseMethodName size = " << UseMethodName.size() << std::endl;  
 std::cout << " >>>>> >>>>>  "; 
 for (unsigned int iCat = 0; iCat < UseMethodName.size(); iCat++){
  std::cout << " " << UseMethodName.at(iCat) << ", ";
 }
 std::cout << std::endl; 

 
 
 // ---------------------------------------------------------------

 std::cout << std::endl;
 std::cout << "==> Start TMVARegression" << std::endl;

 std::map<std::string,int> Use;

 for(std::vector<std::string>::iterator it=UseMethodName.begin(); it!=UseMethodName.end(); ++it) Use[*it]=0;

 std::string UseMethodFlag;
 try{ UseMethodFlag = gConfigParser -> readStringOption("Input::UseMethodFlag");
      std::cout<< UseMethodFlag<<std::endl;
      std::vector<TString> mlist = gTools().SplitString( UseMethodFlag, '/' );
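      // e.g. a config line (assumed format)  Input::UseMethodFlag = BDT/MLP
      // books exactly those methods; names are separated by '/'.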
      for (UInt_t i=0; i<mlist.size(); i++) {
     
      std::string regMethod(mlist[i]);
      if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return -1;
         }
         Use[regMethod] = 1;
         std::cout << "regMethod= " << regMethod<< " 1 "<<std::endl;

      }
     }
 catch (char * exception){
   std::cerr << " exception caught => using all methods " << std::endl;
   for(std::vector<std::string>::iterator it=UseMethodName.begin(); it!=UseMethodName.end(); it++) Use[*it]=1;
 }
 

 // --------------------------------------------------------------------------------------------------
 // --- Here the preparation phase begins
 // Create a new root output file
 
 std::string outputFileName =  gConfigParser -> readStringOption("Output::outputFileName");
 std::cout<<" Output Data File = "<<outputFileName<<std::endl;
 
 TFile* outputFile = TFile::Open( outputFileName.c_str(), "RECREATE" );

 // Create the factory object. Later you can choose the methods
 // whose performance you'd like to investigate. The factory will
 // then run the performance analysis for you.
 // The first argument is the base of the name of all the
 // weightfiles in the directory weights/
 // All TMVA output can be suppressed by removing the "!" (not) in 
 // front of the "Silent" argument in the option string

 // Read training and test data (see TMVAClassification for reading ASCII files)
 // load the signal and background event samples from ROOT trees
 std::string inputFileList =  gConfigParser -> readStringOption("Input::inputFileList");
 std::string treeNameDATA =  gConfigParser -> readStringOption("Input::treeNameDATA");

 std::cout<<" Input Data List = "<<inputFileList<<std::endl;

 TChain* treeDATA = new TChain(treeNameDATA.c_str());

 FillChain(*treeDATA,inputFileList.c_str());


 TMVA::Factory *factory = new TMVA::Factory( "TMVARegression", outputFile, 
                                               "!V:!Silent:Color:DrawProgressBar" );

 // If you wish to modify default settings 
 // (please check "src/Config.h" to see all available global options)
 //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
 //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";
 // Define the input variables that shall be used for the MVA training
 // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
 // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
 std::string RegionOfTraining =  gConfigParser -> readStringOption("Input::RegionOfTraining");
 std::cout<<" RegionOfTraining = "<<RegionOfTraining<<std::endl;
 
 if(RegionOfTraining=="EB"){


 factory->AddVariable( "ele1_scE/ele1_scERaw" , 'F');   
 factory->AddVariable( "ele1_eRegrInput_nPV" , 'F');
 factory->AddVariable( "ele1_eRegrInput_r9" , 'F');
 factory->AddVariable( "ele1_fbrem" , 'F');
 factory->AddVariable( "ele1_eta" , 'F');
 factory->AddVariable( "ele1_DphiIn" , 'F');
 factory->AddVariable( "ele1_DetaIn" , 'F');
 factory->AddVariable( "ele1_sigmaIetaIeta" , 'F');

 factory->AddVariable( "ele1_eRegrInput_etaW" , 'F');
 factory->AddVariable( "ele1_eRegrInput_phiW" , 'F');

 factory->AddVariable( "ele1_eRegrInput_bCE_Over_sCE", 'F');
 factory->AddVariable( "ele1_eRegrInput_sigietaieta_bC1" , 'F');
 factory->AddVariable( "ele1_eRegrInput_sigiphiiphi_bC1" , 'F');
 factory->AddVariable( "ele1_eRegrInput_sigietaiphi_bC1" , 'F');
 factory->AddVariable( "ele1_eRegrInput_e3x3_Over_bCE" , 'F');
 factory->AddVariable( "ele1_eRegrInput_Deta_bC_sC" , 'F');
 factory->AddVariable( "ele1_eRegrInput_Dphi_bC_sC" , 'F');
 factory->AddVariable( "ele1_eRegrInput_bEMax_Over_bCE" , 'F');


 factory->AddVariable( "ele1_dxy_PV" , 'F');
 factory->AddVariable( "ele1_dz_PV" , 'F');
 factory->AddVariable( "ele1_sigmaP/ele1_tkP" , 'F');

 factory->AddVariable( "ele1_eRegrInput_bCELow_Over_sCE", 'F');
 factory->AddVariable( "ele1_eRegrInput_e3x3_Over_bCELow" , 'F');
 factory->AddVariable( "ele1_eRegrInput_Deta_bCLow_sC" , 'F');
 factory->AddVariable( "ele1_eRegrInput_Dphi_bCLow_sC" , 'F');

 factory->AddVariable( "ele1_eRegrInput_seedbC_etacry" , 'F');
 factory->AddVariable( "ele1_eRegrInput_seedbC_phicry" , 'F');

 // You can add so-called "Spectator variables", which are not used in the MVA training, 
 // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
 // input variables, the response values of all trained MVAs, and the spectator variables
 // factory->AddSpectator( "spec1:=var1*2",  "Spectator 1", "units", 'F' );
 // factory->AddSpectator( "spec2:=var1*3",  "Spectator 2", "units", 'F' );
 // Add the variable carrying the regression target
 // factory->AddTarget("ele1_scE/ele1_E_true" );
 factory->AddTarget("ele1_tkP/ele1_E_true" );


 // It is also possible to declare additional targets for multi-dimensional regression, ie:
 // -- factory->AddTarget( "fvalue2" );
 // BUT: this is currently ONLY implemented for MLP

 // global event weights per tree (see below for setting event-wise weights)
 Double_t regWeight  = 1.0;   

 // You can add an arbitrary number of regression trees
 factory->AddRegressionTree( treeDATA, regWeight );

 // This would set individual event weights (the variables defined in the 
 // expression need to exist in the original TTree)
 // factory->SetWeightExpression( "var1", "Regression" );

//  TCut mycut = "ele1_isEB==1 && ele1_sigmaP/ele1_tkP<0.4 && ele1_fbrem>0 && abs(ele1_dxy_PV)<0.05 && abs(ele1_dz_PV)<0.05 && ele1_eRegrInput_etaW > 0.006 && ele1_eRegrInput_phiW<0.08 && ele1_eRegrInput_sigietaieta_bC1>0.006 && ele1_eRegrInput_sigiphiiphi_bC1>0.008  && abs(ele1_eRegrInput_Deta_bC_sC)<0.004 && abs(ele1_eRegrInput_Dphi_bC_sC)<0.04 && abs(ele1_eRegrInput_seedbC_etacry)<0.6 && abs(ele1_eRegrInput_seedbC_phicry)<0.6 && ele1_scE/ele1_scERaw<1.2 && (ele1_scE/ele1_E_true)<1.4 && (ele1_scE/ele1_E_true)>0.3"; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1";

  TCut mycut = "ele1_isEB==1 && ele1_sigmaP/ele1_tkP<0.4 && ele1_fbrem>0 && abs(ele1_dxy_PV)<0.05 && abs(ele1_dz_PV)<0.05 && ele1_eRegrInput_etaW > 0.006 && ele1_eRegrInput_phiW<0.08 && ele1_eRegrInput_sigietaieta_bC1>0.006 && ele1_eRegrInput_sigiphiiphi_bC1>0.008  && abs(ele1_eRegrInput_Deta_bC_sC)<0.004 && abs(ele1_eRegrInput_Dphi_bC_sC)<0.04 && abs(ele1_eRegrInput_seedbC_etacry)<0.6 && abs(ele1_eRegrInput_seedbC_phicry)<0.6 && ele1_scE/ele1_scERaw<1.2 && ele1_tkP/ele1_E_true<1.8 && ele1_tkP/ele1_E_true>0.2"; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1";


 // tell the factory to use all remaining events in the trees after training for testing:
 factory->PrepareTrainingAndTestTree( mycut, 
                                        "nTrain_Regression=2500000:nTest_Regression=2500000:SplitMode=Random:NormMode=NumEvents:!V" );
 
 TString Name = Form("weight_%s_%s_P_W",RegionOfTraining.c_str(),UseMethodFlag.c_str());
 (TMVA::gConfig().GetIONames()).fWeightFileDir = Name;
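 // fWeightFileDir steers where TMVA writes the trained weight files (default
 // "weights/"); here one directory per training region and method.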
 } 

 if(RegionOfTraining=="EE"){

 factory->AddVariable( "ele1_scE/ele1_scERaw" , 'F');      
 factory->AddVariable( "ele1_eRegrInput_nPV",'F');
 factory->AddVariable( "ele1_eRegrInput_r9",'F');
 factory->AddVariable( "ele1_fbrem",'F');
 factory->AddVariable( "ele1_eta",'F');
 factory->AddVariable( "ele1_DphiIn",'F');
 factory->AddVariable( "ele1_DetaIn",'F');
 factory->AddVariable( "ele1_sigmaIetaIeta",'F');

 factory->AddVariable( "ele1_eRegrInput_etaW",'F');
 factory->AddVariable( "ele1_eRegrInput_phiW",'F');

 factory->AddVariable( "ele1_dxy_PV",'F');
 factory->AddVariable( "ele1_dz_PV",'F');
 factory->AddVariable( "ele1_sigmaP/ele1_tkP",'F');

   
 // You can add so-called "Spectator variables", which are not used in the MVA training, 
 // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
 // input variables, the response values of all trained MVAs, and the spectator variables
 // factory->AddSpectator( "spec1:=var1*2",  "Spectator 1", "units", 'F' );
 // factory->AddSpectator( "spec2:=var1*3",  "Spectator 2", "units", 'F' );
 // Add the variable carrying the regression target

//  factory->AddTarget("ele1_scE/ele1_E_true" ); 
  factory->AddTarget("ele1_tkP/ele1_E_true" );

 // It is also possible to declare additional targets for multi-dimensional regression, ie:
 // -- factory->AddTarget( "fvalue2" );
 // BUT: this is currently ONLY implemented for MLP

 // global event weights per tree (see below for setting event-wise weights)
 Double_t regWeight  = 1.0;   

 // You can add an arbitrary number of regression trees
 factory->AddRegressionTree( treeDATA, regWeight );

 // This would set individual event weights (the variables defined in the 
 // expression need to exist in the original TTree)
 // factory->SetWeightExpression( "var1", "Regression" );
//  TCut mycut = "ele1_isEB==0 && ele1_sigmaP/ele1_tkP<0.4 && ele1_fbrem>0 && abs(ele1_dxy_PV)<0.05 && abs(ele1_dz_PV)<0.05 &&(ele1_scE/ele1_E_true)<1.4 && (ele1_scE/ele1_E_true)>0.3";
  TCut mycut = "ele1_isEB==0 && ele1_sigmaP/ele1_tkP<0.4 && ele1_fbrem>0 && abs(ele1_dxy_PV)<0.05 && abs(ele1_dz_PV)<0.05 && (ele1_tkP/ele1_E_true)<1.6";

 // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1";
 // tell the factory to use all remaining events in the trees after training for testing:
 factory->PrepareTrainingAndTestTree( mycut, 
                                        "nTrain_Regression=3000000:nTest_Regression=3000000:SplitMode=Random:NormMode=NumEvents:!V" );

 TString Name = Form("weight_%s_%s_P_W",RegionOfTraining.c_str(),UseMethodFlag.c_str());
 (TMVA::gConfig().GetIONames()).fWeightFileDir = Name;

 }
 // Apply additional cuts on the signal and background samples (can be different)

 // If no numbers of events are given, half of the events in the tree are used
 // for training, and the other half for testing:
 //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=Random:!V" );

 // ---- Book MVA methods
 //
 // please lookup the various method configuration options in the corresponding cxx files, eg:
 // src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
 // it is possible to preset ranges in the option string in which the cut optimisation should be done:
 // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

 // PDE - RS method
  if (Use["PDERS"])
     factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" );
   // And the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );   
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );   

 if (Use["PDEFoam"])
   factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.3:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" );

 // K-Nearest Neighbour classifier (KNN)
 if (Use["KNN"])
   factory->BookMethod( TMVA::Types::kKNN, "KNN", "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

 // Linear discriminant
 if (Use["LD"])  factory->BookMethod( TMVA::Types::kLD, "LD","!H:!V:VarTransform=G,U,D" );

 // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
 if (Use["FDA_MC"]) 
     factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                          "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" );
   
 if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. the formula of this example is good for parabolas
   factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" );

 if (Use["FDA_MT"]) 
   factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

 if (Use["FDA_GAMT"]) 
   factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   // Neural network (MLP)
 if (Use["MLP"])
//       factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" );
//         factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=200:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" );
// 	factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=400:HiddenLayers=N+10:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15" );
// 	factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=N:NeuronType=tanh:NCycles=200:HiddenLayers=N+10:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15" );
// 	factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=G,N:NeuronType=tanh:NCycles=200:HiddenLayers=N+5:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15" );
   factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:NCycles=250:HiddenLayers=N+5:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:TestRate=10");
	
   // Support Vector Machine
 if (Use["SVM"])
   factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=N" );
//     factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=N,G" );

   // Boosted Decision Trees
 if (Use["BDT"])
//      factory->BookMethod( TMVA::Types::kBDT, "BDT","!H:!V:NTrees=100:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" );
//         factory->BookMethod( TMVA::Types::kBDT, "BDT","!H:!V:NTrees=200:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:PruneMethod=CostComplexity:PruneStrength=30" );
//         factory->BookMethod( TMVA::Types::kBDT, "BDT","!H:!V:NTrees=300:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:PruneMethod=CostComplexity:PruneStrength=30" );
//  	factory->BookMethod( TMVA::Types::kBDT, "BDT","!H:!V:NTrees=100:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:PruneMethod=CostComplexity:PruneStrength=30" );
   factory->BookMethod( TMVA::Types::kBDT, "BDT","!H:!V:NTrees=100:nEventsMin=20:BoostType=AdaBoostR2:SeparationType=RegressionVariance:PruneMethod=CostComplexity:PruneStrength=30");
	
 if (Use["BDTG"])
//      factory->BookMethod( TMVA::Types::kBDT, "BDTG","!H:!V:NTrees=2000::BoostType=Grad:Shrinkage=0.1:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:MaxDepth=3:NNodesMax=15" );
   factory->BookMethod( TMVA::Types::kBDT, "BDTG","!H:!V:NTrees=1000::BoostType=Grad:Shrinkage=0.1:UseBaggedGrad:GradBaggingFraction=0.5:MaxDepth=5:NNodesMax=25:PruneMethod=CostComplexity:PruneStrength=30");
   // --------------------------------------------------------------------------------------------------
   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

 // Train MVAs using the set of training events
 factory->TrainAllMethods();

 // ---- Evaluate all MVAs using the set of test events
 factory->TestAllMethods();

 // ----- Evaluate and compare performance of all configured MVAs
 factory->EvaluateAllMethods();    

 // --------------------------------------------------------------
   
 // Save the output
 outputFile->Close();

 std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
 std::cout << "==> TMVARegression is done!" << std::endl;      

 delete factory;

 // Launch the GUI for the root macros
//  if (!gROOT->IsBatch()) TMVARegGui( outputFileName.c_str() );

 return 0;
}
void BJetRegression( TString myMethodList = "" ) 
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the 
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVARegression.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //

   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Multidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]           = 0;
   Use["PDEFoam"]         = 1; 
   Use["KNN"]             = 1;
   // 
   // --- Linear Discriminant Analysis
   Use["LD"]		        = 1;
   // 
   // --- Function Discriminant analysis
   Use["FDA_GA"]          = 1;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   // 
   // --- Neural Network
   Use["MLP"]             = 1; 
   // 
   // --- Support Vector Machine 
   Use["SVM"]             = 0;
   // 
   // --- Boosted Decision Trees
   Use["BDT"]             = 0;
   Use["BDTG"]            = 1;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVARegression" << std::endl;

   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // --------------------------------------------------------------------------------------------------

   // --- Here the preparation phase begins

   // Create a new root output file
   TString outfileName( "TMVAReg_CSVJ1.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory will
   // then run the performance analysis for you.
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weights/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in 
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVARegression", outputFile, 
                                               "!V:!Silent:Color:DrawProgressBar" );

   // If you wish to modify default settings 
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
//    std::string inputVariables[] =  {"CSVJ1PtUncorr", "CSVJ1Pt", "CSVJ1Et", "CSVJ1Mt", "CSVJ1ptLeadTrk",
//                                               "CSVJ1Vtx3dL", "CSVJ1Vtx3deL", "CSVJ1vtxMass", "CSVJ1VtxPt",
//                                               "CSVJ1SoftLeptPtRel", "CSVJ1SoftLeptPt",
//                                               "CSVJ1SoftLeptdR" , "CSVJ1Ntot"};
   std::string inputVariables[] =  {"jetPtUncorr", "jetPt", "jetEt", "jetMt", "jetptLeadTrk",
                                    "jetVtx3dL", "jetVtx3deL", "jetvtxMass", "jetVtxPt",
                                    "jetSoftLeptPtRel", "jetSoftLeptPt",
                                    "jetSoftLeptdR", "jetNtot", "jetJECUnc"};
   const int nInputVariables = sizeof(inputVariables)/sizeof(inputVariables[0]);  // stays in sync with the list above
   for (int ivar = 0; ivar < nInputVariables; ivar++) {
      factory->AddVariable( inputVariables[ivar], inputVariables[ivar], "units", 'F' );
   }
   // You can add so-called "Spectator variables", which are not used in the MVA training, 
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
   // input variables, the response values of all trained MVAs, and the spectator variables
//    factory->AddSpectator( "spec1:=var1*2",  "Spectator 1", "units", 'F' );
//    factory->AddSpectator( "spec2:=var1*3",  "Spectator 2", "units", 'F' );

   // Add the variable carrying the regression target
//    factory->AddTarget( "matchGenJet1Pt" ); 
   factory->AddTarget( "matchGenJetPt" ); 

   // It is also possible to declare additional targets for multi-dimensional regression, ie:
   // -- factory->AddTarget( "fvalue2" );
   // BUT: this is currently ONLY implemented for MLP

   // Read training and test data (see TMVAClassification for reading ASCII files)
   // load the signal and background event samples from ROOT trees
   TFile *input(0);
   TString fname = "/scratch/zmao/regression/allSample_both_isobTag.root";
   if (!gSystem->AccessPathName( fname )) 
      input = TFile::Open( fname ); // check if file in local directory exists
   else 
      input = TFile::Open( "http://root.cern.ch/files/tmva_reg_example.root" ); // if not: download from ROOT server
   
   if (!input) {
      std::cout << "ERROR: could not open data file" << std::endl;
      exit(1);
   }
   std::cout << "--- TMVARegression           : Using input file: " << input->GetName() << std::endl;

   // --- Register the regression tree

   TTree *regTree = (TTree*)input->Get("eventTree");

   // global event weights per tree (see below for setting event-wise weights)
   Double_t regWeight  = 1.0;   

   // You can add an arbitrary number of regression trees
   factory->AddRegressionTree( regTree, regWeight );

   // This would set individual event weights (the variables defined in the 
   // expression need to exist in the original TTree)
//    factory->SetWeightExpression( "triggerEff", "Regression" );

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycut = ""; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1";

   // tell the factory to use all remaining events in the trees after training for testing:
   factory->PrepareTrainingAndTestTree( mycut, 
                                        "nTrain_Regression=0:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // If no numbers of events are given, half of the events in the tree are used 
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );  

   // ---- Book MVA methods
   //
   // please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // PDE - RS method
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS", 
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" );
   // And the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );   
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );   

   if (Use["PDEFoam"])
       factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", 
			    "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN", 
                           "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // Linear discriminant
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", 
                           "!H:!V:VarTransform=None" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"]) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                          "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" );
   
   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. the formula of this example is good for parabolas
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" );

   if (Use["FDA_MT"]) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"]) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   // Neural network (MLP)
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" );

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDT"])
     factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=100:MinNodeSize=1.0%:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" );

   if (Use["BDTG"])
     factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=2000::BoostType=Grad:Shrinkage=0.1:UseBaggedBoost:BaggedSampleFraction=0.7:nCuts=200:MaxDepth=3:NNodesMax=15" );
   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();    

   // --------------------------------------------------------------
   
   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVARegression is done!" << std::endl;      

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVARegGui( outfileName );
}
Example #5
////////////////////////////////////////////////////////////////////////////////
/// Main                                                                     ///
////////////////////////////////////////////////////////////////////////////////
void TrainRegressionFJ(TString myMethodList="")
{
    gROOT->SetBatch(1);
    gROOT->LoadMacro("HelperFunctions.h" );  // make functions visible to TTreeFormula

    if (!TString(gROOT->GetVersion()).Contains("5.34")) {
        std::cout << "INCORRECT ROOT VERSION! Please use 5.34:" << std::endl;
        std::cout << "source /uscmst1/prod/sw/cms/slc5_amd64_gcc462/lcg/root/5.34.02-cms/bin/thisroot.csh" << std::endl;
        std::cout << "Return without doing anything." << std::endl;
        return;
    }
    
    //TString curDynamicPath( gSystem->GetDynamicPath() );
    //gSystem->SetDynamicPath( "../lib:" + curDynamicPath );

    //TString curIncludePath(gSystem->GetIncludePath());
    //gSystem->SetIncludePath( " -I../include " + curIncludePath );

    // Load the library
    TMVA::Tools::Instance();


    //--------------------------------------------------------------------------
    // Default MVA methods to be trained + tested
    std::map<std::string, int> Use;

    // --- Multidimensional likelihood and Nearest-Neighbour methods
    Use["PDERS"]           = 0;
    Use["PDEFoam"]         = 1;
    Use["KNN"]             = 1;
    //
    // --- Linear Discriminant Analysis
    Use["LD"]              = 1;
    //
    // --- Function Discriminant analysis
    Use["FDA_GA"]          = 1;
    Use["FDA_MC"]          = 0;
    Use["FDA_MT"]          = 0;
    Use["FDA_GAMT"]        = 0;
    //
    // --- Neural Network
    Use["MLP"]             = 1; 
    //
    // --- Support Vector Machine 
    Use["SVM"]             = 0;
    // 
    // --- Boosted Decision Trees
    Use["BDT"]             = 1;
    Use["BDT1"]            = 0;
    Use["BDTG"]            = 0;
    Use["BDTG1"]           = 0;

    //--------------------------------------------------------------------------
    std::cout << std::endl;
    std::cout << "==> Start TMVARegression" << std::endl;

    // Select methods (don't look at this code - not of interest)
    if (myMethodList != "") {
        for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

        std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
        for (UInt_t i=0; i<mlist.size(); i++) {
            std::string regMethod(mlist[i]);

            if (Use.find(regMethod) == Use.end()) {
                std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
                for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
                std::cout << std::endl;
                return;
            }
            Use[regMethod] = 1;
        }
    }

    //--------------------------------------------------------------------------
    // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
    TString outfileName( "TMVARegFJ.root" );
    TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

    // Create the factory object. Later you can choose the methods
    // whose performance you'd like to investigate. The factory will
    // then run the performance analysis for you.
    //
    // The first argument is the base of the name of all the
    // weightfiles in the directory weights/
    //
    // The second argument is the output file for the training results
    // All TMVA output can be suppressed by removing the "!" (not) in 
    // front of the "Silent" argument in the option string
    TMVA::Factory *factory = new TMVA::Factory( "TMVARegressionFJ", outputFile, 
                                                "!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=Regression" );

    // If you wish to modify default settings
    // (please check "src/Config.h" to see all available global options)
    //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
    //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

    const std::vector<std::string> & inputExpressions      = GetInputExpressionsFJReg();
    const std::vector<std::string> & inputExpressionLabels = GetInputExpressionLabelsFJReg();
    assert(inputExpressions.size() == inputExpressionLabels.size());
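
    // HelperFunctions.h (loaded above) is assumed to provide these expression
    // lists plus the Label/MakeLabel utilities used below, presumably along
    // the lines of:
    //   struct Label { TString xlabel; TString unit; char type; };
    //   Label MakeLabel(const std::string& spec);  // parses e.g. "p_{T};GeV;F"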

    // Define the input variables that shall be used for the MVA training
    // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    //factory->AddVariable( "var1", "Variable 1", "units", 'F' );
    //factory->AddVariable( "var2", "Variable 2", "units", 'F' );
    
    for (UInt_t iexpr=0; iexpr!=inputExpressions.size(); iexpr++){
        Label label = MakeLabel(inputExpressionLabels.at(iexpr));
        TString expr = inputExpressions.at(iexpr);
        factory->AddVariable(expr, label.xlabel, label.unit, label.type);
    }

    // You can add so-called "Spectator variables", which are not used in the MVA training,
    // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
    // input variables, the response values of all trained MVAs, and the spectator variables
    //factory->AddSpectator( "spec1 := var1*2",  "Spectator 1", "units", 'F' );
    //factory->AddSpectator( "spec2 := var1*3",  "Spectator 2", "units", 'F' );

    // Add the variable carrying the regression target
    //factory->AddTarget( "fvalue" );
    factory->AddTarget( "fathFilterJets_genPt" );

    // It is also possible to declare additional targets for multi-dimensional regression, ie:
    // -- factory->AddTarget( "fvalue2" );
    // BUT: this is currently ONLY implemented for MLP

    //--------------------------------------------------------------------------
    // Read training and test data
    TFile *input(0);
    TString dirname = "skim_ZnnH_regression_fj/";
    TString prefix  = "skim_";
    TString suffix  = ".root";
    TTree *regTrainTree(0), *regTestTree(0);
    
    std::vector<std::string> processes;
    processes.push_back("ZnnH110");
    processes.push_back("ZnnH115");
    processes.push_back("ZnnH120");
    processes.push_back("ZnnH125");
    processes.push_back("ZnnH130");
    processes.push_back("ZnnH135");
    processes.push_back("ZnnH140");
    processes.push_back("ZnnH145");
    processes.push_back("ZnnH150");
#ifdef USE_WH
    processes.push_back("WlnH110");
    processes.push_back("WlnH115");
    processes.push_back("WlnH120");
    processes.push_back("WlnH125");
    processes.push_back("WlnH130");
    processes.push_back("WlnH135");
    processes.push_back("WlnH140");
    processes.push_back("WlnH145");
    processes.push_back("WlnH150");
#endif

    std::vector<TFile *> files;
    for (UInt_t i=0; i<processes.size(); i++){
        std::string process = processes.at(i);
        input = (TFile*) TFile::Open(dirname + prefix + process + suffix, "READ");
        if (!input) {
            std::cout << "ERROR: Could not open input file." << std::endl;
            exit(1);
        }
        std::cout << "--- TMVARegression           : Using input file: " << input->GetName() << std::endl;
        files.push_back(input);
        
        // --- Register the regression tree
        regTrainTree = (TTree*) input->Get("tree_train");
        regTestTree  = (TTree*) input->Get("tree_test");

        // Global event weights per tree (see below for setting event-wise weights)
        Double_t regWeight = 1.0;

        // You can add an arbitrary number of regression trees
        factory->AddRegressionTree(regTrainTree, regWeight, TMVA::Types::kTraining);
        factory->AddRegressionTree(regTestTree , regWeight, TMVA::Types::kTesting );
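
        // Registering the trees explicitly as kTraining/kTesting pins every
        // event to its split up front; PrepareTrainingAndTestTree below then
        // only applies the selection cut.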
    }

    // Set individual event weights (the variables must exist in the original TTree)
    //factory->SetWeightExpression( "var1", "Regression" );

    // Apply additional cuts on the signal and background samples (can be different)
    TCut mycut = "fathFilterJets_genPt>10 && fathFilterJets_pt>15 && abs(fathFilterJets_eta)<2.5"; // this is to avoid 3rd filter jet without gen match
    //TCut mycut = "hJet_genPt[0] > 0. && hJet_genPt[1] > 0. && hJet_csv[0] > 0. && hJet_csv[1] > 0. && hJet_pt[0] > 20. && hJet_pt[1] > 20. && abs(hJet_eta[0])<2.5 && abs(hJet_eta[1])<2.5";

    // Tell the factory to use all remaining events in the trees after training for testing:
    factory->PrepareTrainingAndTestTree( mycut, "V:nTrain_Regression=0:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents" );

    // If no numbers of events are given, half of the events in the tree are used 
    // for training, and the other half for testing:
    //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=Random:!V" );

    // --- Book MVA methods
    //
    // Please lookup the various method configuration options in the corresponding cxx files, eg:
    // src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    // it is possible to preset ranges in the option string in which the cut optimisation should be done:
    // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    // PDE - RS method
    if (Use["PDERS"])
        factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                             "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" );
    // And the options strings for the MinMax and RMS methods, respectively:
    //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );   
    //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );   

    if (Use["PDEFoam"])
        factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                             "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" );

    // K-Nearest Neighbour classifier (KNN)
    if (Use["KNN"])
        factory->BookMethod( TMVA::Types::kKNN, "KNN",
                             "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

    // Linear discriminant
    if (Use["LD"])
        factory->BookMethod( TMVA::Types::kLD, "LD", 
                             "!H:!V:VarTransform=None" );

    // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if (Use["FDA_MC"])
        factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                             "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" );

    if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. the formula of this example is good for parabolas
        factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                             "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" );

    if (Use["FDA_MT"])
        factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                             "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

    if (Use["FDA_GAMT"])
        factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                             "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

    // Neural network (MLP)
    if (Use["MLP"])
        factory->BookMethod( TMVA::Types::kMLP, "MLP", 
                             "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" );

    // Support Vector Machine
    if (Use["SVM"])
        factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

    // Boosted Decision Trees
    if (Use["BDT"])
        factory->BookMethod( TMVA::Types::kBDT, "BDT",
                             "!H:V:NTrees=100:nEventsMin=30:NodePurityLimit=0.5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" );
//"!H:V:NTrees=60:nEventsMin=20:NodePurityLimit=0.5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30:DoBoostMonitor" );

    if (Use["BDT1"])
        factory->BookMethod( TMVA::Types::kBDT, "BDT1",
                             "!H:V:NTrees=100:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" );

    if (Use["BDTG"])
        factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                             "!H:V:NTrees=2000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.7:nCuts=200:MaxDepth=3:NNodesMax=15" );

    if (Use["BDTG1"])
        factory->BookMethod( TMVA::Types::kBDT, "BDTG1",
                             "!H:V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:MaxDepth=3:NNodesMax=15" );

    //--------------------------------------------------------------------------
    // Train MVAs using the set of training events
    factory->TrainAllMethods();

    // --- Evaluate all MVAs using the set of test events
    factory->TestAllMethods();

    // --- Evaluate and compare performance of all configured MVAs
    factory->EvaluateAllMethods();

    //--------------------------------------------------------------------------
    // Save the output
    outputFile->Close();

    std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
    std::cout << "==> TMVARegression is done!" << std::endl;

    for (UInt_t i=0; i<files.size(); i++)
        files.at(i)->Close();

    delete outputFile;
    delete factory;

    // Launch the GUI for the root macros
    //gROOT->SetMacroPath( "$ROOTSYS/tmva/macros/" );
    //gROOT->Macro( "$ROOTSYS/tmva/macros/TMVAlogon.C" );
    //gROOT->LoadMacro( "$ROOTSYS/tmva/macros/TMVAGui.C" );
    //if (!gROOT->IsBatch()) TMVARegGui( outfileName );
}
Example #6
void TMVARegression( int optimIndex, int Cat=0, TString myMethodList = "" ) 
{

   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the 
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVARegression.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //

   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Multidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]           = 0;
   Use["PDEFoam"]         = 0; 
   Use["KNN"]             = 0;
   // 
   // --- Linear Discriminant Analysis
   Use["LD"]		        = 0;
   // 
   // --- Function Discriminant analysis
   Use["FDA_GA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   // 
   // --- Neural Network
   Use["MLP"]             = 0; 
   // 
   // --- Support Vector Machine 
   Use["SVM"]             = 0;
   // 
   // --- Boosted Decision Trees
   Use["BDT"]             = 0;
   Use["BDTG"]            = 1;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVARegression" << std::endl;

   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // --------------------------------------------------------------------------------------------------

   // --- Here the preparation phase begins

   // Create a new root output file
   TString outfileName( Form("TMVAoutput/TMVAReg_%i.root",optimIndex) );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory will
   // then run the performance analysis for you.
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weights/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in 
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( Form("TMVARegression_%i_Cat%i",optimIndex,Cat), outputFile, 
                                               "!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=Regression" );

   // If you wish to modify default settings 
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]

   factory->AddVariable( "jet_eta", "jet_eta", "units", 'F' );
   factory->AddVariable( "jet_emfrac", "jet_emfrac", "units", 'F' );
   factory->AddVariable( "jet_hadfrac", "jet_hadfrac", "units", 'F' );
   factory->AddVariable( "jet_nconstituents", "jet_nconst", "units", 'F' );
   factory->AddVariable( "jet_vtx3dL", "jet_vtx3dL", "units", 'F' );
   factory->AddVariable( "MET", "MET", "units", 'F' );
   factory->AddVariable( "jet_dPhiMETJet", "jet_dPhiMETJet", "units", 'F' );
   
   //factory->AddVariable( "hJet_vtxPt", "hJet_vtxPt", "units", 'F' );
   //factory->AddVariable( "hJet_JECUnc", "hJet_JECUnc", "units", 'F' );
   //factory->AddVariable( "hJet_ptLeadTrack", "hJet_ptLeadTrack", "units", 'F' );
   //factory->AddVariable( "hJet_SoftLeptPtCut:=(hJet_SoftLeptIdlooseMu>0. || hJet_SoftLeptId95>0.) ? (hJet_SoftLeptPt) : (-99)", "hJet_SoftLeptPt", "units", 'F' );
   //factory->AddVariable( "hJet_En", "hJet_En", "units", 'F' );
   //factory->AddVariable( "hJet_Et", "hJet_Et", "units", 'F' );
   //factory->AddVariable( "hJet_Mt", "hJet_Mt", "units", 'F' );
   //factory->AddVariable( "hJet_nch", "hJet_nch", "units", 'F' );
   //factory->AddVariable( "hJet_vtx3deL", "hJet_vtx3deL", "units", 'F' );
   //factory->AddVariable( "hJet_vtxMass", "hJet_vtxMass", "units", 'F' );
   //factory->AddVariable( "hJet_ptRaw", "hJet_ptRaw", "units", 'F' );
   //factory->AddVariable( "hJet_EnRaw", "hJet_EnRaw", "units", 'F' );
   //factory->AddVariable( "hJet_SoftLeptptRelCut:=(hJet_SoftLeptIdlooseMu>0. || hJet_SoftLeptId95>0.) ? (hJet_SoftLeptptRel) : (-99)", "hJet_SoftLeptptRel", "units", 'F' );
   //factory->AddVariable( "hJet_SoftLeptdRCut:=(hJet_SoftLeptIdlooseMu>0. || hJet_SoftLeptId95>0.) ? (hJet_SoftLeptdR) : (-99)", "hJet_SoftLeptdR", "units", 'F' );
   //factory->AddVariable( "rho25", "rho25", "units", 'F' );
   //factory->AddVariable( "dPhiMETJet", "dPhiMETJet", "units", 'F' );


   // You can add so-called "Spectator variables", which are not used in the MVA training, 
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
   // input variables, the response values of all trained MVAs, and the spectator variables

   // Add the variable carrying the regression target
   //factory->AddTarget( "jet_genPt" ); 
   factory->AddTarget( "jet_genJetPt/jet_pt" ); 

   // It is also possible to declare additional targets for multi-dimensional regression, ie:
   // -- factory->AddTarget( "fvalue2" );
   // BUT: this is currently ONLY implemented for MLP

   // Read training and test data (see TMVAClassification for reading ASCII files)
   // load the signal and background event samples from ROOT trees
/*   TFile *input(0);
   TString fname = "./tmva_reg_example.root";
   if (!gSystem->AccessPathName( fname )) 
      input = TFile::Open( fname ); // check if file in local directory exists
   else 
      input = TFile::Open( "http://root.cern.ch/files/tmva_reg_example.root" ); // if not: download from ROOT server
   
   if (!input) {
      std::cout << "ERROR: could not open data file" << std::endl;
      exit(1);
   }
   std::cout << "--- TMVARegression           : Using input file: " << input->GetName() << std::endl;

   // --- Register the regression tree

   TTree *regTree = (TTree*)input->Get("TreeR");
*/

   TChain chainTraining("Events");
   chainTraining.Add("TrainingFiles/training.root");
   TTree *regTreeTraining = &chainTraining;   // a TChain is-a TTree, so no C-style cast is needed

   TChain chainTesting("Events");
   chainTesting.Add("TrainingFiles/testing.root");
   TTree *regTreeTesting = &chainTesting;

   // global event weights per tree (see below for setting event-wise weights)
   Double_t regWeight  = 1.0;   

   // You can add an arbitrary number of regression trees
   factory->AddTree( regTreeTraining, "Regression", regWeight, "", "training" );
   factory->AddTree( regTreeTesting, "Regression", regWeight, "", "test" );
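
   // With the explicit "training"/"test" assignment above, TMVA already knows the
   // split, so PrepareTrainingAndTestTree (below) does not need nTrain/nTest options.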

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycut = "hJet_pt[0]>20. && hJet_pt[1]>20. && fabs(hJet_eta[0])<2.5 && fabs(hJet_eta[1])<2.5 && hJet_csv[0]>0. && hJet_csv[1]>0. && hJet_ptLeadTrack[0]<1500. && hJet_ptLeadTrack[1]<1500. && hJet_genJetPt[0]>0. && hJet_genJetPt[1]>0. && hJet_puJetIdL[0]>0.0 && hJet_puJetIdL[1]>0.0";
   TCut testingCut = "hJet_pt[0]>20. && hJet_pt[1]>20. && abs(hJet_eta[0])<2.5 && abs(hJet_eta[1])<2.5";
   TCut mjjCut = "sqrt(pow(hJet_pt[0]*cos(hJet_phi[0])+hJet_pt[1]*cos(hJet_phi[1]),2)+pow(hJet_pt[0]*sin(hJet_phi[0])+hJet_pt[1]*sin(hJet_phi[1]),2)) < 110";

   if(Cat==1) mjjCut = "sqrt(pow(hJet_pt[0]*cos(hJet_phi[0])+hJet_pt[1]*cos(hJet_phi[1]),2)+pow(hJet_pt[0]*sin(hJet_phi[0])+hJet_pt[1]*sin(hJet_phi[1]),2)) > 110";
   TCut jetPtCut = "jet_pt>90";
   if(Cat==1) jetPtCut = "jet_pt>90"; // NOTE: currently identical to the default above
   //TCut trainingCut = "hJet_pt[0]>20. && hJet_pt[1]>20. && abs(hJet_eta[0])<2.5 && abs(hJet_eta[1])<2.5 && hJet_genJetPt[0]>0. && hJet_genJetPt[1]>0. && hJet_csv[0]>0.0 && hJet_csv[1]>0.0 && hJet_ptLeadTrack[0]<1500. && hJet_ptLeadTrack[1]<1500.";
   TCut trainingCut = "jet_pt>20. && abs(jet_eta)<2.5 && jet_genJetPt>0. && jet_dRJetGenJet < 0.4 && jet_partonID==5";

// for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1";

   // tell the factory to use all remaining events in the trees after training for testing:
   // factory->PrepareTrainingAndTestTree(mycut, "nTrain_Regression=600000:nTest_Regression=600000:SplitMode=Random:NormMode=NumEvents:!V");
   //factory->PrepareTrainingAndTestTree(mycut, "nTrain_Regression=337500:nTest_Regression=337500:SplitMode=Random:NormMode=NumEvents:!V");
   //   factory->PrepareTrainingAndTestTree(mycut, "nTrain_Regression=158393:nTest_Regression=158393:SplitMode=Random:NormMode=NumEvents:!V");
   //factory->PrepareTrainingAndTestTree(mycut, "nTrain_Regression=14197:nTest_Regression=14197:SplitMode=Random:NormMode=NumEvents:!V");
   factory->PrepareTrainingAndTestTree(trainingCut+jetPtCut, "!V");

   // If no numbers of events are given, half of the events in the tree are used 
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );  

   // ---- Book MVA methods
   //
   // please look up the various method configuration options in the corresponding cxx files, e.g.
   // src/MethodCuts.cxx, etc., or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1:...", where [2] is the third input variable

   // PDE - RS method
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS", 
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" );
   // And the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );   
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );   

   if (Use["PDEFoam"])
       factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", 
			    "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN", 
                           "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // Linear discriminant
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", 
                           "!H:!V:VarTransform=None" );

   // Function discrimination analysis (FDA) -- test of various fitters; the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"]) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                          "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" );
   
   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. the formula of this example is good for parabolas
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" );

   if (Use["FDA_MT"]) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"]) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   // Neural network (MLP)
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" );

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );
   // Boosted Decision Tree
   if (Use["BDT"])
     factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=100:nEventsMin=4:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=-1:PruneMethod=CostComplexity:PruneStrength=30" );

   const bool doScan1 = false;

   int ntreesArray[8] = {100,200,300,400,500,600,700,800};
   float shrinkageArray[3] = {0.1,0.2,0.3};
   float gradbaggingfracArray[3] = {0.7,0.8,0.9};
   int maxdepthArray[3] = {2,3,4};
   int nnodesmaxArray[3] = {5,10,15};
   if(!doScan1) shrinkageArray[2]=1.0;
   int nnodesmaxIndex=-1,maxdepthIndex=-1,gradbaggingfracIndex=-1,shrinkageIndex=-1,ntreesIndex=-1;

   if(doScan1){
     nnodesmaxIndex = optimIndex/216;
     maxdepthIndex = (optimIndex-nnodesmaxIndex*216)/72;
     gradbaggingfracIndex = (optimIndex-nnodesmaxIndex*216-maxdepthIndex*72)/24;
     shrinkageIndex = (optimIndex-nnodesmaxIndex*216-maxdepthIndex*72-gradbaggingfracIndex*24)/8;
     ntreesIndex = (optimIndex-nnodesmaxIndex*216-maxdepthIndex*72-gradbaggingfracIndex*24-shrinkageIndex*8);
   }
   else{
     nnodesmaxIndex = optimIndex/72;
     maxdepthIndex = (optimIndex-nnodesmaxIndex*72)/24;
     shrinkageIndex = (optimIndex-nnodesmaxIndex*72-maxdepthIndex*24)/8;
     ntreesIndex = (optimIndex-nnodesmaxIndex*72-maxdepthIndex*24-shrinkageIndex*8);
   }
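
   // Worked example of the decomposition (doScan1 == false, mixed-radix index over
   // the 3 x 3 x 3 x 8 = 216 grid points): optimIndex = 100 gives
   //   nnodesmaxIndex = 100/72      = 1  -> NNodesMax = 10
   //   maxdepthIndex  = (100-72)/24 = 1  -> MaxDepth  = 3
   //   shrinkageIndex = (100-96)/8  = 0  -> Shrinkage = 0.1
   //   ntreesIndex    = 100-96      = 4  -> NTrees    = 500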

   if (Use["BDTG"]){
     if(doScan1)
       factory->BookMethod( TMVA::Types::kBDT, "BDTG", 
			    Form("!H:!V:NTrees=%i::BoostType=Grad:Shrinkage=%.1f:UseBaggedGrad:GradBaggingFraction=%.1f:nCuts=200:MaxDepth=%i:NNodesMax=%i",ntreesArray[ntreesIndex],shrinkageArray[shrinkageIndex],gradbaggingfracArray[gradbaggingfracIndex],maxdepthArray[maxdepthIndex],nnodesmaxArray[nnodesmaxIndex]) );
     else
       factory->BookMethod( TMVA::Types::kBDT, "BDTG", 
			    Form("!H:!V:IgnoreNegWeights:NTrees=%i::BoostType=Grad:Shrinkage=%.1f:nCuts=200:MaxDepth=%i:NNodesMax=%i",ntreesArray[ntreesIndex],shrinkageArray[shrinkageIndex],maxdepthArray[maxdepthIndex],nnodesmaxArray[nnodesmaxIndex]) );
   }

   if(doScan1)
     cout << Form("!H:!V:NTrees=%i:BoostType=Grad:Shrinkage=%.1f:UseBaggedGrad:GradBaggingFraction=%.1f:nCuts=200:MaxDepth=%i:NNodesMax=%i",ntreesArray[ntreesIndex],shrinkageArray[shrinkageIndex],gradbaggingfracArray[gradbaggingfracIndex],maxdepthArray[maxdepthIndex],nnodesmaxArray[nnodesmaxIndex])<<endl;
   else
     cout << Form("!H:!V:IgnoreNegWeights:NTrees=%i:BoostType=Grad:Shrinkage=%.1f:nCuts=200:MaxDepth=%i:NNodesMax=%i",ntreesArray[ntreesIndex],shrinkageArray[shrinkageIndex],maxdepthArray[maxdepthIndex],nnodesmaxArray[nnodesmaxIndex]) << endl;

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();    

   // --------------------------------------------------------------
   
   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVARegression is done!" << std::endl;      

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVARegGui( outfileName );
}
void TMVARegression( TString myMethodList = "" ) 
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the 
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVARegression.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //

   // this loads the library
   TMVA::Tools::Instance();

   //---------------------------------------------------------------
   // default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   Use["PDERS"]           = 0;
   Use["PDERSkNN"]        = 0; // depreciated until further notice
   Use["PDEFoam"]         = 0; // preparation for new TMVA version "reader". This method is not available in this version of TMVA
   // ---
   Use["KNN"]             = 0;
   // ---
   Use["LD"]		  = 0;
   // ---
   Use["FDA_GA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   // ---
   Use["MLP"]             = 0; 
   // ---
   Use["SVM"]             = 0;
   // ---
   Use["BDT"]             = 0;
   Use["BDTG"]            = 1;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVARegression" << std::endl;

   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // Create a new root output file
   TString outfileName( "TMVAReg_TarggetAPDPN.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory will
   // then run the performance analysis for you.
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/ 
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in 
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVARegression", outputFile, 
                                               "!V:!Silent:Color:DrawProgressBar" );

   // If you wish to modify default settings 
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
//    factory->AddVariable( "laser.qmax", "Qmax", "ADC", 'D' );
//    factory->AddVariable( "laser.tmax", "T_{0}", "ns", 'D' );
//    factory->AddVariable( "laser.fwhm", "FWHM","ns", 'D');
//    factory->AddVariable( "laser.prepulse","Prepulse indicator","A.U.", 'D' );
//    factory->AddVariable( "laser.w10","Width at 10%","ns", 'D' );

   factory->AddVariable( "qmax[0]", "Qmax", "ADC", 'D' );
   factory->AddVariable( "tmax[0]", "T_{0}", "ns", 'D' );
   factory->AddVariable( "l_fwhm[0]", "FWHM","ns", 'D');
   factory->AddVariable( "l_prepulse[0]","Prepulse indicator","A.U.", 'D' );
   factory->AddVariable( "l_width90[0]","Width at 10%","ns", 'D' );

   // You can add so-called "Spectator variables", which are not used in the MVA training, 
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
   // input variables, the response values of all trained MVAs, and the spectator variables
   //   factory->AddSpectator( "spec1:=var1*2",  "Spectator 1", "units", 'F' );
   //   factory->AddSpectator( "spec2:=timeStamp",  "Spectator 1", "units", 'F' );

   // Add the variable carrying the regression target
   factory->AddTarget  ( "apdpnAB[0]" ); 

   // It is also possible to declare additional targets for multi-dimensional regression, i.e.:
   // -- factory->AddTarget( "fvalue2" );
   // BUT: this is currently ONLY implemented for MLP

   // Read training and test data (see TMVAClassification for reading ASCII files)
   // load the signal and background event samples from ROOT trees
//    TFile *input(0);
//    TString fname = "./stabTreeFed630.root";
//    if (!gSystem->AccessPathName( fname )) {
//       input = TFile::Open( fname ); // check if file in local directory exists
//    } 

//    if (!input) {
//       std::cout << "ERROR: could not open data file" << std::endl;
//       exit(1);
//    }
//    std::cout << "--- TMVARegression           : Using input file: " << input->GetName() << std::endl;


   TChain * ntu = new TChain("x");
   ntu->Add("/data2/EcalLaserMonitoringData/ntuples_2011_158851_178888/ntu_data_0015*.root");
   ntu->Add("/data2/EcalLaserMonitoringData/ntuples_2011_158851_178888/ntu_data_0016*.root");

   //TTree *regTree = (TTree*)input->Get("ntu");

   // global event weights per tree (see below for setting event-wise weights)
   Double_t regWeight  = 1.0;   

   // ====== register trees ====================================================
   //
   // the following method is the preferred one:
   // you can add an arbitrary number of regression trees
   //factory->AddRegressionTree( regTree, regWeight );
   factory->AddRegressionTree( ntu, regWeight );

   // Alternative call:
   // -- factory->AddRegressionTree( regTree, regWeight );
   
   // This would set individual event weights (the variables defined in the 
   // expression need to exist in the original TTree)
   //   factory->SetWeightExpression( "fwhm-26", "Regression" );

   // Apply additional cuts on the signal and background samples (can be different)
   //   TCut mycut = "abs(0.5*(apd0+apd1)-1)<0.015 && timeStamp<1299.8e6"; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1";
   cout << "Defining cuts..." << endl;
   TCut mycut = "fed[0] == 605 && harness == 6 && field > 0.1 && run < 161200 && apdpnAB[0] > 0"; 

   // Count selected events and use them all
   // Count the selected events and use (nearly) all of them for training;
   // with the "goff" option Draw() produces no graphics and just returns the count
   Long64_t selectedEvts = ntu->Draw("l_fwhm[0]", mycut, "goff");
   cout << selectedEvts << " events pass the selection" << endl;
   TString trainPara = TString::Format("nTrain_Regression=%lld:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents:!V", selectedEvts-10);
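   // An equivalent count without a Draw call would be ntu->GetEntries(mycut.GetTitle()),
   // assuming the cut expression is selection-compatible.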

   // tell the factory to use all remaining events in the trees after training for testing:
   factory->PrepareTrainingAndTestTree( mycut, trainPara ); 

   // If no numbers of events are given, half of the events in the tree are used for training, and 
   // the other half for testing:
   //   factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );  
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut, 
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );  

   // ---- Book MVA methods
   //
   // please look up the various method configuration options in the corresponding cxx files, e.g.
   // src/MethodCuts.cxx, etc., or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1:...", where [2] is the third input variable

   cout << "Booking MVA methods" << endl;

   // PDE - RS method
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS", 
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" );
   // And the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );   
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );   

   if (Use["PDERSkNN"]) // depreciated until further notice
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSkNN", 
                           "!H:!V:VolumeRangeMode=kNN:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDEFoam"])
       factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", 
			    "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:CutNmin=T:Nmin=10:VarTransform=None" );


   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN", 
                           "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // Linear discriminant
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", 
                           "!H:!V:VarTransform=N" );

   // Function discrimination analysis (FDA) -- test of various fitters; the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"]) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                          "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" );
   
   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. the formula of this example is good for parabolas
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" );

   if (Use["FDA_MT"]) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"]) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   // Neural network (MLP)
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=5000:HiddenLayers=N+5,N+2:TestRate=6:TrainingMethod=BP:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15" );

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDT"])
     factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=100:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" );

   if (Use["BDTG"])
     factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=200::BoostType=Grad:Shrinkage=1.0:UseBaggedGrad:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" );
   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();    

   // --------------------------------------------------------------
   
   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVARegression is done!" << std::endl;      

   delete factory;

   // Launch the GUI for the root macros
   //if (!gROOT->IsBatch()) TMVARegGui( outfileName );
}
Beispiel #8
0
TString autoencoder (std::string inputFileName) 
{
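    // NOTE: now(), pathToData, variableNames and additionalVariableNames are
    // helpers/globals assumed to be defined elsewhere in this file.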

    std::string tmstr (now ());
    TString tmstmp (tmstr.c_str ());
   
  
    std::cout << "==> Start Autoencoder " << std::endl;
    std::cout << "-------------------- open input file ---------------- " << std::endl;
    TString fname = pathToData + TString (inputFileName.c_str ()) + TString (".root");
    TFile *input = TFile::Open( fname );

    std::cout << "-------------------- get tree ---------------- " << std::endl;
    TTree *tree     = (TTree*)input->Get("data");
   
    TString outfileName( "TMVAAutoEnc__" );
    outfileName += TString (inputFileName.c_str ()) + TString ("__") + tmstmp + TString (".root");

    std::cout << "-------------------- open output file ---------------- " << std::endl;
    TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

    std::cout << "-------------------- prepare factory ---------------- " << std::endl;
    TMVA::Factory *factory = new TMVA::Factory( "TMVAAutoencoder", outputFile,
						"AnalysisType=Regression:Color:DrawProgressBar" );
    std::cout << "-------------------- add variables ---------------- " << std::endl;


    for (auto varname : variableNames+additionalVariableNames)
    {
	factory->AddVariable (varname.c_str (), 'F');
	factory->AddTarget   (varname.c_str (), 'F');
    }
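
    // Every input is also declared as a target, so the network is trained to
    // reproduce its own inputs -- the defining setup of an autoencoder.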



    std::cout << "-------------------- add tree ---------------- " << std::endl;
    // global event weights per tree (see below for setting event-wise weights)
    Double_t regWeight  = 1.0;   
    factory->AddRegressionTree (tree, regWeight);

   
    std::cout << "-------------------- prepare ---------------- " << std::endl;
    TCut mycut = ""; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1";
    factory->PrepareTrainingAndTestTree( mycut, 
					 "nTrain_Regression=0:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents:!V" );


    /* // This would set individual event weights (the variables defined in the  */
    /* // expression need to exist in the original TTree) */
    /* factory->SetWeightExpression( "var1", "Regression" ); */


    if (true)
    {
	TString layoutString ("Layout=TANH|100,TANH|20,TANH|40,LINEAR");
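	// Hidden layout 100 -> 20 -> 40: the 20-unit middle layer is the bottleneck
	// whose activations form the compressed encoding; the LINEAR output layer is
	// sized automatically to the number of targets.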

	TString training0 ("LearningRate=1e-5,Momentum=0.5,Repetitions=1,ConvergenceSteps=500,BatchSize=50,TestRepetitions=7,WeightDecay=0.01,Regularization=NONE,DropConfig=0.5+0.5+0.5+0.5,DropRepetitions=2");
	TString training1 ("LearningRate=1e-5,Momentum=0.9,Repetitions=1,ConvergenceSteps=500,BatchSize=30,TestRepetitions=7,WeightDecay=0.01,Regularization=L2,DropConfig=0.1+0.1+0.1,DropRepetitions=1");
	TString training2 ("LearningRate=1e-4,Momentum=0.3,Repetitions=1,ConvergenceSteps=10,BatchSize=40,TestRepetitions=7,WeightDecay=0.1,Regularization=L2");
	TString training3 ("LearningRate=1e-5,Momentum=0.1,Repetitions=1,ConvergenceSteps=10,BatchSize=10,TestRepetitions=7,WeightDecay=0.001,Regularization=NONE");

	TString trainingStrategyString ("TrainingStrategy=");
	trainingStrategyString += training0 + "|" + training1 + "|" + training2 ; //+ "|" + training3;

       
	//       TString trainingStrategyString ("TrainingStrategy=LearningRate=1e-1,Momentum=0.3,Repetitions=3,ConvergenceSteps=20,BatchSize=30,TestRepetitions=7,WeightDecay=0.0,L1=false,DropFraction=0.0,DropRepetitions=5");

	TString nnOptions ("!H:V:ErrorStrategy=SUMOFSQUARES:VarTransform=N:WeightInitialization=XAVIERUNIFORM");
	//       TString nnOptions ("!H:V:VarTransform=Normalize:ErrorStrategy=CHECKGRADIENTS");
	nnOptions.Append (":"); nnOptions.Append (layoutString);
	nnOptions.Append (":"); nnOptions.Append (trainingStrategyString);

	factory->BookMethod( TMVA::Types::kNN, TString("NN_")+tmstmp, nnOptions ); // deep "NN" method of the TMVA development branch (named kDNN/"DNN" in later releases)
    }


   
    // --------------------------------------------------------------------------------------------------
    factory->TrainAllMethods();
    factory->TestAllMethods();
    factory->EvaluateAllMethods();

    outputFile->Close();

//    TMVA::TMVARegGui (outfileName);
   
    delete factory;
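    // The returned string is the booked method's name; with TMVA's default naming
    // its weight file should end up at weights/TMVAAutoencoder_NN_<timestamp>.weights.xml.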
    return TString("NN_")+tmstmp;
}