예제 #1
0
파일: mva2.C 프로젝트: NTrevisani/WW13TeV
// Trains, tests and evaluates a TMVA BDT (booked with an empty option string)
// using the "nt" tree of WW.root as signal and the "nt" tree of DY.root as
// background. Events are weighted with the expression "baseW" and the TMVA
// output is written to myAnalysisFile.root.
//
// signalName : job name handed to the TMVA factory. It does not select the
//              input file; the signal sample is always WW.root.
// bkgName    : not used by this function; the background is always DY.root.
//
// Returns early, after printing a message, when the output file cannot be
// created or when either input tree cannot be read.
void test_train(TString signalName = "WW",
		TString bkgName = "DY")
{
  TFile *outFile = new TFile("myAnalysisFile.root","RECREATE");
  if (outFile->IsZombie()) {
    cout<<"Could not create myAnalysisFile.root"<<endl;
    delete outFile;
    return;
  }

  TMVA::Factory *factory = new TMVA::Factory(signalName, outFile,"");

  const TString directory = "../rootFiles/SF/MediumIDTighterIP/";

  //defining WW signal
  TFile *MySignalFile = new TFile(directory + "WW.root","READ");
  TTree* sigTree = 0;
  if (!MySignalFile->IsZombie())
    sigTree = dynamic_cast<TTree*>(MySignalFile->Get("nt"));

  //defining DY background
  TFile *MyBkgFile = new TFile(directory + "DY.root","READ");
  TTree* bkgTree = 0;
  if (!MyBkgFile->IsZombie())
    bkgTree = dynamic_cast<TTree*>(MyBkgFile->Get("nt"));

  // Stop before handing a null tree to the factory.
  if (!sigTree || !bkgTree) {
    if (!sigTree) cout<<"Could not read tree \"nt\" from "<<directory<<"WW.root"<<endl;
    if (!bkgTree) cout<<"Could not read tree \"nt\" from "<<directory<<"DY.root"<<endl;
    outFile->Close();
    delete factory;
    delete MySignalFile;
    delete MyBkgFile;
    delete outFile;
    return;
  }

  factory->AddSignalTree(sigTree,1);
  factory->AddBackgroundTree(bkgTree,1);

  factory->SetWeightExpression("baseW");

  //************************************ FACTORY

  // Input variables, registered in this order.
  const char* const variables[] = {
    "fullpmet", "trkpmet", "ratioMet", "ptll", "mth", "jetpt1",
    "ptWW", "dphilljet", "dphillmet", "dphijet1met", "nvtx"
  };
  const unsigned int nVariables = sizeof(variables)/sizeof(variables[0]);
  for (unsigned int i = 0; i < nVariables; ++i)
    factory->AddVariable(variables[i]);

  factory->PrepareTrainingAndTestTree("",500,500,500,500);
  cout<<"I've prepared trees"<<endl;
  //factory->BookMethod(TMVA::Types::kFisher, "Fisher","");
  factory->BookMethod(TMVA::Types::kBDT, "BDT","");

  cout<<"I've booked method"<<endl;
  factory->TrainAllMethods();
  factory->TestAllMethods();
  cout<<"I've tested all methods"<<endl;
  factory->EvaluateAllMethods();
  cout<<"I've evaluated all methods"<<endl;

  // Close the output so the results are written, then release everything.
  // The input files go last because the factory was using trees read from them.
  outFile->Close();
  delete factory;
  delete MySignalFile;
  delete MyBkgFile;
  delete outFile;
}
예제 #2
0
void trainBDT(void)
{
	// Open input file and get tree
	TFile *infile = new TFile("l3bdt.root");
	TTree *l3tree = (TTree*)infile->Get("l3tree");
	if(l3tree == NULL){
		cout << "Couldn't open \"l3bdt.root\"!" << endl;
		return;
	}

	// Open output root file (for TMVA)
	TFile *outfile = new TFile("l3BDT_out.root", "RECREATE");
	TMVA::Factory *fac = new TMVA::Factory("L3",outfile,"");

	// Specify input tree that contains both signal and background 
	TCut signalCut("is_good==1");
	TCut backgroundCut("is_good==0");
	fac->SetInputTrees(l3tree, signalCut, backgroundCut);

	// Add variables
	fac->AddVariable("Nstart_counter",      'I');
	fac->AddVariable("Ntof",                'I');
	fac->AddVariable("Nbcal_points",        'I');
	fac->AddVariable("Nbcal_clusters",      'I');
	fac->AddVariable("Ebcal_points",        'F');
	fac->AddVariable("Ebcal_clusters",      'F');
	fac->AddVariable("Nfcal_clusters",      'I');
	fac->AddVariable("Efcal_clusters",      'F');
	fac->AddVariable("Ntrack_candidates",   'I');
	fac->AddVariable("Ptot_candidates",     'F');

	TCut preSelectCut("");
	fac->PrepareTrainingAndTestTree(preSelectCut,"");
	fac->BookMethod(TMVA::Types::kBDT, "BDT", "");

	fac->TrainAllMethods();
	fac->TestAllMethods();
	fac->EvaluateAllMethods();

	delete fac;

	outfile->Close();
	delete outfile;
}
예제 #3
0
// Trains, tests and evaluates a Likelihood and a gradient-boosted BDT
// classifier on the "jets" tree of bjetId_<SELECTION>.root.
//
// Jets with abs(partonId) == 5 are signal, all others are background. The
// TMVA output is written to bjetId_<SELECTION>_MVA.root.
//
// SELECTION : tag inserted into the input file name, the output file name and
//             the factory job name.
//
// Returns early, after printing a message, when the input file, the "jets"
// tree or the output file is unavailable.
void trainBJetIdMVA(TString SELECTION)
{
  // the training is done using a dedicated tree format
  const TString srcName = "bjetId_"+SELECTION+".root";
  TFile *src = TFile::Open(srcName);
  // TFile::Open returns a null pointer when the file cannot be opened.
  if (!src || src->IsZombie()) {
    cout<<"ERROR: could not open input file "<<srcName<<endl;
    delete src;
    return;
  }
  TTree *tr = dynamic_cast<TTree*>(src->Get("jets"));
  if (!tr) {
    cout<<"ERROR: tree \"jets\" not found in "<<srcName<<endl;
    delete src;
    return;
  }

  const TString outName = "bjetId_"+SELECTION+"_MVA.root";
  TFile *outf = new TFile(outName,"RECREATE");
  if (outf->IsZombie()) {
    cout<<"ERROR: could not create output file "<<outName<<endl;
    delete outf;
    delete src;
    return;
  }

  TCut signalCut       = "abs(partonId) == 5";
  TCut bkgCut          = "abs(partonId) != 5";
  TCut preselectionCut = "btagIdx<4 && etaIdx<4 && etaIdx>-1 && ptIdx<4";

  // Requested number of events for each of the four train/test samples.
  const int N = 100000;
  cout<<"NUMBER OF TRAINING EVENTS = "<<N<<endl;

  TMVA::Factory* factory = new TMVA::Factory("factory_"+SELECTION+"_",outf,"!V:!Silent:Color:DrawProgressBar:Transformations=I;G:AnalysisType=Classification" );

  factory->SetInputTrees(tr,signalCut,bkgCut);

  factory->AddVariable("btagIdx",'I');
  factory->AddVariable("etaIdx" ,'I');
  factory->AddVariable("btag"   ,'F');
  factory->AddVariable("eta"    ,'F');

  const TString splitOptions = TString::Format("nTrain_Signal=%d:nTrain_Background=%d:nTest_Signal=%d:nTest_Background=%d",N,N,N,N);
  factory->PrepareTrainingAndTestTree(preselectionCut,splitOptions);

  // specify the training methods
  factory->BookMethod(TMVA::Types::kLikelihood,"Likelihood");
  //factory->BookMethod(TMVA::Types::kBDT,"BDT_DEF");
  //factory->BookMethod(TMVA::Types::kBDT,"BDT_ADA","NTrees=600:AdaBoostBeta=0.1:nCuts=35"); 
  //factory->BookMethod(TMVA::Types::kBDT,"BDT_GRAD1","NTrees=600:nCuts=40:BoostType=Grad:Shrinkage=0.5");  
  factory->BookMethod(TMVA::Types::kBDT,"BDT_GRAD2","NTrees=600:nCuts=25:BoostType=Grad:Shrinkage=0.2");
  factory->TrainAllMethods();
  factory->TestAllMethods();
  factory->EvaluateAllMethods(); 

  // Close the output so the results are written, then release everything.
  // The input file goes last because the factory was using a tree read from it.
  outf->Close();
  delete factory;
  delete outf;
  delete src;
}
예제 #4
0
파일: TMVAtest.C 프로젝트: aperloff/TAMUWW
void TMVAtest(){
  //gSystem->Load("../lib/slc5_amd64_gcc462/libTAMUWWMEPATNtuple.so");
  gSystem->Load("libPhysics");
  //gSystem->Load("EvtTreeForAlexx_h.so");
  gSystem->Load("libTMVA.1");
  gSystem->Load("AutoDict_vector_TLorentzVector__cxx.so");
  TMVA::Tools::Instance();
  TFile* outputFile = TFile::Open("TMVA1.root", "RECREATE");
  TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification",outputFile,"V=true:Color:DrawProgressBar");// ":Transformations=I;D;P;G,D" );
  TFile* signal = TFile::Open("/uscms_data/d2/aperloff/Spring12ME7TeV/MEResults/microNtuples_oldStructure/microWW_EPDv01.root");
  TFile* bkg = TFile::Open("/uscms_data/d2/aperloff/Spring12ME7TeV/MEResults/microNtuples_oldStructure/microWJets_EPDv01.root");

  TTree* stree = (TTree*)signal->Get("METree");
  TTree* btree = (TTree*)bkg->Get("METree");
  factory->AddSignalTree(stree,1.0);
  factory->AddBackgroundTree(btree,1.0);


  factory->SetSignalWeightExpression("1.0");
  factory->SetBackgroundWeightExpression("1.0");
  factory->AddVariable("tEventProb[0]");
  factory->AddVariable("tEventProb[1]");
  factory->AddVariable("tEventProb[2]");

  //factory->AddVariable("tEventProb0 := tEventProb[0]",'F');
  //factory->AddVariable("tEventProb1 := tEventProb[1]",'F');
  //factory->AddVariable("tEventProb2 := tEventProb[2]",'F');
  TCut test("Entry$>-2 && jLV[1].Pt()>30");
  TCut mycuts (test);
  factory->PrepareTrainingAndTestTree(mycuts,mycuts,"nTrain_Signal=0:nTrain_Background=0:nTest_Signal=0:nTest_Background=0:SplitMode=Random:NormMode=None:V=true:VerboseLevel=DEBUG");
  factory->BookMethod( TMVA::Types::kBDT, "BDT","!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );
  factory->TrainAllMethods();
  factory->TestAllMethods();
  factory->EvaluateAllMethods();
  outputFile->Close(); 

}
예제 #5
0
int main(int argc, char**argv){
 
 if(argc != 2){
 std::cerr << " >>>>> analysis.cpp::usage: " << argv[0] << " configFileName" << std::endl ;
 return 1;
 }

 parseConfigFile (argv[1]) ;

 // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
 // if you use your private .rootrc, or run from a different directory, please copy the 
 // corresponding lines from .rootrc
 // methods to be processed can be given as an argument; use format:
 //
 // mylinux~> root -l TMVARegression.C\(\"myMethod1,myMethod2,myMethod3\"\)
 //
 //---------------------------------------------------------------
 // This loads the library
 TMVA::Tools::Instance();

 std::vector<std::string> UseMethodName;
 UseMethodName = gConfigParser -> readStringListOption("Input::UseMethodName");

 std::cout << " >>>>> Input::UseMethodName size = " << UseMethodName.size() << std::endl;  
 std::cout << " >>>>> >>>>>  "; 
 for (unsigned int iCat = 0; iCat < UseMethodName.size(); iCat++){
  std::cout << " " << UseMethodName.at(iCat) << ", ";
 }
 std::cout << std::endl; 

 
 
 // ---------------------------------------------------------------

 std::cout << std::endl;
 std::cout << "==> Start TMVARegression" << std::endl;

 std::map<std::string,int> Use;

 for(std::vector<std::string>::iterator it=UseMethodName.begin(); it!=UseMethodName.end(); ++it) Use[*it]=0;

 std::string UseMethodFlag;
 try{ UseMethodFlag = gConfigParser -> readStringOption("Input::UseMethodFlag");
      std::cout<< UseMethodFlag<<std::endl;
      std::vector<TString> mlist = gTools().SplitString( UseMethodFlag, '/' );
      for (UInt_t i=0; i<mlist.size(); i++) {
     
      std::string regMethod(mlist[i]);
      if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return -1;
         }
         Use[regMethod] = 1;
         std::cout << "regMethod= " << regMethod<< " 1 "<<std::endl;

      }
     }
 catch (char * exception){
   
   std::cerr << " exception =  Use All method " << std::endl;
   for(std::vector<std::string>::iterator it=UseMethodName.begin() ;it!=UseMethodName.end(); it++) Use[*it]=1;

   }
 

 // --------------------------------------------------------------------------------------------------
 // --- Here the preparation phase begins
 // Create a new root output file
 
 std::string outputFileName =  gConfigParser -> readStringOption("Output::outputFileName");
 std::cout<<" Output Data File = "<<outputFileName<<std::endl;
 
 TFile* outputFile = TFile::Open( outputFileName.c_str(), "RECREATE" );

 // Create the factory object. Later you can choose the methods
 // whose performance you'd like to investigate. The factory will
 // then run the performance analysis for you.
 // The first argument is the base of the name of all the
 // weightfiles in the directory weight/ 
 // All TMVA output can be suppressed by removing the "!" (not) in 
 // front of the "Silent" argument in the option string

 // Read training and test data (see TMVAClassification for reading ASCII files)
 // load the signal and background event samples from ROOT trees
 std::string inputFileList =  gConfigParser -> readStringOption("Input::inputFileList");
 std::string treeNameDATA =  gConfigParser -> readStringOption("Input::treeNameDATA");

 std::cout<<" Input Data List = "<<inputFileList<<std::endl;

 TChain* treeDATA = new TChain(treeNameDATA.c_str());

 FillChain(*treeDATA,inputFileList.c_str());


 TMVA::Factory *factory = new TMVA::Factory( "TMVARegression", outputFile, 
                                               "!V:!Silent:Color:DrawProgressBar" );

 // If you wish to modify default settings 
 // (please check "src/Config.h" to see all available global options)
 //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
 //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";
 // Define the input variables that shall be used for the MVA training
 // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
 // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
 std::string RegionOfTraining =  gConfigParser -> readStringOption("Input::RegionOfTraining");
 std::cout<<" RegionOfTraining = "<<RegionOfTraining<<std::endl;
 
 if(RegionOfTraining=="EB"){


 factory->AddVariable( "ele1_scE/ele1_scERaw" , 'F');   
 factory->AddVariable( "ele1_eRegrInput_nPV" , 'F');
 factory->AddVariable( "ele1_eRegrInput_r9" , 'F');
 factory->AddVariable( "ele1_fbrem" , 'F');
 factory->AddVariable( "ele1_eta" , 'F');
 factory->AddVariable( "ele1_DphiIn" , 'F');
 factory->AddVariable( "ele1_DetaIn" , 'F');
 factory->AddVariable( "ele1_sigmaIetaIeta" , 'F');

 factory->AddVariable( "ele1_eRegrInput_etaW" , 'F');
 factory->AddVariable( "ele1_eRegrInput_phiW" , 'F');

 factory->AddVariable( "ele1_eRegrInput_bCE_Over_sCE", 'F');
 factory->AddVariable( "ele1_eRegrInput_sigietaieta_bC1" , 'F');
 factory->AddVariable( "ele1_eRegrInput_sigiphiiphi_bC1" , 'F');
 factory->AddVariable( "ele1_eRegrInput_sigietaiphi_bC1" , 'F');
 factory->AddVariable( "ele1_eRegrInput_e3x3_Over_bCE" , 'F');
 factory->AddVariable( "ele1_eRegrInput_Deta_bC_sC" , 'F');
 factory->AddVariable( "ele1_eRegrInput_Dphi_bC_sC" , 'F');
 factory->AddVariable( "ele1_eRegrInput_bEMax_Over_bCE" , 'F');


 factory->AddVariable( "ele1_dxy_PV" , 'F');
 factory->AddVariable( "ele1_dz_PV" , 'F');
 factory->AddVariable( "ele1_sigmaP/ele1_tkP" , 'F');

 factory->AddVariable( "ele1_eRegrInput_bCELow_Over_sCE", 'F');
 factory->AddVariable( "ele1_eRegrInput_e3x3_Over_bCELow" , 'F');
 factory->AddVariable( "ele1_eRegrInput_Deta_bCLow_sC" , 'F');
 factory->AddVariable( "ele1_eRegrInput_Dphi_bCLow_sC" , 'F');

 factory->AddVariable( "ele1_eRegrInput_seedbC_etacry" , 'F');
 factory->AddVariable( "ele1_eRegrInput_seedbC_phicry" , 'F');

 // You can add so-called "Spectator variables", which are not used in the MVA training, 
 // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
 // input variables, the response values of all trained MVAs, and the spectator variables
 // factory->AddSpectator( "spec1:=var1*2",  "Spectator 1", "units", 'F' );
 // factory->AddSpectator( "spec2:=var1*3",  "Spectator 2", "units", 'F' );
 // Add the variable carrying the regression target
//   factory->AddTarget("ele1_scE/ele1_E_true" ); 
   factory->AddTarget("ele1_tkP/ele1_E_true" ); 


 // It is also possible to declare additional targets for multi-dimensional regression, ie:
 // -- factory->AddTarget( "fvalue2" );
 // BUT: this is currently ONLY implemented for MLP

 // global event weights per tree (see below for setting event-wise weights)
 Double_t regWeight  = 1.0;   

 // You can add an arbitrary number of regression trees
 factory->AddRegressionTree( treeDATA, regWeight );

 // This would set individual event weights (the variables defined in the 
 // expression need to exist in the original TTree)
 // factory->SetWeightExpression( "var1", "Regression" );

//  TCut mycut = "ele1_isEB==1 && ele1_sigmaP/ele1_tkP<0.4 && ele1_fbrem>0 && abs(ele1_dxy_PV)<0.05 && abs(ele1_dz_PV)<0.05 && ele1_eRegrInput_etaW > 0.006 && ele1_eRegrInput_phiW<0.08 && ele1_eRegrInput_sigietaieta_bC1>0.006 && ele1_eRegrInput_sigiphiiphi_bC1>0.008  && abs(ele1_eRegrInput_Deta_bC_sC)<0.004 && abs(ele1_eRegrInput_Dphi_bC_sC)<0.04 && abs(ele1_eRegrInput_seedbC_etacry)<0.6 && abs(ele1_eRegrInput_seedbC_phicry)<0.6 && ele1_scE/ele1_scERaw<1.2 && (ele1_scE/ele1_E_true)<1.4 && (ele1_scE/ele1_E_true)>0.3"; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1";

  TCut mycut = "ele1_isEB==1 && ele1_sigmaP/ele1_tkP<0.4 && ele1_fbrem>0 && abs(ele1_dxy_PV)<0.05 && abs(ele1_dz_PV)<0.05 && ele1_eRegrInput_etaW > 0.006 && ele1_eRegrInput_phiW<0.08 && ele1_eRegrInput_sigietaieta_bC1>0.006 && ele1_eRegrInput_sigiphiiphi_bC1>0.008  && abs(ele1_eRegrInput_Deta_bC_sC)<0.004 && abs(ele1_eRegrInput_Dphi_bC_sC)<0.04 && abs(ele1_eRegrInput_seedbC_etacry)<0.6 && abs(ele1_eRegrInput_seedbC_phicry)<0.6 && ele1_scE/ele1_scERaw<1.2 && ele1_tkP/ele1_E_true<1.8 && ele1_tkP/ele1_E_true>0.2"; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1";


 // tell the factory to use all remaining events in the trees after training for testing:
 factory->PrepareTrainingAndTestTree( mycut, 
                                        "nTrain_Regression=2500000:nTest_Regression=2500000:SplitMode=Random:NormMode=NumEvents:!V" );
 
 TString Name = Form("weight_%s_%s_P_W",RegionOfTraining.c_str(),UseMethodFlag.c_str());
 (TMVA::gConfig().GetIONames()).fWeightFileDir = Name;
 } 

 if(RegionOfTraining=="EE"){

 factory->AddVariable( "ele1_scE/ele1_scERaw" , 'F');      
 factory->AddVariable( "ele1_eRegrInput_nPV",'F');
 factory->AddVariable( "ele1_eRegrInput_r9",'F');
 factory->AddVariable( "ele1_fbrem",'F');
 factory->AddVariable( "ele1_eta",'F');
 factory->AddVariable( "ele1_DphiIn",'F');
 factory->AddVariable( "ele1_DetaIn",'F');
 factory->AddVariable( "ele1_sigmaIetaIeta",'F');

 factory->AddVariable( "ele1_eRegrInput_etaW",'F');
 factory->AddVariable( "ele1_eRegrInput_phiW",'F');

 factory->AddVariable( "ele1_dxy_PV",'F');
 factory->AddVariable( "ele1_dz_PV",'F');
 factory->AddVariable( "ele1_sigmaP/ele1_tkP",'F');

   
 // You can add so-called "Spectator variables", which are not used in the MVA training, 
 // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
 // input variables, the response values of all trained MVAs, and the spectator variables
 // factory->AddSpectator( "spec1:=var1*2",  "Spectator 1", "units", 'F' );
 // factory->AddSpectator( "spec2:=var1*3",  "Spectator 2", "units", 'F' );
 // Add the variable carrying the regression target

//  factory->AddTarget("ele1_scE/ele1_E_true" ); 
  factory->AddTarget("ele1_tkP/ele1_E_true" );

 // It is also possible to declare additional targets for multi-dimensional regression, ie:
 // -- factory->AddTarget( "fvalue2" );
 // BUT: this is currently ONLY implemented for MLP

 // global event weights per tree (see below for setting event-wise weights)
 Double_t regWeight  = 1.0;   

 // You can add an arbitrary number of regression trees
 factory->AddRegressionTree( treeDATA, regWeight );

 // This would set individual event weights (the variables defined in the 
 // expression need to exist in the original TTree)
 // factory->SetWeightExpression( "var1", "Regression" );
//  TCut mycut = "ele1_isEB==0 && ele1_sigmaP/ele1_tkP<0.4 && ele1_fbrem>0 && abs(ele1_dxy_PV)<0.05 && abs(ele1_dz_PV)<0.05 &&(ele1_scE/ele1_E_true)<1.4 && (ele1_scE/ele1_E_true)>0.3";
  TCut mycut = "ele1_isEB==0 && ele1_sigmaP/ele1_tkP<0.4 && ele1_fbrem>0 && abs(ele1_dxy_PV)<0.05 && abs(ele1_dz_PV)<0.05 && (ele1_tkP/ele1_E_true)<1.6";

 // for example: TCut mycut = "abs(var1)<0.5 && 
 // tell the factory to use all remaining events in the trees after training for testing:
 factory->PrepareTrainingAndTestTree( mycut, 
                                        "nTrain_Regression=3000000:nTest_Regression=3000000:SplitMode=Random:NormMode=NumEvents:!V" );

 TString Name = Form("weight_%s_%s_P_W",RegionOfTraining.c_str(),UseMethodFlag.c_str());
 (TMVA::gConfig().GetIONames()).fWeightFileDir = Name;

 }
 // Apply additional cuts on the signal and background samples (can be different)
 
//  // If no numbers of events are given, half of the events in the tree are used 
 // for training, and the other half for testing:
 //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );  

 // ---- Book MVA methods
 //
 // please lookup the various method configuration options in the corresponding cxx files, eg:
 // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
 // it is possible to preset ranges in the option string in which the cut optimisation should be done:
 // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

 // PDE - RS method
  if (Use["PDERS"])
    factory->BookMethod( TMVA::Types::kPDERS, "PDERS", "!H:!V:Normthree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" );
   // And the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );   
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );   

 if (Use["PDEFoam"])
   factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.3:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" );

 // K-Nearest Neighbour classifier (KNN)
 if (Use["KNN"])
   factory->BookMethod( TMVA::Types::kKNN, "KNN", "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

 // Linear discriminant
 if (Use["LD"])  factory->BookMethod( TMVA::Types::kLD, "LD","!H:!V:VarTransform=G,U,D" );

 // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
 if (Use["FDA_MC"]) 
     factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                          "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" );
   
 if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. the formula of this example is good for parabolas
   factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" );

 if (Use["FDA_MT"]) 
   factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

 if (Use["FDA_GAMT"]) 
   factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   // Neural network (MLP)
 if (Use["MLP"])
//       factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" );
//         factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=200:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" );
// 	factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=400:HiddenLayers=N+10:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15" );
// 	factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=N:NeuronType=tanh:NCycles=200:HiddenLayers=N+10:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15" );
// 	factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=G,N:NeuronType=tanh:NCycles=200:HiddenLayers=N+5:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15" );
   factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:NCycles=250:HiddenLayers=N+5:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:TestRate=10");
	
   // Support Vector Machine
 if (Use["SVM"])
   factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=N" );
//     factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=N,G" );

   // Boosted Decision Trees
 if (Use["BDT"])
//      factory->BookMethod( TMVA::Types::kBDT, "BDT","!H:!V:NTrees=100:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" );
//         factory->BookMethod( TMVA::Types::kBDT, "BDT","!H:!V:NTrees=200:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:PruneMethod=CostComplexity:PruneStrength=30" );
//         factory->BookMethod( TMVA::Types::kBDT, "BDT","!H:!V:NTrees=300:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:PruneMethod=CostComplexity:PruneStrength=30" );
//  	factory->BookMethod( TMVA::Types::kBDT, "BDT","!H:!V:NTrees=100:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:PruneMethod=CostComplexity:PruneStrength=30" );
   factory->BookMethod( TMVA::Types::kBDT, "BDT","!H:!V:NTrees=100:nEventsMin=20:BoostType=AdaBoostR2:SeparationType=RegressionVariance:PruneMethod=CostComplexity:PruneStrength=30");
	
 if (Use["BDTG"])
//      factory->BookMethod( TMVA::Types::kBDT, "BDTG","!H:!V:NTrees=2000::BoostType=Grad:Shrinkage=0.1:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:MaxDepth=3:NNodesMax=15" );
   factory->BookMethod( TMVA::Types::kBDT, "BDTG","!H:!V:NTrees=1000::BoostType=Grad:Shrinkage=0.1:UseBaggedGrad:GradBaggingFraction=0.5:MaxDepth=5:NNodesMax=25:PruneMethod=CostComplexity:PruneStrength=30");
   // --------------------------------------------------------------------------------------------------
   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

 // Train MVAs using the set of training events
 factory->TrainAllMethods();

 // ---- Evaluate all MVAs using the set of test events
 factory->TestAllMethods();

 // ----- Evaluate and compare performance of all configured MVAs
 factory->EvaluateAllMethods();    

 // --------------------------------------------------------------
   
 // Save the output
 outputFile->Close();

 std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
 std::cout << "==> TMVARegression is done!" << std::endl;      

 delete factory;

 // Launch the GUI for the root macros
//  if (!gROOT->IsBatch()) TMVARegGui( outputFileName.c_str() );

 return 0;
}
예제 #6
0
void BJetRegression( TString myMethodList = "" ) 
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the 
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVARegression.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //

   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Mutidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]           = 0;
   Use["PDEFoam"]         = 1; 
   Use["KNN"]             = 1;
   // 
   // --- Linear Discriminant Analysis
   Use["LD"]		        = 1;
   // 
   // --- Function Discriminant analysis
   Use["FDA_GA"]          = 1;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   // 
   // --- Neural Network
   Use["MLP"]             = 1; 
   // 
   // --- Support Vector Machine 
   Use["SVM"]             = 0;
   // 
   // --- Boosted Decision Trees
   Use["BDT"]             = 0;
   Use["BDTG"]            = 1;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVARegression" << std::endl;

   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // --------------------------------------------------------------------------------------------------

   // --- Here the preparation phase begins

   // Create a new root output file
   TString outfileName( "TMVAReg_CSVJ1.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory will
   // then run the performance analysis for you.
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/ 
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in 
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVARegression", outputFile, 
                                               "!V:!Silent:Color:DrawProgressBar" );

   // If you wish to modify default settings 
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
//    std::string inputVariables[] =  {"CSVJ1PtUncorr", "CSVJ1Pt", "CSVJ1Et", "CSVJ1Mt", "CSVJ1ptLeadTrk",
//                                               "CSVJ1Vtx3dL", "CSVJ1Vtx3deL", "CSVJ1vtxMass", "CSVJ1VtxPt",
//                                               "CSVJ1SoftLeptPtRel", "CSVJ1SoftLeptPt",
//                                               "CSVJ1SoftLeptdR" , "CSVJ1Ntot"};
   std::string inputVariables[] =  {"jetPtUncorr", "jetPt", "jetEt", "jetMt", "jetptLeadTrk",
                                              "jetVtx3dL", "jetVtx3deL", "jetvtxMass", "jetVtxPt",
                                              "jetSoftLeptPtRel", "jetSoftLeptPt",
                                              "jetSoftLeptdR" , "jetNtot", "jetJECUnc"}; //
   for(int ivar = 0; ivar < 14;  ivar++){
        factory->AddVariable( inputVariables[ivar], inputVariables[ivar], "units", 'F' );
    }
   // You can add so-called "Spectator variables", which are not used in the MVA training, 
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
   // input variables, the response values of all trained MVAs, and the spectator variables
//    factory->AddSpectator( "spec1:=var1*2",  "Spectator 1", "units", 'F' );
//    factory->AddSpectator( "spec2:=var1*3",  "Spectator 2", "units", 'F' );

   // Add the variable carrying the regression target
//    factory->AddTarget( "matchGenJet1Pt" ); 
   factory->AddTarget( "matchGenJetPt" ); 

   // It is also possible to declare additional targets for multi-dimensional regression, ie:
   // -- factory->AddTarget( "fvalue2" );
   // BUT: this is currently ONLY implemented for MLP

   // Read training and test data (see TMVAClassification for reading ASCII files)
   // load the signal and background event samples from ROOT trees
   TFile *input(0);
   TString fname = "/scratch/zmao/regression/allSample_both_isobTag.root";
   if (!gSystem->AccessPathName( fname )) 
      input = TFile::Open( fname ); // check if file in local directory exists
   else 
      input = TFile::Open( "http://root.cern.ch/files/tmva_reg_example.root" ); // if not: download from ROOT server
   
   if (!input) {
      std::cout << "ERROR: could not open data file" << std::endl;
      exit(1);
   }
   std::cout << "--- TMVARegression           : Using input file: " << input->GetName() << std::endl;

   // --- Register the regression tree

   TTree *regTree = (TTree*)input->Get("eventTree");

   // global event weights per tree (see below for setting event-wise weights)
   Double_t regWeight  = 1.0;   

   // You can add an arbitrary number of regression trees
   factory->AddRegressionTree( regTree, regWeight );

   // This would set individual event weights (the variables defined in the 
   // expression need to exist in the original TTree)
//    factory->SetWeightExpression( "triggerEff", "Regression" );

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycut = ""; // for example: TCut mycut = "abs(var1)<0.5 && abs(var2-0.5)<1";

   // tell the factory to use all remaining events in the trees after training for testing:
   factory->PrepareTrainingAndTestTree( mycut, 
                                        "nTrain_Regression=0:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // If no numbers of events are given, half of the events in the tree are used 
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );  

   // ---- Book MVA methods
   //
   // please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // PDE - RS method
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS", 
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" );
   // And the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );   
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );   

   if (Use["PDEFoam"])
       factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", 
			    "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN", 
                           "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // Linear discriminant
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", 
                           "!H:!V:VarTransform=None" );

	// Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"]) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                          "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" );
   
   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. the formula of this example is good for parabolas
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" );

   if (Use["FDA_MT"]) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"]) 
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   // Neural network (MLP)
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" );

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDT"])
     factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=100:MinNodeSize=1.0%:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" );

   if (Use["BDTG"])
     factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=2000::BoostType=Grad:Shrinkage=0.1:UseBaggedBoost:BaggedSampleFraction=0.7:nCuts=200:MaxDepth=3:NNodesMax=15" );
   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();    

   // --------------------------------------------------------------
   
   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVARegression is done!" << std::endl;      

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVARegGui( outfileName );
}
// Train, test and evaluate TMVA classifiers on the photon-isolation variables stored in the
// "cutT" tree of the input file.
//
// myMethodList : comma-separated list of method names to run (e.g. "LD,BDT"). When empty, the
//                defaults set in the Use map below are taken (only "LD" is switched on).
//                An unknown name prints the list of known names and returns without training.
//
// Signal and background are taken from the same tree and separated by the cuts "subid==0"
// (signal) and "subid==1" (background). Results are written to "TMVA.root".
// If the output file, the input file or the input tree cannot be opened, an error is printed
// and the function returns without training.
void TMVAClassification( TString myMethodList = "" )
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   // if you like to use a method via the plugin mechanism, we recommend using
   //
   // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
   // (an example is given for using the BDT as plugin (see below),
   // but of course the real application is when you write your own
   // method based)

   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // to get access to the GUI and all tmva macros
   //TString thisdir = gSystem->DirName(gInterpreter->GetCurrentMacroName());
   //gROOT->SetMacroPath(thisdir + ":" + gROOT->GetMacroPath());
   //gROOT->ProcessLine(".L TMVAGui.C");


   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Cut optimisation
   Use["Cuts"]            = 0;
   Use["CutsD"]           = 0;
   Use["CutsPCA"]         = 0;
   Use["CutsGA"]          = 0;
   Use["CutsSA"]          = 0;
   //
   // --- 1-dimensional likelihood ("naive Bayes estimator")
   Use["Likelihood"]      = 0;
   Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"]   = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"]   = 0;
   Use["LikelihoodMIX"]   = 0;
   //
   // --- Multidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]           = 0;
   Use["PDERSD"]          = 0;
   Use["PDERSPCA"]        = 0;
   Use["PDEFoam"]         = 0;
   Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
   Use["KNN"]             = 0; // k-nearest neighbour method
   //
   // --- Linear Discriminant Analysis
   Use["LD"]              = 1; // Linear Discriminant identical to Fisher
   Use["Fisher"]          = 0;
   Use["FisherG"]         = 0;
   Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
   Use["HMatrix"]         = 0;
   //
   // --- Function Discriminant analysis
   Use["FDA_GA"]          = 0; // minimisation of user-defined function using Genetics Algorithm
   Use["FDA_SA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   Use["FDA_MCMT"]        = 0;
   //
   // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
   Use["MLP"]             = 0; // Recommended ANN
   Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
   Use["MLPBNN"]          = 0; // Recommended ANN with BFGS training method and bayesian regulator
   Use["CFMlpANN"]        = 0; // Deprecated ANN from ALEPH
   Use["TMlpANN"]         = 0; // ROOT's own ANN
   //
   // --- Support Vector Machine
   Use["SVM"]             = 0;
   //
   // --- Boosted Decision Trees
   Use["BDT"]             = 0; // uses Adaptive Boost
   Use["BDTG"]            = 0; // uses Gradient Boost
   Use["BDTB"]            = 0; // uses Bagging
   Use["BDTD"]            = 0; // decorrelation + Adaptive Boost
   Use["BDTF"]            = 0; // allow usage of fisher discriminant for node splitting
   //
   // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
   Use["RuleFit"]         = 0;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   // Select methods: an explicit list replaces the defaults above, so first switch
   // everything off and then enable only the requested names.
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // --------------------------------------------------------------------------------------------------

   // --- Here the preparation phase begins

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Without an output file the factory has nowhere to write and the Close() at the end
   // would dereference a null pointer, so stop here.
   if (!outputFile || outputFile->IsZombie()) {
      std::cout << "ERROR: could not create output file: " << outfileName << std::endl;
      return;
   }

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory is
   // the only TMVA object you have to interact with
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   // factory->AddVariable( "myvar1 := var1+var2", 'F' );
   // factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' );
   // factory->AddVariable( "var3",                "Variable 3", "units", 'F' );
   // factory->AddVariable( "var4",                "Variable 4", "units", 'F' );

   factory->AddVariable( "pho_ecalClusterIsoR4", "pho_ecalClusterIsoR4", "units", 'F' );
   factory->AddVariable( "pho_hcalRechitIsoR4", "pho_hcalRechitIsoR4", "units", 'F' );
   factory->AddVariable( "pho_trackIsoR4PtCut20", "pho_trackIsoR4PtCut20", "units", 'F' );
   factory->AddVariable( "phoHoverE", "phoHoverE", "units", 'F' );
   factory->AddVariable( "phoSigmaIEtaIEta_2012", "phoSigmaIEtaIEta_2012", "units", 'F' );

   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
   // factory->AddSpectator( "spec1 := var1*2",  "Spectator 1", "units", 'F' );
   // factory->AddSpectator( "spec2 := var1*3",  "Spectator 2", "units", 'F' );

   // Read training and test data
   // (it is also possible to use ASCII format as input -> see TMVA Users Guide)
   // TString fname = "./tmva_class_example.root";
   TString fname = "/net/hisrv0001/home/juliusbl/alex/cut/cutTree.root";

   // The stock macro downloaded tmva_class_example.root when fname was missing, but that
   // download was never opened here (fname points elsewhere), so a missing input always
   // ended in a null-pointer dereference. Fail with a clear message instead.
   TFile *input = TFile::Open( fname );
   if (!input || input->IsZombie()) {
      std::cout << "ERROR: could not open data file: " << fname << std::endl;
      outputFile->Close();
      delete factory;
      return;
   }

   std::cout << "--- TMVAClassification       : Using input file: " << input->GetName() << std::endl;

   // --- Register the training and test trees
   // Signal and background come from the same tree; they are separated by the
   // "subid" cuts passed to PrepareTrainingAndTestTree below.

   TTree *signal     = (TTree*)input->Get("cutT");
   TTree *background = (TTree*)input->Get("cutT");

   // Get() returns a null pointer when the key is absent; do not hand that to the factory.
   if (!signal || !background) {
      std::cout << "ERROR: tree \"cutT\" not found in data file: " << input->GetName() << std::endl;
      input->Close();
      outputFile->Close();
      delete factory;
      return;
   }

   // global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;

   // You can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( signal,     signalWeight     );
   factory->AddBackgroundTree( background, backgroundWeight );

   // To give different trees for training and testing, do as follows:
   //    factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
   //    factory->AddSignalTree( signalTestTree,     signalTestWeight,  "Test" );

   // Use the following code instead of the above two or four lines to add signal and background
   // training and test events "by hand"
   // NOTE that in this case one should not give expressions (such as "var1+var2") in the input
   //      variable definition, but simply compute the expression before adding the event
   //
   //     // --- begin ----------------------------------------------------------
   //     std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
   //     Float_t  treevars[4], weight;
   //
   //     // Signal
   //     for (UInt_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   //     for (UInt_t i=0; i<signal->GetEntries(); i++) {
   //        signal->GetEntry(i);
   //        for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //        // add training and test events; here: first half is training, second is testing
   //        // note that the weight can also be event-wise
   //        if (i < signal->GetEntries()/2.0) factory->AddSignalTrainingEvent( vars, signalWeight );
   //        else                              factory->AddSignalTestEvent    ( vars, signalWeight );
   //     }
   //
   //     // Background (has event weights)
   //     background->SetBranchAddress( "weight", &weight );
   //     for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   //     for (UInt_t i=0; i<background->GetEntries(); i++) {
   //        background->GetEntry(i);
   //        for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //        // add training and test events; here: first half is training, second is testing
   //        // note that the weight can also be event-wise
   //        if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight*weight );
   //        else                                factory->AddBackgroundTestEvent    ( vars, backgroundWeight*weight );
   //     }
   //     // --- end ------------------------------------------------------------
   //
   // --- end of tree registration

   // Set individual event weights (the variables must exist in the original TTree)
   //    for signal    : factory->SetSignalWeightExpression    ("weight1*weight2");
   //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
   // factory->SetBackgroundWeightExpression( "weight" );

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = "subid==0"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = "subid==1"; // for example: TCut mycutb = "abs(var1)<0.5";

   // Tell the factory how to use the training and testing events
   //
   // If no numbers of events are given, half of the events in the tree are used
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut,
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

   if (Use["CutsSA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                           "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   // Likelihood ("naive Bayes estimator")
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // Decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   // PCA-transformed likelihood
   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" );

   // Use a kernel density estimator to approximate the PDFs
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" );

   // Use a variable-dependent mix of splines and kernel density estimator
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" );

   // Test the multi-dimensional probability density estimator
   // here are the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["PDEFoamBoost"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
                           "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // H-Matrix (chi2-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" );

   // Linear discriminant (same as Fisher discriminant)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher discriminant (same as LD)
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher",
                           "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   // NOTE(review): the formulas below use x0..x3 only, while five input variables are
   // registered above -- confirm this is intended before enabling any FDA method.
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options)
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // Simulated Annealing fitter (same kind of options as CutsSA)
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators

   // CF(Clermont-Ferrand)ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...

   // Tmlp(Root)ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDTG"]) // Gradient Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" );

   if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );

   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );

   // Note: the "BDTF" switch books the method under the title "BDTMitFisher".
   if (Use["BDTF"])  // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables
      factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher",
                           "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

   // ---- STILL EXPERIMENTAL and only implemented for BDT's !
   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","FitGA");

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   //if (!gROOT->IsBatch())
     // gROOT->ProcessLine(TString::Format("TMVAGui(\"%s\")", outfileName.Data()));

   // efficiencies( TString fin = "TMVA.root", Int_t type = 2, Bool_t useTMVAStyle = kTRUE );
}
예제 #8
0
void TMVAClassificationCategory() 
{
   //---------------------------------------------------------------

   std::cout << std::endl << "==> Start TMVAClassificationCategory" << std::endl;

   bool batchMode(false);

   // Create a new root output file.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory will
   // then run the performance analysis for you.
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/ 
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in 
   // front of the "Silent" argument in the option string
   std::string factoryOptions( "!V:!Silent:Transformations=I;D;P;G,D" );
   if (batchMode) factoryOptions += ":!Color:!DrawProgressBar";

   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationCategory", outputFile, factoryOptions );

   // If you wish to modify default settings 
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   factory->AddVariable( "var1", 'F' );
   factory->AddVariable( "var2", 'F' );
   factory->AddVariable( "var3", 'F' );
   factory->AddVariable( "var4", 'F' );

   // You can add so-called "Spectator variables", which are not used in the MVA training, 
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
   // input variables, the response values of all trained MVAs, and the spectator variables
   factory->AddSpectator( "eta" );

   // load the signal and background event samples from ROOT trees
   TFile *input(0);
   TString fname( "" );
   if (UseOffsetMethod) fname = "../execs/data/toy_sigbkg_categ_offset.root";
   else                 fname = "../execs/data/toy_sigbkg_categ_varoff.root";
   if (!gSystem->AccessPathName( fname )) {
      // first we try to find tmva_example.root in the local directory
      std::cout << "--- TMVAClassificationCategory: Accessing " << fname << std::endl;
      input = TFile::Open( fname );
   } 

   if (!input) {
      std::cout << "ERROR: could not open data file: " << fname << std::endl;
      exit(1);
   }

   TTree *signal     = (TTree*)input->Get("TreeS");
   TTree *background = (TTree*)input->Get("TreeB");

   /// global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;
   
   /// you can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( signal,     signalWeight     );
   factory->AddBackgroundTree( background, backgroundWeight );
   
   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";

   // tell the factory to use all remaining events in the trees after training for testing:
   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // Fisher discriminant   
   factory->BookMethod( TMVA::Types::kFisher, "Fisher", "!H:!V:Fisher" );

   // Likelihood
   factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", 
                        "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); 

   // Categorised classifier
   TMVA::MethodCategory* mcat = 0;
   
   // the variable sets
   TString theCat1Vars = "var1:var2:var3:var4";
   TString theCat2Vars = (UseOffsetMethod ? "var1:var2:var3:var4" : "var1:var2:var3");

   // the Fisher 
   TMVA::MethodBase* fiCat = factory->BookMethod( TMVA::Types::kCategory, "FisherCat","" );
   mcat = dynamic_cast<TMVA::MethodCategory*>(fiCat);
   mcat->AddMethod("abs(eta)<=1.3",theCat1Vars, TMVA::Types::kFisher,"Category_Fisher_1","!H:!V:Fisher");
   mcat->AddMethod("abs(eta)>1.3", theCat2Vars, TMVA::Types::kFisher,"Category_Fisher_2","!H:!V:Fisher");

   // the Likelihood
   TMVA::MethodBase* liCat = factory->BookMethod( TMVA::Types::kCategory, "LikelihoodCat","" );
   mcat = dynamic_cast<TMVA::MethodCategory*>(liCat);
   mcat->AddMethod("abs(eta)<=1.3",theCat1Vars, TMVA::Types::kLikelihood,"Category_Likelihood_1","!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50");
   mcat->AddMethod("abs(eta)>1.3", theCat2Vars, TMVA::Types::kLikelihood,"Category_Likelihood_2","!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50");

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();    

   // --------------------------------------------------------------
   
   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassificationCategory is done!" << std::endl;      

   // Clean up
   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
int main(int argc, char** argv) {

    if(argc != 2)
    {
        std::cerr << ">>>>> analysis.cpp::usage: " << argv[0] << " configFileName      MVAconfigFileName" << std::endl ;
        return 1;
    }

    // Parse the config file
    parseConfigFile (argv[1]) ;

    std::string treeName  = gConfigParser -> readStringOption("Input::treeName");
    std::string fileSamples = gConfigParser -> readStringOption("Input::fileSamples");
    std::string inputDirectory = gConfigParser -> readStringOption("Input::inputDirectory");

    std::string inputBeginningFile = "out_NtupleProducer_";
    try {
        inputBeginningFile = gConfigParser -> readStringOption("Input::inputBeginningFile");
    }
    catch (char const* exceptionString) {
        std::cerr << " exception = " << exceptionString << std::endl;
    }
    std::cout << ">>>>> Input::inputBeginningFile  " << inputBeginningFile  << std::endl;



    double LUMI = gConfigParser -> readDoubleOption("Options::Lumi");

    std::vector<std::string> SignalName;
    SignalName = gConfigParser -> readStringListOption("Options::SignalName");

    for (int iSignalSample=0; iSignalSample<SignalName.size(); iSignalSample++) {
        std::cout << " Signal[" << iSignalSample << "] = " << SignalName.at(iSignalSample) << std::endl;
    }

    std::string nameWeight = "1";
    try {
        nameWeight = gConfigParser -> readStringOption("Options::nameWeight");
    }
    catch (char const* exceptionString) {
        std::cerr << " exception = " << exceptionString << std::endl;
    }
    std::cout << ">>>>> Input::nameWeight  " << nameWeight  << std::endl;



    TTree *treeJetLepVect[200];

    char *nameSample[1000];
    char *nameHumanReadable[1000];
    char* xsectionName[1000];

    char nameFileIn[1000];
    sprintf(nameFileIn,"%s",fileSamples.c_str());

    int numberOfSamples = ReadFile(nameFileIn, nameSample, nameHumanReadable, xsectionName);

    double Normalization[1000];
    double xsection[1000];

    for (int iSample=0; iSample<numberOfSamples; iSample++) {
        xsection[iSample] = atof(xsectionName[iSample]);
    }

    for (int iSample=0; iSample<numberOfSamples; iSample++) {
        char nameFile[20000];
        sprintf(nameFile,"%s/%s%s.root",inputDirectory.c_str(),inputBeginningFile.c_str(),nameSample[iSample]);

        TFile* f = new TFile(nameFile, "READ");

        treeJetLepVect[iSample] = (TTree*) f->Get(treeName.c_str());
        char nameTreeJetLep[100];
        sprintf(nameTreeJetLep,"treeJetLep_%d",iSample);
        treeJetLepVect[iSample]->SetName(nameTreeJetLep);

        double XSection;
        XSection = xsection[iSample];
        Normalization[iSample] = XSection * LUMI / 1000.;
    }

    //==== cut
    std::string CutFile = gConfigParser -> readStringOption("Selections::CutFile");
    std::vector<std::string> vCut;
    std::cout << " nCuts   = " << ReadFileCut(CutFile, vCut) << std::endl;

    std::string Cut;
    if (vCut.size() != 0) {
        Cut = vCut.at(0);
    }
    else {
        Cut = "1";
    }

    //==== HiggsMass
    std::string HiggsMass = gConfigParser -> readStringOption("Options::HiggsMass");

    //==== list of methods
    std::vector<std::string> vectorMyMethodList = gConfigParser -> readStringListOption("Options::MVAmethods");
    TString myMethodList;
    for (int iMVA = 0; iMVA < vectorMyMethodList.size(); iMVA++) {
        if (iMVA == 0) myMethodList = Form ("%s",vectorMyMethodList.at(iMVA).c_str());
        else           myMethodList = Form ("%s,%s",myMethodList.Data(),vectorMyMethodList.at(iMVA).c_str());
    }

    //==== output
    TString outfileName = gConfigParser -> readStringOption("Output::outFileName");


    // This loads the library
    TMVA::Tools::Instance();

    // Default MVA methods to be trained + tested
    std::map<std::string,int> Use;

    Use["MLP"]             = 1;
    Use["BDTG"]            = 1;
    Use["FDA_GA"]          = 0;
    Use["PDEFoam"]         = 0;


    std::cout << std::endl;
    std::cout << "==> Start TMVAClassification" << std::endl;

    // Select methods (don't look at this code - not of interest)
    if (myMethodList != "") {
        for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

        std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
        for (UInt_t i=0; i<mlist.size(); i++) {
            std::string regMethod(mlist[i]);

            if (Use.find(regMethod) == Use.end()) {
                std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
                for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
                std::cout << std::endl;
                return 0;
            }
            Use[regMethod] = 1;
        }
    }

    // --------------------------------------------------------------------------------------------------
    // --- Here the preparation phase begins

    // Create a new root output file
    TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

//   TMVA::Factory *factory = new TMVA::Factory( "TMVAMulticlass",     outputFile, "AnalysisType=multiclass:!V:!Silent:!V:Transformations=I;D" );
    TMVA::Factory *factory = new TMVA::Factory( "TMVAMulticlass",     outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=multiclass" );

    factory->AddVariable( "jetpt1" , 'F');
    factory->AddVariable( "jetpt2" , 'F');
    factory->AddVariable( "mjj" , 'F');
    factory->AddVariable( "detajj" , 'F');
    factory->AddVariable( "dphilljetjet" , 'F');

    factory->AddVariable( "pt1" , 'F');
    factory->AddVariable( "pt2" , 'F');
    factory->AddVariable( "mll" , 'F');
    factory->AddVariable( "dphill" , 'F');
    factory->AddVariable( "mth" , 'F');

    factory->AddVariable( "dphillmet" , 'F');
    factory->AddVariable( "mpmet" , 'F');

    factory->AddSpectator( "channel" , 'F');

    for (int iSample=0; iSample<numberOfSamples; iSample++) {
        int numEnt = treeJetLepVect[iSample]->GetEntries(Cut.c_str());
        std::cout << " Sample = " << nameSample[iSample] << " ~ " << nameHumanReadable[iSample] << " --> " << numEnt << std::endl;
        if (numEnt != 0) {
            if (iSample == 0) factory->AddTree( treeJetLepVect[iSample], "Signal", Normalization[iSample] );
            else if (iSample == 1) factory->AddTree( treeJetLepVect[iSample], "Background", Normalization[iSample] );
            else factory->AddTree( treeJetLepVect[iSample], TString(nameHumanReadable[iSample]), Normalization[iSample] );

//     factory->AddTree( treeJetLepVect[iSample], TString(nameHumanReadable[iSample]), Normalization[iSample] );
//     factory->AddTree( treeJetLepVect[iSample], TString(nameHumanReadable[iSample]), Normalization[iSample] , nameWeight.c_str());
            //     factory->AddTree( treeJetLepVect[iSample], TString(nameHumanReadable[iSample]));
        }
    }

//   for (int iSample=0; iSample<numberOfSamples; iSample++){
//    int numEnt = treeJetLepVect[iSample]->GetEntries(Cut.c_str());
//    std::cout << " Sample = " << nameSample[iSample] << " ~ " << nameHumanReadable[iSample] << " --> " << numEnt << std::endl;
//    if (numEnt != 0) {
//     bool isSig = false;
//     for (std::vector<std::string>::const_iterator itSig = SignalName.begin(); itSig != SignalName.end(); itSig++){
//      if (nameHumanReadable[iSample] == *itSig) isSig = true;
//     }
//     if (isSig) {
//      factory->AddTree( treeJetLepVect[iSample], TString("Signal"), Normalization[iSample] ); //---> ci deve essere uno chiamato Signal!
//     }
//     else {
//      factory->AddTree( treeJetLepVect[iSample], TString(nameHumanReadable[iSample]), Normalization[iSample] );
//     }
//    }
//   }
//
//   for (int iSample=0; iSample<numberOfSamples; iSample++){
//    int numEnt = treeJetLepVect[iSample]->GetEntries(Cut.c_str());
//    std::cout << " Sample = " << nameSample[iSample] << " ~ " << nameHumanReadable[iSample] << " --> " << numEnt << std::endl;
//    if (numEnt != 0) {
//     bool isSig = false;
//     for (std::vector<std::string>::const_iterator itSig = SignalName.begin(); itSig != SignalName.end(); itSig++){
//      if (nameHumanReadable[iSample] == *itSig) isSig = true;
//     }
//     if (isSig) {
// //      factory->AddTree( treeJetLepVect[iSample], TString("Signal"), Normalization[iSample] ); //---> ci deve essere uno chiamato Signal!
//     }
//     else {
//      factory->AddTree( treeJetLepVect[iSample], TString(nameHumanReadable[iSample]), Normalization[iSample] );
//     }
//    }
//   }


    std::cerr << " AAAAAAAAAAAAAAAAAAAAAAAAAAAAA " << std::endl;

    TCut mycuts = Cut.c_str();

//   factory->SetWeightExpression( nameWeight.c_str() );
//   factory->SetBackgroundWeightExpression( nameWeight.c_str() );
//   factory->SetSignalWeightExpression    ( nameWeight.c_str() );

    std::cerr << " BBBBBBBBBBBBBBBBBBBBBBBBBBBBB " << std::endl;

    factory->PrepareTrainingAndTestTree( mycuts ,"SplitMode=Random:NormMode=None:!V");
//   factory->PrepareTrainingAndTestTree( "" ,"SplitMode=Random:NormMode=None:!V");

    std::cerr << " CCCCCCCCCCCCCCCCCCCCCCCCCCCCC " << std::endl;



    // gradient boosted decision trees
//   if (Use["BDTG"])    factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.50:nCuts=20:NNodesMax=8");
    if (Use["BDTG"])    factory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=600:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.50:nCuts=20:NNodesMax=8");
    // neural network
    if (Use["MLP"])     factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:NCycles=1000:HiddenLayers=N+5,5:TestRate=5:EstimatorType=MSE");
    // functional discriminant with GA minimizer
    if (Use["FDA_GA"])  factory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );
    // PDE-Foam approach
    if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );



    //==== Optimize parameters in MVA methods
//   factory->OptimizeAllMethods();
//   factory->OptimizeAllMethods("ROCIntegral","Scan");
    //==== Train MVAs using the set of training events ====
    factory->TrainAllMethods();

    //==== Evaluate all MVAs using the set of test events ====
    factory->TestAllMethods();

    //==== Evaluate and compare performance of all configured MVAs ====
    factory->EvaluateAllMethods();

    // --------------------------------------------------------------

    // Save the output
    outputFile->Close();

    std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
    std::cout << "==> TMVAnalysis is done!" << std::endl;

    delete factory;

    //==== change position of weights file
    std::string toDo;

    toDo = "rm -r Weights-MVA-MultiClass/weights_" + HiggsMass + "_testVariables";
    std::cerr << "toDo = " << toDo << std::endl;
    system (toDo.c_str());

    toDo = "mv weights Weights-MVA-MultiClass/weights_" + HiggsMass + "_testVariables";
    std::cerr << "toDo = " << toDo << std::endl;
    system (toDo.c_str());

    // Launch the GUI for the root macros
    //   if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
예제 #10
0
void TMVAClassification( TString eventsToTrain = "0", const TString & region = "barrel", const TString index = "", TString myMethodList = "BDT")
{

  std::cout << "running classification for " << region << " for " << myMethodList << std::endl;

  if( region != "barrel" && region != "endcaps" ) {
    std::cout << "Error, region can only be barrel or endcaps. Selected region was: " << region << std::endl;
    exit(1);
  }
  if( index != "" && index != "0" && index != "1" && index != "2" ) {
    std::cout << "Error, index can only be \"\", \"0\", \"1\" or \"2\". Selected index was: " << index << std::endl;
    exit(1);
  }


   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   // if you like to use a method via the plugin mechanism, we recommend using
   //
   // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
   // (an example is given for using the BDT as plugin (see below),
   // but of course the real application is when you write your own
   // method based)

   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Cut optimisation
   Use["Cuts"]            = 0;
   Use["CutsD"]           = 0;
   Use["CutsPCA"]         = 0;
   Use["CutsGA"]          = 0;
   Use["CutsSA"]          = 0;
   // 
   // --- 1-dimensional likelihood ("naive Bayes estimator")
   Use["Likelihood"]      = 0;
   Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"]   = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"]   = 0;
   Use["LikelihoodMIX"]   = 0;
   //
   // --- Mutidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]           = 0;
   Use["PDERSD"]          = 0;
   Use["PDERSPCA"]        = 0;
   Use["PDEFoam"]         = 0;
   Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
   Use["KNN"]             = 0; // k-nearest neighbour method
   //
   // --- Linear Discriminant Analysis
   Use["LD"]              = 0; // Linear Discriminant identical to Fisher
   Use["Fisher"]          = 0;
   Use["FisherG"]         = 0;
   Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
   Use["HMatrix"]         = 0;
   //
   // --- Function Discriminant analysis
   Use["FDA_GA"]          = 0; // minimisation of user-defined function using Genetics Algorithm
   Use["FDA_SA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   Use["FDA_MCMT"]        = 0;
   //
   // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
   Use["MLP"]             = 0; // Recommended ANN
   Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
   Use["MLPBNN"]          = 0; // Recommended ANN with BFGS training method and bayesian regulator
   Use["CFMlpANN"]        = 0; // Depreciated ANN from ALEPH
   Use["TMlpANN"]         = 0; // ROOT's own ANN
   //
   // --- Support Vector Machine 
   Use["SVM"]             = 0;
   // 
   // --- Boosted Decision Trees
   Use["BDT"]             = 1; // uses Adaptive Boost
   Use["BDTG"]            = 0; // uses Gradient Boost
   Use["BDTB"]            = 0; // uses Bagging
   Use["BDTD"]            = 0; // decorrelation + Adaptive Boost
   Use["BDTF"]            = 0; // allow usage of fisher discriminant for node splitting 
   // 
   // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
   Use["RuleFit"]         = 0;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
	    std::exit(2);
         }
         Use[regMethod] = 1;
      }
   }


   // Input and output file names
   TString fnameTrainS = "BsMC12_barrel_preselection";
   TString fnameTrainB = "Barrel_preselection";
   TString fnameTestS = "BsMC12_barrel_preselection";
   TString fnameTestB = "Barrel_preselection";
   TString outputFileName = "TMVA_barrel";
   TString weightDirName = "barrel";
   if( region == "endcaps" ) {
     fnameTrainS = "BsMC12_endcaps_preselection";
     fnameTrainB = "Endcaps_preselection";
     fnameTestS = "BsMC12_endcaps_preselection";
     fnameTestB = "Endcaps_preselection";
     outputFileName = "TMVA_endcaps";
     weightDirName = "endcaps";
   }
   if( index != "" ) {
     fnameTrainS += "_"+index;
     fnameTrainB += "_"+index;
     TString indexTest = "";
     // The test index is the train index +1 (2+1 -> 0)
     if( index == "0" ) indexTest = "1";
     else if( index == "1" ) indexTest = "2";
     else if( index == "2" ) indexTest = "0";
     fnameTestS += "_"+indexTest;
     fnameTestB += "_"+indexTest;
     outputFileName += "_"+index;
     weightDirName += index;
   }

   fnameTrainS     = rootDir + fnameTrainS    + ".root";
   fnameTrainB     = rootDir + fnameTrainB    + ".root";
   fnameTestS      = rootDir + fnameTestS     + ".root";
   fnameTestB      = rootDir + fnameTestB     + ".root";
   outputFileName  = rootDir + outputFileName + ".root";
   weightDirName   = weightsDir + weightDirName + "Weights";


   // --------------------------------------------------------------------------------------------------

   // --- Here the preparation phase begins

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName(outputFileName);
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory is 
   // the only TMVA object you have to interact with
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";
   // (TMVA::gConfig().GetIONames()).fWeightFileDir = outputFileName;
   (TMVA::gConfig().GetIONames()).fWeightFileDir = weightDirName;

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]

   bool useNewMuonID = false;

   factory->AddVariable( "fls3d",      "fls3d", "", 'F' );
   factory->AddVariable( "alpha",      "alpha", "", 'F' );
   factory->AddVariable( "pvips",      "pvips", "", 'F' );
   factory->AddVariable( "iso",        "iso", "", 'F' );
   factory->AddVariable( "m1iso",      "m1iso", "", 'F' );
   factory->AddVariable( "m2iso",      "m2iso", "", 'F' );
   factory->AddVariable( "chi2dof",    "chi2/dof", "", 'F' );
   if( region == "barrel" ) {
     factory->AddVariable( "eta",      "eta", "", 'F' );
     factory->AddVariable( "maxdoca",  "maxdoca", "cm", 'F' );
   }
   else {
     factory->AddVariable( "pt",       "pt", "GeV/c", 'F' );
     factory->AddVariable( "pvip",     "pvip", "cm", 'F' );
   }
   factory->AddVariable( "docatrk",    "docatrk", "cm", 'F' );
   // factory->AddVariable( "pt",         "pt", "GeV/c", 'F' );
   // factory->AddVariable( "closetrk",   "closetrk", "", 'I' );
   // factory->AddVariable( "y",                              "y", "", 'F' );
   // factory->AddVariable( "l3d",                            "l3d", "cm", 'F' );
   // factory->AddVariable( "cosAlphaXY",                     "cosAlphaXY", "", 'F' );
   // factory->AddVariable( "mu1_dxy",                        "mu1_dxy", "cm", 'F' );
   // factory->AddVariable( "mu2_dxy",                        "mu2_dxy", "cm", 'F' );

   if( useNewMuonID ) {
     // New Muon-id
     factory->AddVariable( "mu1_MVAMuonID",                  "mu1_MVAMuonID", "", 'F');
     factory->AddVariable( "mu2_MVAMuonID",                  "mu2_MVAMuonID", "", 'F');
   }

   // Extra variables
   // factory->AddVariable( "mu1_pt",                           "mu1_pt", "GeV/c", 'F' );
   // factory->AddVariable( "mu2_pt",                           "mu2_pt", "GeV/c", 'F' );
   // factory->AddVariable( "pvw8",                             "pvw8", "", 'F' );
   // factory->AddVariable( "cosAlpha3D",                       "cosAlpha3D", "", 'F' );
   // factory->AddVariable( "countTksOfPV",                     "countTksOfPV", "", 'I' );
   // factory->AddVariable( "ctauErrPV",                        "ctauErrPV", "", 'F' );
   // factory->AddVariable( "ctauPV",                           "ctauPV", "", 'F' );
   // factory->AddVariable( "dcaxy",                            "dcaxy", "", 'F' );

   // factory->AddVariable( "mu1_glbTrackProb",                 "mu1_glbTrackProb", "", 'F' );
   // factory->AddVariable( "mu1_nChi2",                        "mu1_nChi2", "", 'F' );
   // factory->AddVariable( "mu1_nMuSegs",                      "mu1_nMuSegs", "", 'F' );
   // factory->AddVariable( "mu1_nMuSegsCln",                   "mu1_nMuSegsCln", "", 'F' );
   // factory->AddVariable( "mu1_nPixHits",                     "mu1_nPixHits", "", 'F' );
   // factory->AddVariable( "mu1_nTrHits",                      "mu1_nTrHits", "", 'F' );
   // factory->AddVariable( "mu1_segComp",                      "mu1_segComp", "", 'F' );
   // factory->AddVariable( "mu1_trkEHitsOut",                  "mu1_trkEHitsOut", "", 'F' );
   // factory->AddVariable( "mu1_trkVHits",                     "mu1_trkVHits", "", 'F' );
   // factory->AddVariable( "mu1_validFrac",                    "mu1_validFrac", "", 'F' );
   // factory->AddVariable( "mu1_chi2LocMom",                   "mu1_chi2LocMom", "", 'F' );
   // factory->AddVariable( "mu1_chi2LocPos",                   "mu1_chi2LocPos", "", 'F' );

   // factory->AddVariable( "mu2_glbTrackProb",                 "mu2_glbTrackProb", "", 'F' );
   // factory->AddVariable( "mu2_nChi2",                        "mu2_nChi2", "", 'F' );
   // factory->AddVariable( "mu2_nMuSegs",                      "mu2_nMuSegs", "", 'F' );
   // factory->AddVariable( "mu2_nMuSegsCln",                   "mu2_nMuSegsCln", "", 'F' );
   // factory->AddVariable( "mu2_nPixHits",                     "mu2_nPixHits", "", 'F' );
   // factory->AddVariable( "mu2_nTrHits",                      "mu2_nTrHits", "", 'F' );
   // factory->AddVariable( "mu2_segComp",                      "mu2_segComp", "", 'F' );
   // factory->AddVariable( "mu2_trkEHitsOut",                  "mu2_trkEHitsOut", "", 'F' );
   // factory->AddVariable( "mu2_trkVHits",                     "mu2_trkVHits", "", 'F' );
   // factory->AddVariable( "mu2_validFrac",                    "mu2_validFrac", "", 'F' );
   // factory->AddVariable( "mu2_chi2LocMom",                   "mu2_chi2LocMom", "", 'F' );
   // factory->AddVariable( "mu2_chi2LocPos",                   "mu2_chi2LocPos", "", 'F' );



   // factory->AddVariable( "l3d := ctauPV*pt/mass",            "l3d", "cm", 'F' );
   // factory->AddVariable( "l3dSig := ctauPV/ctauErrPV",       "l3dSig", "", 'F' );

   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
   // factory->AddSpectator( "spec1 := mass*2",  "Spectator 1", "units", 'F' );
   // factory->AddSpectator( "spec2 := mass*3",  "Spectator 2", "units", 'F' );
   factory->AddSpectator( "mass",                            "mass", "GeV/c^{2}", 'F' );


   // Read training and test data
   // (it is also possible to use ASCII format as input -> see TMVA Users Guide)

   if (gSystem->AccessPathName( fnameTrainS )) {  // file does not exist in local directory
     std::cout << "Did not access " << fnameTrainS << " exiting." << std::endl;
     std::exit(4);
   }

     //gSystem->Exec("wget http://root.cern.ch/files/tmva_class_example.root");
   
   TFile *inputTrainS = TFile::Open( fnameTrainS );
   TFile *inputTrainB = TFile::Open( fnameTrainB );
   TFile *inputTestS  = TFile::Open( fnameTestS  );
   TFile *inputTestB  = TFile::Open( fnameTestB  );
   // --- Register the training and test trees
   TTree *signalTrainTree     = (TTree*)inputTrainS->Get("probe_tree");
   TTree *backgroundTrainTree = (TTree*)inputTrainB->Get("probe_tree");
   TTree *signalTestTree     = (TTree*)inputTestS->Get("probe_tree");
   TTree *backgroundTestTree = (TTree*)inputTestB->Get("probe_tree");
   // global event weights per tree (see below for setting event-wise weights)
   Double_t signalTrainWeight     = 1.0;
   Double_t backgroundTrainWeight = 1.0;
   Double_t signalTestWeight     = 1.0;
   Double_t backgroundTestWeight = 1.0;
   // Decide if using the split and mixing or the full trees
   if( fnameTrainS == fnameTestS ) {
     if( fnameTrainB != fnameTestB ) {
       std::cout << "This macro cannot handle cases where the same signal sample is used for training and testing, but different background samples are used.";
       exit(1);
     }
     std::cout << "--- TMVAClassification       : Using input file: " << inputTrainS->GetName() << std::endl;
     std::cout << "--- and file: " << inputTrainB->GetName() << std::endl;
     // You can add an arbitrary number of signal or background trees
     factory->AddSignalTree    ( signalTrainTree,     signalTrainWeight     );
     factory->AddBackgroundTree( backgroundTrainTree, backgroundTrainWeight );
   }
   else {
     if( fnameTrainB == fnameTestB ) {
       std::cout << "This macro cannot handle cases where the same background sample is used for training and testing, but different signal samples are used.";
       exit(1);
     }
     std::cout << "--- TMVAClassification       : Using input file: " << inputTrainS->GetName() << std::endl;
     std::cout << "--- and file: " << inputTrainB->GetName() << " for training and" << std::endl;
     std::cout << "--- input file: " << inputTestS->GetName() << std::endl;
     std::cout << "--- and file: " << inputTestB->GetName() << " for testing." << std::endl;
     // To give different trees for training and testing, do as follows:
     factory->AddSignalTree( signalTrainTree,     signalTrainWeight, "Training" );
     factory->AddSignalTree( signalTestTree,      signalTestWeight,  "Test" );
     factory->AddBackgroundTree( backgroundTrainTree, backgroundTrainWeight, "Training" );
     factory->AddBackgroundTree( backgroundTestTree,  backgroundTestWeight,  "Test" );
   }

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = "";
   TCut mycutb = "";

   // Tell the factory how to use the training and testing events
   //
   // If no numbers of events are given, half of the events in the tree are used 
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut,
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );




   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal="+eventsToTrain+":nTrain_Background="+eventsToTrain+":SplitMode=Random:NormMode=NumEvents:!V" );
   // factory->PrepareTrainingAndTestTree( mycuts, mycutb,
   //                                      "nTrain_Signal=3000:nTrain_Background=3000:nTest_Signal=3000:nTest_Background=3000:SplitMode=Random:NormMode=NumEvents:!V" );
   // factory->PrepareTrainingAndTestTree( mycuts, mycutb,
   //                                     "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

   if (Use["CutsSA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                           "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   // Likelihood ("naive Bayes estimator")
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // Decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   // PCA-transformed likelihood
   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 

   // Use a kernel density estimator to approximate the PDFs
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); 

   // Use a variable-dependent mix of splines and kernel density estimator
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

   // Test the multi-dimensional probability density estimator
   // here are the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["PDEFoamBoost"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
                           "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // H-Matrix (chi2-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" );

   // Linear discriminant (same as Fisher discriminant)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher discriminant (same as LD)
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", 
                           "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"])
     // factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );
     factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+8:TestRate=5:!UseRegulator" );

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators

   // CF(Clermont-Ferrand)ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...  

   // Tmlp(Root)ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDTG"]) // Gradient Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5" );

   if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=800:nEventsMin=50:MaxDepth=2:BoostType=AdaBoost:AdaBoostBeta=1:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:NNodesMax=5" );


   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );

   if (Use["BDTF"])  // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables
      factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher",
                           "!H:!V:NTrees=50:nEventsMin=150:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","GA");

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   std::cout << "Training all methods" << std::endl;
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   std::cout << "Testing all methods" << std::endl;
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   std::cout << "Evaluating all methods" << std::endl;
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
예제 #11
0
////////////////////////////////////////////////////////////////////////////////
/// Main                                                                     ///
////////////////////////////////////////////////////////////////////////////////
/// Train, test and evaluate TMVA regression methods for the target
/// "fathFilterJets_genPt".
///
/// The macro forces batch mode, loads "HelperFunctions.h", and refuses to run
/// unless the ROOT version string contains "5.34". Input variables come from
/// GetInputExpressionsFJReg() / GetInputExpressionLabelsFJReg(); the training
/// and test trees ("tree_train" / "tree_test") are read from
/// "skim_ZnnH_regression_fj/skim_<process>.root" for every process in the
/// list below. Results are written to "TMVARegFJ.root".
///
/// @param myMethodList  Comma-separated names of the methods to run. When
///                      empty, the defaults set in the `Use` map are used.
///                      An unknown name prints the known names and returns
///                      without training anything.
///
/// Failure to create the output file, to open an input file, or to find one
/// of the two trees in an input file terminates the process with exit(1).
void TrainRegressionFJ(TString myMethodList="")
{
    gROOT->SetBatch(1);
    gROOT->LoadMacro("HelperFunctions.h" );  // make functions visible to TTreeFormula

    if (!TString(gROOT->GetVersion()).Contains("5.34")) {
        std::cout << "INCORRECT ROOT VERSION! Please use 5.34:" << std::endl;
        std::cout << "source /uscmst1/prod/sw/cms/slc5_amd64_gcc462/lcg/root/5.34.02-cms/bin/thisroot.csh" << std::endl;
        std::cout << "Return without doing anything." << std::endl;
        return;
    }
    
    //TString curDynamicPath( gSystem->GetDynamicPath() );
    //gSystem->SetDynamicPath( "../lib:" + curDynamicPath );

    //TString curIncludePath(gSystem->GetIncludePath());
    //gSystem->SetIncludePath( " -I../include " + curIncludePath );

    // Load the library
    TMVA::Tools::Instance();


    //--------------------------------------------------------------------------
    // Default MVA methods to be trained + tested
    std::map<std::string, int> Use;

    // --- Multidimensional likelihood and Nearest-Neighbour methods
    Use["PDERS"]           = 0;
    Use["PDEFoam"]         = 1;
    Use["KNN"]             = 1;
    //
    // --- Linear Discriminant Analysis
    Use["LD"]              = 1;
    //
    // --- Function Discriminant analysis
    Use["FDA_GA"]          = 1;
    Use["FDA_MC"]          = 0;
    Use["FDA_MT"]          = 0;
    Use["FDA_GAMT"]        = 0;
    //
    // --- Neural Network
    Use["MLP"]             = 1; 
    //
    // --- Support Vector Machine 
    Use["SVM"]             = 0;
    // 
    // --- Boosted Decision Trees
    Use["BDT"]             = 1;
    Use["BDT1"]            = 0;
    Use["BDTG"]            = 0;
    Use["BDTG1"]           = 0;

    //--------------------------------------------------------------------------
    std::cout << std::endl;
    std::cout << "==> Start TMVARegression" << std::endl;

    // Select methods: an explicit list switches every default off and then
    // enables only the requested names.
    if (myMethodList != "") {
        for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

        std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
        for (UInt_t i=0; i<mlist.size(); i++) {
            std::string regMethod(mlist[i]);

            if (Use.find(regMethod) == Use.end()) {
                std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
                for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
                std::cout << std::endl;
                return;
            }
            Use[regMethod] = 1;
        }
    }

    //--------------------------------------------------------------------------
    // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
    TString outfileName( "TMVARegFJ.root" );
    TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
    // The pointer is handed to the factory and dereferenced at the end of the
    // macro, so stop here if the file could not be created.
    if (!outputFile) {
        std::cout << "ERROR: Could not create output file " << outfileName << "." << std::endl;
        exit(1);
    }

    // Create the factory object. Later you can choose the methods
    // whose performance you'd like to investigate. The factory will
    // then run the performance analysis for you.
    //
    // The first argument is the base of the name of all the
    // weightfiles in the directory weights/
    //
    // The second argument is the output file for the training results
    // All TMVA output can be suppressed by removing the "!" (not) in 
    // front of the "Silent" argument in the option string
    TMVA::Factory *factory = new TMVA::Factory( "TMVARegressionFJ", outputFile, 
                                                "!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=Regression" );

    // If you wish to modify default settings
    // (please check "src/Config.h" to see all available global options)
    //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
    //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

    const std::vector<std::string> & inputExpressions      = GetInputExpressionsFJReg();
    const std::vector<std::string> & inputExpressionLabels = GetInputExpressionLabelsFJReg();
    assert(inputExpressions.size() == inputExpressionLabels.size());

    // Define the input variables that shall be used for the MVA training
    // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    //factory->AddVariable( "var1", "Variable 1", "units", 'F' );
    //factory->AddVariable( "var2", "Variable 2", "units", 'F' );
    
    // One input variable per expression; title, unit and type come from the
    // label entry at the same index.
    for (UInt_t iexpr=0; iexpr!=inputExpressions.size(); iexpr++){
        Label label = MakeLabel(inputExpressionLabels.at(iexpr));
        TString expr = inputExpressions.at(iexpr);
        factory->AddVariable(expr, label.xlabel, label.unit, label.type);
    }

    // You can add so-called "Spectator variables", which are not used in the MVA training,
    // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
    // input variables, the response values of all trained MVAs, and the spectator variables
    //factory->AddSpectator( "spec1 := var1*2",  "Spectator 1", "units", 'F' );
    //factory->AddSpectator( "spec2 := var1*3",  "Spectator 2", "units", 'F' );

    // Add the variable carrying the regression target
    //factory->AddTarget( "fvalue" );
    factory->AddTarget( "fathFilterJets_genPt" );

    // It is also possible to declare additional targets for multi-dimensional regression, ie:
    // -- factory->AddTarget( "fvalue2" );
    // BUT: this is currently ONLY implemented for MLP

    //--------------------------------------------------------------------------
    // Read training and test data
    TFile *input(0);
    TString dirname = "skim_ZnnH_regression_fj/";
    TString prefix  = "skim_";
    TString suffix  = ".root";
    TTree *regTrainTree(0), *regTestTree(0);
    
    std::vector<std::string> processes;
    processes.push_back("ZnnH110");
    processes.push_back("ZnnH115");
    processes.push_back("ZnnH120");
    processes.push_back("ZnnH125");
    processes.push_back("ZnnH130");
    processes.push_back("ZnnH135");
    processes.push_back("ZnnH140");
    processes.push_back("ZnnH145");
    processes.push_back("ZnnH150");
#ifdef USE_WH
    processes.push_back("WlnH110");
    processes.push_back("WlnH115");
    processes.push_back("WlnH120");
    processes.push_back("WlnH125");
    processes.push_back("WlnH130");
    processes.push_back("WlnH135");
    processes.push_back("WlnH140");
    processes.push_back("WlnH145");
    processes.push_back("WlnH150");
#endif

    // Opened input files are remembered so they can be closed once training,
    // testing and evaluation are finished.
    std::vector<TFile *> files;
    for (UInt_t i=0; i<processes.size(); i++){
        std::string process = processes.at(i);
        input = (TFile*) TFile::Open(dirname + prefix + process + suffix, "READ");
        if (!input) {
            std::cout << "ERROR: Could not open input file." << std::endl;
            exit(1);
        }
        std::cout << "--- TMVARegression           : Using input file: " << input->GetName() << std::endl;
        files.push_back(input);
        
        // --- Register the regression tree
        regTrainTree = (TTree*) input->Get("tree_train");
        regTestTree  = (TTree*) input->Get("tree_test");
        // Get() yields a null pointer when the key is absent; report which
        // file is incomplete instead of passing a null tree to the factory.
        if (!regTrainTree || !regTestTree) {
            std::cout << "ERROR: Could not find \"tree_train\" and/or \"tree_test\" in input file " << input->GetName() << "." << std::endl;
            exit(1);
        }

        // Global event weights per tree (see below for setting event-wise weights)
        Double_t regWeight = 1.0;

        // You can add an arbitrary number of regression trees
        factory->AddRegressionTree(regTrainTree, regWeight, TMVA::Types::kTraining);
        factory->AddRegressionTree(regTestTree , regWeight, TMVA::Types::kTesting );
    }

    // Set individual event weights (the variables must exist in the original TTree)
    //factory->SetWeightExpression( "var1", "Regression" );

    // Apply additional cuts on the signal and background samples (can be different)
    TCut mycut = "fathFilterJets_genPt>10 && fathFilterJets_pt>15 && abs(fathFilterJets_eta)<2.5"; // this is to avoid 3rd filter jet without gen match
    //TCut mycut = "hJet_genPt[0] > 0. && hJet_genPt[1] > 0. && hJet_csv[0] > 0. && hJet_csv[1] > 0. && hJet_pt[0] > 20. && hJet_pt[1] > 20. && abs(hJet_eta[0])<2.5 && abs(hJet_eta[1])<2.5";

    // Tell the factory to use all remaining events in the trees after training for testing:
    factory->PrepareTrainingAndTestTree( mycut, "V:nTrain_Regression=0:nTest_Regression=0:SplitMode=Random:NormMode=NumEvents" );

    // If no numbers of events are given, half of the events in the tree are used 
    // for training, and the other half for testing:
    //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=Random:!V" );

    // --- Book MVA methods
    //
    // Please lookup the various method configuration options in the corresponding cxx files, eg:
    // src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    // it is possible to preset ranges in the option string in which the cut optimisation should be done:
    // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    // PDE - RS method
    if (Use["PDERS"])
        factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                             "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=40:NEventsMax=60:VarTransform=None" );
    // And the options strings for the MinMax and RMS methods, respectively:
    //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );   
    //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );   

    if (Use["PDEFoam"])
        factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                             "!H:!V:MultiTargetRegression=F:TargetSelection=Mpv:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Compress=T:Kernel=None:Nmin=10:VarTransform=None" );

    // K-Nearest Neighbour classifier (KNN)
    if (Use["KNN"])
        factory->BookMethod( TMVA::Types::kKNN, "KNN",
                             "nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

    // Linear discriminant
    if (Use["LD"])
        factory->BookMethod( TMVA::Types::kLD, "LD", 
                             "!H:!V:VarTransform=None" );

    // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if (Use["FDA_MC"])
        factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                             "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MC:SampleSize=100000:Sigma=0.1:VarTransform=D" );

    if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options) .. the formula of this example is good for parabolas
        factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                             "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:PopSize=100:Cycles=3:Steps=30:Trim=True:SaveBestGen=1:VarTransform=Norm" );

    // The formula has three parameters, (0), (1) and (2), so exactly three
    // ParRanges entries are listed, matching the other FDA bookings here.
    if (Use["FDA_MT"])
        factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                             "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

    if (Use["FDA_GAMT"])
        factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                             "!H:!V:Formula=(0)+(1)*x0+(2)*x1:ParRanges=(-100,100);(-100,100);(-100,100):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

    // Neural network (MLP)
    if (Use["MLP"])
        factory->BookMethod( TMVA::Types::kMLP, "MLP", 
                             "!H:!V:VarTransform=Norm:NeuronType=tanh:NCycles=20000:HiddenLayers=N+20:TestRate=6:TrainingMethod=BFGS:Sampling=0.3:SamplingEpoch=0.8:ConvergenceImprove=1e-6:ConvergenceTests=15:!UseRegulator" );

    // Support Vector Machine
    if (Use["SVM"])
        factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

    // Boosted Decision Trees
    if (Use["BDT"])
        factory->BookMethod( TMVA::Types::kBDT, "BDT",
                             "!H:V:NTrees=100:nEventsMin=30:NodePurityLimit=0.5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" );
//"!H:V:NTrees=60:nEventsMin=20:NodePurityLimit=0.5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30:DoBoostMonitor" );

    if (Use["BDT1"])
        factory->BookMethod( TMVA::Types::kBDT, "BDT1",
                             "!H:V:NTrees=100:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" );

    if (Use["BDTG"])
        factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                             "!H:V:NTrees=2000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.7:nCuts=200:MaxDepth=3:NNodesMax=15" );

    if (Use["BDTG1"])
        factory->BookMethod( TMVA::Types::kBDT, "BDTG1",
                             "!H:V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:MaxDepth=3:NNodesMax=15" );

    //--------------------------------------------------------------------------
    // Train MVAs using the set of training events
    factory->TrainAllMethods();

    // --- Evaluate all MVAs using the set of test events
    factory->TestAllMethods();

    // --- Evaluate and compare performance of all configured MVAs
    factory->EvaluateAllMethods();

    //--------------------------------------------------------------------------
    // Save the output
    outputFile->Close();

    std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
    std::cout << "==> TMVARegression is done!" << std::endl;

    for (UInt_t i=0; i<files.size(); i++)
        files.at(i)->Close();

    delete outputFile;
    delete factory;

    // Launch the GUI for the root macros
    //gROOT->SetMacroPath( "$ROOTSYS/tmva/macros/" );
    //gROOT->Macro( "$ROOTSYS/tmva/macros/TMVAlogon.C" );
    //gROOT->LoadMacro( "$ROOTSYS/tmva/macros/TMVAGui.C" );
    //if (!gROOT->IsBatch()) TMVARegGui( outfileName );
}
예제 #12
0
파일: Test.C 프로젝트: IPHC/FrameworkLegacy
void testBDT(){


   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();
/*
  TString outfileName( "TMVA.root" );
  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

  TMVA::Factory *factory = new TMVA::Factory( "testBDT", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );


 
   // global event weights per tree (see below for setting event-wise weights)
   //Double_t signalWeight     = 0.003582;
   //Double_t backgroundWeight = 0.0269;
   
   Double_t signalWeight     = 1;
   Double_t backgroundWeight = 1;
   
   TFile *input_sig = TFile::Open( "signal_exclusif.root" );
   TFile *input_wz = TFile::Open( "bruit_w_z.root" );
   
   TTree *signal     = (TTree*)input_sig->Get("tree");
   TTree *background = (TTree*)input_wz->Get("tree");

   // You can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( signal,     signalWeight     );
   factory->AddBackgroundTree( background, backgroundWeight );
   
   
   factory->AddVariable("PT_z"   , 'F');
   factory->AddVariable("ASYM"    , 'F');
   factory->AddVariable("PHI_lw_b", 'F');
   factory->AddVariable("M_top", 'F');
   */
   
   
   
   TString outfileName( "bdtTMVA_FCNC_tZ.root" );
  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

  TMVA::Factory *factory = new TMVA::Factory( "doBDT_FCNC_tZ", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );


 
   // global event weights per tree (see below for setting event-wise weights)
   //Double_t signalWeight     = 0.003582;
   //Double_t backgroundWeight = 0.0269;
   
   Double_t signalWeight     = 1;
   Double_t backgroundWeight = 1;
   
   TFile *input_sig = TFile::Open( "proof.root" );
   TFile *input_wz = TFile::Open( "proof.root" );
   
   TTree *signal     = (TTree*)input_sig->Get("Ttree_FCNCkut");
   
   
   TTree *background_WZ = (TTree*)input_wz->Get("Ttree_WZ");
   /*TTree *background_ZZ = (TTree*)input_wz->Get("Ttree_ZZ");
   TTree *background_WW = (TTree*)input_wz->Get("Ttree_WW");
   
   TTree *background_TTbar  = (TTree*)input_wz->Get("Ttree_TTbar");
   TTree *background_Zjets  = (TTree*)input_wz->Get("Ttree_Zjets");
   TTree *background_Wjets  = (TTree*)input_wz->Get("Ttree_Wjets");
   TTree *background_TtW    = (TTree*)input_wz->Get("Ttree_TtW");
   TTree *background_TbartW = (TTree*)input_wz->Get("Ttree_TbartW");*/

   // You can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( signal,            signalWeight     );
   factory->AddBackgroundTree( background_WZ,     backgroundWeight );
   /*factory->AddBackgroundTree( background_ZZ,     backgroundWeight );
   factory->AddBackgroundTree( background_WW,     backgroundWeight );
   factory->AddBackgroundTree( background_TTbar,  backgroundWeight );
   factory->AddBackgroundTree( background_Zjets,  backgroundWeight );
   factory->AddBackgroundTree( background_Wjets,  backgroundWeight );
   factory->AddBackgroundTree( background_TtW,    backgroundWeight );
   factory->AddBackgroundTree( background_TbartW, backgroundWeight );*/
   
   
   factory->AddVariable("tree_topMass",    'F');
   factory->AddVariable("tree_deltaPhilb", 'F');
   factory->AddVariable("tree_asym",       'F');
   factory->AddVariable("tree_Zpt",        'F');
   
   
   
   
   
   
   
   
   
   // to set weights. The variable must exist in the tree
   //    for signal    : factory->SetSignalWeightExpression    ("weight1*weight2");
   //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
   
   
   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";

   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
   
   
   
   factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=100:nEventsMin=100:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );




   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );


}
예제 #13
0
void trainMVACat()
{
  char name[1000];
  float XSEC[6] = {3.67e+5,2.94e+4,6.524e+03,1.064e+03,121.5,2.542e+01};
  float NORM[6];
  TCut preselectionCut = "ht>400 && jetPt[5]>40 && (triggerBit[0] || triggerBit[2]) && nBJets>1 && nLeptons==0";
  TFile *bkgSrc[6];
  bkgSrc[0] = TFile::Open("flatTree_QCD_HT300to500.root");
  bkgSrc[1] = TFile::Open("flatTree_QCD_HT500to700.root");
  bkgSrc[2] = TFile::Open("flatTree_QCD_HT700to1000.root");
  bkgSrc[3] = TFile::Open("flatTree_QCD_HT1000to1500.root");
  bkgSrc[4] = TFile::Open("flatTree_QCD_HT1500to2000.root");
  bkgSrc[5] = TFile::Open("flatTree_QCD_HT2000toInf.root");

  TFile *sigSrc = TFile::Open("flatTree_ttHJetTobb_M125.root");
  //TFile *sigSrc = TFile::Open("flatTree_TT.root");
  TTree *sigTree = (TTree*)sigSrc->Get("hadtop/events"); 
  TTree *bkgTree[6];
  
  
  TFile *outf = new TFile("mva_Cat_QCD.root","RECREATE");
  TMVA::Factory* factory = new TMVA::Factory("factory_mva_Cat_QCD_",outf,"!V:!Silent:Color:DrawProgressBar:Transformations=I;G:AnalysisType=Classification");
  factory->AddSignalTree(sigTree);

  for(int k=0;k<6;k++) {
    NORM[k] = ((TH1F*)bkgSrc[k]->Get("hadtop/pileup"))->GetEntries();
    bkgTree[k] = (TTree*)bkgSrc[k]->Get("hadtop/events");
    factory->AddBackgroundTree(bkgTree[k],XSEC[k]/NORM[k]);
  }
  
  //int N_SIG(sigTree->GetEntries(preselectionCut));
  
  //int N_BKG0(bkgTree[0]->GetEntries(preselectionCut));
  //int N_BKG1(bkgTree[1]->GetEntries(preselectionCut));
  //int N_BKG2(bkgTree[2]->GetEntries(preselectionCut));
  //int N_BKG3(bkgTree[3]->GetEntries(preselectionCut));

  //float N_BKG_EFF = N_BKG0*XSEC[0]/NORM[0]+N_BKG1*XSEC[1]/NORM[1]+N_BKG2*XSEC[2]/NORM[2]+N_BKG3*XSEC[3]/NORM[3];
  
  //int N = TMath::Min((float)N_SIG,N_BKG_EFF);

  //cout<<N_SIG<<" "<<N_BKG_EFF<<endl;
  
  const int NVAR = 21;
  TString VAR[NVAR] = {
    "nJets",
    //"nBJets",
    "ht",
    "jetPt[0]","jetPt[1]","jetPt[2]","jetPt[3]","jetPt[4]","jetPt[5]",
    "mbbMin","dRbbMin",
    //"dRbbAve","mbbAve",
    //"btagAve","btagMax","btagMin",
    //"qglAve","qglMin","qglMedian",
    "sphericity","aplanarity","foxWolfram[0]","foxWolfram[1]","foxWolfram[2]","foxWolfram[3]",
    "mTop[0]","ptTTbar","mTTbar","dRbbTop","chi2"
  };
  char TYPE[NVAR] = {
    'I',
    //'I',
    'F',
    'F','F','F','F','F','F', 
    'F','F',
    //'F','F',
    //'F','F','F',
    //'F','F','F',
    'F','F','F','F','F','F', 
    'F','F','F','F','F'
  };

  for(int i=0;i<NVAR;i++) {
    factory->AddVariable(VAR[i],TYPE[i]);
  }

  factory->AddSpectator("status",'I');
  factory->AddSpectator("nBJets",'I');

  sprintf(name,"nTrain_Signal=%d:nTrain_Background=%d:nTest_Signal=%d:nTest_Background=%d",-1,-1,-1,-1);
  factory->PrepareTrainingAndTestTree(preselectionCut,name);

  TMVA::IMethod* BDT_Category = factory->BookMethod( TMVA::Types::kCategory,"BDT_Category");
  TMVA::MethodCategory* mcategory_BDT = dynamic_cast<TMVA::MethodCategory*>(BDT_Category); 

  mcategory_BDT->AddMethod("status == 0 && nBJets == 2",
                      "nJets:ht:jetPt[0]:jetPt[1]:jetPt[2]:jetPt[3]:jetPt[4]:jetPt[5]:mbbMin:dRbbMin:sphericity:aplanarity:foxWolfram[0]:foxWolfram[1]:foxWolfram[2]:foxWolfram[3]:mTop[0]:ptTTbar:mTTbar:dRbbTop:chi2:",
                      TMVA::Types::kBDT,
                      "BDT_Cat1",
                      "NTrees=2000:BoostType=Grad:Shrinkage=0.1");

  mcategory_BDT->AddMethod("status == 0 && nBJets > 2",
                      "nJets:ht:jetPt[0]:jetPt[1]:jetPt[2]:jetPt[3]:jetPt[4]:jetPt[5]:mbbMin:dRbbMin:sphericity:aplanarity:foxWolfram[0]:foxWolfram[1]:foxWolfram[2]:foxWolfram[3]:mTop[0]:ptTTbar:mTTbar:dRbbTop:chi2:",
                      TMVA::Types::kBDT,
                      "BDT_Cat2",
                      "NTrees=2000:BoostType=Grad:Shrinkage=0.1");

  mcategory_BDT->AddMethod("status < 0 && nBJets == 2",
                      "nJets:ht:jetPt[0]:jetPt[1]:jetPt[2]:jetPt[3]:jetPt[4]:jetPt[5]:mbbMin:dRbbMin:sphericity:aplanarity:foxWolfram[0]:foxWolfram[1]:foxWolfram[2]:foxWolfram[3]:",
                      TMVA::Types::kBDT,
                      "BDT_Cat3",
                      "NTrees=2000:BoostType=Grad:Shrinkage=0.1");

  mcategory_BDT->AddMethod("status < 0 && nBJets > 2",
                      "nJets:ht:jetPt[0]:jetPt[1]:jetPt[2]:jetPt[3]:jetPt[4]:jetPt[5]:mbbMin:dRbbMin:sphericity:aplanarity:foxWolfram[0]:foxWolfram[1]:foxWolfram[2]:foxWolfram[3]:",
                      TMVA::Types::kBDT,
                      "BDT_Cat4",
                      "NTrees=2000:BoostType=Grad:Shrinkage=0.1");

  TMVA::IMethod* Fisher_Category = factory->BookMethod( TMVA::Types::kCategory,"Fisher_Category");
  TMVA::MethodCategory* mcategory_Fisher = dynamic_cast<TMVA::MethodCategory*>(Fisher_Category);
  
  mcategory_Fisher->AddMethod("status == 0 && nBJets == 2",
                      "nJets:ht:jetPt[0]:jetPt[1]:jetPt[2]:jetPt[3]:jetPt[4]:jetPt[5]:mbbMin:dRbbMin:sphericity:aplanarity:foxWolfram[0]:foxWolfram[1]:foxWolfram[2]:foxWolfram[3]:mTop[0]:ptTTbar:mTTbar:dRbbTop:chi2:",
                      TMVA::Types::kFisher,
                      "Fisher_Cat1","H:!V:Fisher");

  mcategory_Fisher->AddMethod("status == 0 && nBJets > 2",
                      "nJets:ht:jetPt[0]:jetPt[1]:jetPt[2]:jetPt[3]:jetPt[4]:jetPt[5]:mbbMin:dRbbMin:sphericity:aplanarity:foxWolfram[0]:foxWolfram[1]:foxWolfram[2]:foxWolfram[3]:mTop[0]:ptTTbar:mTTbar:dRbbTop:chi2:",
                      TMVA::Types::kFisher,
                      "Fisher_Cat2","H:!V:Fisher");

  mcategory_Fisher->AddMethod("status < 0 && nBJets == 2",
                      "nJets:ht:jetPt[0]:jetPt[1]:jetPt[2]:jetPt[3]:jetPt[4]:jetPt[5]:mbbMin:dRbbMin:sphericity:aplanarity:foxWolfram[0]:foxWolfram[1]:foxWolfram[2]:foxWolfram[3]:",
                      TMVA::Types::kFisher,
                      "Fisher_Cat3","H:!V:Fisher");

  mcategory_Fisher->AddMethod("status < 0 && nBJets > 2",
                      "nJets:ht:jetPt[0]:jetPt[1]:jetPt[2]:jetPt[3]:jetPt[4]:jetPt[5]:mbbMin:dRbbMin:sphericity:aplanarity:foxWolfram[0]:foxWolfram[1]:foxWolfram[2]:foxWolfram[3]:",
                      TMVA::Types::kFisher,
                      "Fisher_Cat4","H:!V:Fisher");

  // specify the training methods
  //factory->BookMethod(TMVA::Types::kFisher,"Fisher");
  //factory->BookMethod(TMVA::Types::kBDT,"BDT_GRAD_2000","NTrees=2000:BoostType=Grad:Shrinkage=0.1");
  
  factory->TrainAllMethods();
  factory->TestAllMethods();
  factory->EvaluateAllMethods(); 
  outf->Close();
}
예제 #14
0
void Boost(){
   TString outfileName = "boost.root";
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" );
   factory->AddVariable( "var0", 'F' );
   factory->AddVariable( "var1", 'F' );
   TFile *input(0);
   TString fname = "./data.root";
   if (!gSystem->AccessPathName( fname )) {
      // first we try to find tmva_example.root in the local directory
      std::cout << "--- BOOST       : Accessing " << fname << std::endl;
      input = TFile::Open( fname );
   }
   else {
      gROOT->LoadMacro( "./createData.C");
      create_circ(20000);
      cout << " created data.root with data and circle arranged in half circles"<<endl;
      input = TFile::Open( fname );
   }
   if (!input) {
      std::cout << "ERROR: could not open data file" << std::endl;
      exit(1);
   }
   TTree *signal     = (TTree*)input->Get("TreeS");
   TTree *background = (TTree*)input->Get("TreeB");
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;
   
   gROOT->cd( outfileName+TString(":/") );
   factory->AddSignalTree    ( signal,     signalWeight     );
   factory->AddBackgroundTree( background, backgroundWeight );
   factory->PrepareTrainingAndTestTree( "", "",
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

   TString fisher="H:!V";
   factory->BookMethod( TMVA::Types::kFisher, "Fisher", fisher );
   factory->BookMethod( TMVA::Types::kFisher, "FisherBoost", fisher+":Boost_Num=100:Boost_Type=AdaBoost" );
   factory->BookMethod( TMVA::Types::kFisher, "FisherBoostLog", fisher+":Boost_Num=100:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=1.0" );
   factory->BookMethod( TMVA::Types::kFisher, "FisherBoostLog2", fisher+":Boost_Num=100:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=2.0" );
   factory->BookMethod( TMVA::Types::kFisher, "FisherBoostStep", fisher+":Boost_Num=100:Boost_Transform=step:Boost_Type=AdaBoost:Boost_AdaBoostBeta=1.0" );
   factory->BookMethod( TMVA::Types::kFisher, "FisherBoostStep2", fisher+":Boost_Num=100:Boost_Transform=step:Boost_Type=AdaBoost:Boost_AdaBoostBeta=1.2" );
   factory->BookMethod( TMVA::Types::kFisher, "FisherBoostStep3", fisher+":Boost_Num=100:Boost_Transform=step:Boost_Type=AdaBoost:Boost_AdaBoostBeta=1.5" );

  // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------
   
   // Save the output
   outputFile->Close();
   
   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;
   
   delete factory;
   
   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );
   
   
}
예제 #15
0
파일: Reg.C 프로젝트: skyriacoCMS/PhotonID
void Reg(){
  
  TMVA::Tools::Instance();
  std::cout << "==> Start TMVARegression" << std::endl;
    
  ifstream myfile; 
  myfile.open("99per.txt");


  ostringstream xcS,xcH,xcP,xcC,xcN;  
  double xS,xH,xC,xN,xP;

  if(myfile.is_open()){
    while(!myfile.eof()){
      myfile>>xS>>xH>>xC>>xN>>xP;
    }
  }

  xcS<<xS;
  xcH<<xH;
  xcC<<xC;
  xcN<<xN;
  xcP<<xP;

  //Output file 
  TString outfileName( "Ex1out_FullW_def.root" );
  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
  
  //Declaring the factory
  TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, 
					      "!V:!Silent:Color:DrawProgressBar" );
  //Declaring Input Varibles 
  factory->AddVariable( "Sieie",'F');
  factory->AddVariable( "ToE", 'F' );
  factory->AddVariable( "isoC",'F' );
  factory->AddVariable( "isoN",'F' );
  factory->AddVariable( "isoP",'F' );
  
  TString fname = "../../CutTMVATrees_Barrel.root";
  input = TFile::Open( fname );
  
  // --- Register the regression tree
  TTree *signal = (TTree*)input->Get("t_S");
  TTree *background = (TTree*)input->Get("t_B");
  
  //Just Some more settings
   Double_t signalWeight      = 1.0; 
   Double_t backgroundWeight  = 1.0; 

   // You can add an arbitrary number of regression trees
   factory->AddSignalTree( signal, signalWeight );
   factory->AddBackgroundTree( background , backgroundWeight );
 
   TCut mycuts ="";
   TCut mycutb ="";

   // factory->PrepareTrainingAndTestTree(mycuts,mycutb,"nTrain_Signal=9000:nTrain_Background=9000:nTest_Signal=10000:nTest_Background=10000");

   factory->SetBackgroundWeightExpression("weightPT*weightXS");
   factory->SetSignalWeightExpression("weightPT*weightXS");

   TString methodName = "Cuts_FullsampleW_def";
   TString methodOptions ="!H:!V:FitMethod=GA:EffMethod=EffSEl"; 
   methodOptions +=":VarProp[0]=FMin:VarProp[1]=FMin:VarProp[2]=FMin:VarProp[3]=FMin:VarProp[4]=FMin";
  
   methodOptions +=":CutRangeMax[0]="+xcS.str(); 
   methodOptions +=":CutRangeMax[1]="+xcH.str();
   methodOptions +=":CutRangeMax[2]="+xcC.str();
   methodOptions +=":CutRangeMax[3]="+xcN.str();
   methodOptions +=":CutRangeMax[4]="+xcP.str();

   //************
   factory->BookMethod(TMVA::Types::kCuts,methodName,methodOptions);
   factory->TrainAllMethods();
   factory->TestAllMethods();
   factory->EvaluateAllMethods();    
   
   // --------------------------------------------------------------
   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVARegression is done!" << std::endl;      
   delete factory;

}
예제 #16
0
// Train, test and evaluate TMVA multiclass methods on four classes
// ("Signal", "bg0", "bg1", "bg2") read from
// ./tmva_example_multiple_background.root.
//
// myMethodList: comma-separated names of the methods to run. When empty, the
//               defaults flagged in the Use map below (MLP and BDTG) are run.
//               An unknown name prints the list of known names and returns.
//
// If the input file does not exist it is generated by running
// create_MultipleBackground(2000) from the tutorials' tmva/createData.C.
// Output is written to TMVAMulticlass.root; outside batch mode
// TMVAMultiClassGui is launched on it.
void TMVAMulticlass( TString myMethodList = "" )
{

   // This loads the library
   TMVA::Tools::Instance();

   // to get access to the GUI and all tmva macros
   //
   //     TString tmva_dir(TString(gRootDir) + "/tmva");
   //     if(gSystem->Getenv("TMVASYS"))
   //        tmva_dir = TString(gSystem->Getenv("TMVASYS"));
   //     gROOT->SetMacroPath(tmva_dir + "/test/:" + gROOT->GetMacroPath() );
   //     gROOT->ProcessLine(".L TMVAMultiClassGui.C");


   //---------------------------------------------------------------
   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;
   Use["MLP"]             = 1;
   Use["BDTG"]            = 1;
   Use["DNN"]             = 0;
   Use["FDA_GA"]          = 0;
   Use["PDEFoam"]         = 0;
   //---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAMulticlass" << std::endl;

   // An explicit method list replaces the defaults: switch everything off,
   // then enable exactly the requested methods.
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string requested(mlist[i]);

         if (Use.find(requested) == Use.end()) {
            std::cout << "Method \"" << requested << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[requested] = 1;
      }
   }

   // Create a new root output file.
   TString outfileName = "TMVAMulticlass.root";
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
   if (!outputFile) {
      std::cout << "ERROR: could not create output file " << outfileName << std::endl;
      exit(1);
   }

   TMVA::Factory *factory = new TMVA::Factory( "TMVAMulticlass", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=multiclass" );
   TMVA::DataLoader *dataloader=new TMVA::DataLoader("dataset");

   dataloader->AddVariable( "var1", 'F' );
   dataloader->AddVariable( "var2", "Variable 2", "", 'F' );
   dataloader->AddVariable( "var3", "Variable 3", "units", 'F' );
   dataloader->AddVariable( "var4", "Variable 4", "units", 'F' );

   TFile *input(0);
   TString fname = "./tmva_example_multiple_background.root";
   // AccessPathName returns false when the path IS accessible
   if (!gSystem->AccessPathName( fname )) {
      // first we try to find the file in the local directory
      std::cout << "--- TMVAMulticlass   : Accessing " << fname << std::endl;
      input = TFile::Open( fname );
   }
   else {
      std::cout << "Creating testdata...." << std::endl;
      TString createDataMacro = TString(gROOT->GetTutorialsDir()) + "/tmva/createData.C";
      gROOT->ProcessLine(TString::Format(".L %s",createDataMacro.Data()));
      gROOT->ProcessLine("create_MultipleBackground(2000)");
      std::cout << " created tmva_example_multiple_background.root for tests of the multiclass features"<<std::endl;
      input = TFile::Open( fname );
   }
   if (!input) {
      std::cout << "ERROR: could not open data file" << std::endl;
      exit(1);
   }

   TTree *signalTree  = (TTree*)input->Get("TreeS");
   TTree *background0 = (TTree*)input->Get("TreeB0");
   TTree *background1 = (TTree*)input->Get("TreeB1");
   TTree *background2 = (TTree*)input->Get("TreeB2");
   if (!signalTree || !background0 || !background1 || !background2) {
      std::cout << "ERROR: TreeS, TreeB0, TreeB1 or TreeB2 not found in " << fname << std::endl;
      exit(1);
   }

   // Make the output file the current directory again (opening the input
   // file changed it).
   gROOT->cd( outfileName+TString(":/") );
   dataloader->AddTree    (signalTree,"Signal");
   dataloader->AddTree    (background0,"bg0");
   dataloader->AddTree    (background1,"bg1");
   dataloader->AddTree    (background2,"bg2");

   dataloader->PrepareTrainingAndTestTree( "", "SplitMode=Random:NormMode=NumEvents:!V" );

   if (Use["BDTG"]) // gradient boosted decision trees
      factory->BookMethod( dataloader,  TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.50:nCuts=20:MaxDepth=2");
   if (Use["MLP"]) // neural network
      factory->BookMethod( dataloader,  TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:NCycles=1000:HiddenLayers=N+5,5:TestRate=5:EstimatorType=MSE");
   if (Use["FDA_GA"]) // functional discriminant with GA minimizer
      factory->BookMethod( dataloader,  TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );
   if (Use["PDEFoam"]) // PDE-Foam approach
      factory->BookMethod( dataloader,  TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["DNN"]) {
       // The DNN option string is assembled from a layout part and a
       // two-phase training strategy (phases are separated by '|').
       TString layoutString ("Layout=TANH|100,TANH|50,TANH|10,LINEAR");
       TString training0 ("LearningRate=1e-1, Momentum=0.5, Repetitions=1, ConvergenceSteps=10,"
                          " BatchSize=256, TestRepetitions=10, Multithreading=True");
       TString training1 ("LearningRate=1e-2, Momentum=0.0, Repetitions=1, ConvergenceSteps=10,"
                          " BatchSize=256, TestRepetitions=7, Multithreading=True");
       TString trainingStrategyString ("TrainingStrategy=");
       trainingStrategyString += training0 + "|" + training1;
       TString nnOptions ("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=N:"
                          "WeightInitialization=XAVIERUNIFORM:Architecture=STANDARD");
       nnOptions.Append (":"); nnOptions.Append (layoutString);
       nnOptions.Append (":"); nnOptions.Append (trainingStrategyString);
       factory->BookMethod(dataloader, TMVA::Types::kDNN, "DNN", nnOptions );
   }

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   // Message now names this macro (it used to say "TMVAClassification").
   std::cout << "==> TMVAMulticlass is done!" << std::endl;

   delete factory;
   delete dataloader;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAMultiClassGui( outfileName );


}
예제 #17
0
void TMVAClassification( TString myMethodList = "" , TString myModel = "")
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   // if you like to use a method via the plugin mechanism, we recommend using
   //
   // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
   // (an example is given for using the BDT as plugin (see below),
   // but of course the real application is when you write your own
   // method based)

   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // to get access to the GUI and all tmva macros
    TString tmva_dir(TString(gRootDir) + "/tmva");
    if(gSystem->Getenv("TMVASYS"))
       tmva_dir = TString(gSystem->Getenv("TMVASYS"));
    gROOT->SetMacroPath(tmva_dir + "/test/:" + gROOT->GetMacroPath() );
    gROOT->ProcessLine(".L TMVAGui.C");



   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Cut optimisation
   Use["Cuts"]            = 1;
   Use["CutsD"]           = 0;
   Use["CutsPCA"]         = 0;
   Use["CutsGA"]          = 0;
   Use["CutsSA"]          = 0;
   // 
   // --- 1-dimensional likelihood ("naive Bayes estimator")
   Use["Likelihood"]      = 0;
   Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"]   = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"]   = 0;
   Use["LikelihoodMIX"]   = 0;
   //
   // --- Mutidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]           = 0;
   Use["PDERSD"]          = 0;
   Use["PDERSPCA"]        = 0;
   Use["PDEFoam"]         = 0;
   Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
   Use["KNN"]             = 0; // k-nearest neighbour method
   //
   // --- Linear Discriminant Analysis
   Use["LD"]              = 0; // Linear Discriminant identical to Fisher
   Use["Fisher"]          = 0;
   Use["FisherG"]         = 0;
   Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
   Use["HMatrix"]         = 0;
   //
   // --- Function Discriminant analysis
   Use["FDA_GA"]          = 0; // minimisation of user-defined function using Genetics Algorithm
   Use["FDA_SA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   Use["FDA_MCMT"]        = 0;
   //
   // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
   Use["MLP"]             = 0; // Recommended ANN
   Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
   Use["MLPBNN"]          = 0; // Recommended ANN with BFGS training method and bayesian regulator
   Use["CFMlpANN"]        = 0; // Depreciated ANN from ALEPH
   Use["TMlpANN"]         = 0; // ROOT's own ANN
   //
   // --- Support Vector Machine 
   Use["SVM"]             = 0;
   // 
   // --- Boosted Decision Trees
   Use["BDT"]             = 0; // uses Adaptive Boost
   Use["BDTG"]            = 0; // uses Gradient Boost
   Use["BDTB"]            = 0; // uses Bagging
   Use["BDTD"]            = 0; // decorrelation + Adaptive Boost
   Use["BDTF"]            = 0; // allow usage of fisher discriminant for node splitting 
   // 
   // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
   Use["RuleFit"]         = 0;
   // ---------------------------------------------------------------

   // Default model to be trained + tested
   std::map<std::string,int> Model;

   // --- Cut optimisation
   Model[ "MM"  ]   = 0; // Mass mechanism
   Model[ "RHC_L" ] = 0; // Right Handed Current
   Model[ "RHC_E" ] = 0; // Right Handed Current
   Model[ "M1"  ]   = 0; // Majoron
   Model[ "M2"  ]   = 0; // Majoron
   Model[ "M3"  ]   = 0; // Majoron
   Model[ "M7"  ]   = 0; // Majoron

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   // Select methods (don't look at this code - not of interest)
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   if(myModel != "") {
  	
		std::string regModel(myModel);
		
		if( Model.find(regModel) == Model.end() ){
			std::cout << "Model \"" << myModel << "\" not known in under this name. Choose among the following:" << std::endl;
			for (std::map<std::string,int>::iterator it = Model.begin(); it != Model.end(); it++) std::cout << it->first << " ";
			std::cout << std::endl;
			return;
		}
	   
		Model[regModel] = 1;
	
   } else {
   	
	   std::cout << "No signal model as been specified. You must choose one among the following:" << std::endl;
       for (std::map<std::string,int>::iterator it = Model.begin(); it != Model.end(); it++) std::cout << it->first << " ";
       std::cout << std::endl;
       return;
   }

    // --------------------------------------------------------------------------------------------------

    // --- Here the preparation phase begins

    // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
    TString outfileName;
	outfileName.Form( "TMVA_%s.root", myModel.Data() );
	//TString outfileDir( "/Users/alberto/Software/SuperNEMO/work/nemo3/plot/plot_FINAL_TECHNOTE_20150921/TMVA/" );
	TString outfileDir( "/Users/alberto/Software/SuperNEMO/work/nemo3/plot/plot_UPDATE_TECHNOTE_20160429/TMVA/" );
	TFile* outputFile = TFile::Open( outfileDir + outfileName , "RECREATE" );
   
   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory is 
   // the only TMVA object you have to interact with
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
	TString weightBaseName;
	weightBaseName.Form("TMVAClassification_%s", myModel.Data());	
   TMVA::Factory *factory = new TMVA::Factory( weightBaseName , outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification" );

   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   //factory->AddVariable( "myvar1 := var1+var2", 'F' );
   //factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' );
   //factory->AddVariable( "var3",                "Variable 3", "units", 'F' );
   //factory->AddVariable( "var4",                "Variable 4", "units", 'F' );

	factory->AddVariable( "min_el_en"                                                                   , 'F' );  
	factory->AddVariable( "max_el_en"                                                                   , 'F' );  
	factory->AddVariable( "el_en_asym := (max_el_en-min_el_en)/(min_el_en+max_el_en)"                   , 'F' );  
	factory->AddVariable( "el_en_sum := min_el_en+max_el_en"                                            , 'F' );  
	factory->AddVariable( "cos_theta"                                                                   , 'F' );
	factory->AddVariable( "prob_int"                                                                    , 'F' );
	factory->AddVariable( "min_el_track_len"                                                            , 'F' );       
	factory->AddVariable( "max_el_track_len"                                                            , 'F' );       
	//factory->AddVariable( "min_el_curv := min_el_track_r*min_el_sign"                                   , 'F' );       
	//factory->AddVariable( "max_el_curv := max_el_track_r*max_el_sign"                                   , 'F' );       
	//factory->AddVariable( "max_vertex_s"                                                                , 'F' );       
	//factory->AddVariable( "max_vertex_z"                                                                , 'F' );                 
	//factory->AddVariable( "min_vertex_s"                                                                , 'F' );       
	//factory->AddVariable( "min_vertex_z"                                                                , 'F' );                 
 
   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
   //factory->AddSpectator( "spec1 := var1*2",  "Spectator 1", "units", 'F' );
   //factory->AddSpectator( "spec2 := var1*3",  "Spectator 2", "units", 'F' );

   // Read training and test data
   // (it is also possible to use ASCII format as input -> see TMVA Users Guide)
   //TString fdir = "/sps/nemo/scratch/remoto/nemo3/plot/plot_FINAL_TECHNOTE_20150921/";
   TString fdir = "/Users/alberto/Software/SuperNEMO/work/nemo3/plot/plot_UPDATE_TECHNOTE_20160429/";
   TString fname = "TwoElectronIntTree.root";
      
   TFile *input = TFile::Open( fdir + fname , "READ");
   
   std::cout << "--- TMVAClassification       : Using input file: " << input->GetName() << std::endl;
   
   TTree *  sig_tree    = 0;
   Double_t sig_weight  = 1.; 
   
   if ( Model[ "MM"    ] ) sig_tree = (TTree*) input->Get( "Cd116_2b0n_m1_tree"  ) ; 
   if ( Model[ "RHC_L" ] ) sig_tree = (TTree*) input->Get( "Cd116_2b0n_m2_tree"  ) ; 
   if ( Model[ "RHC_E" ] ) sig_tree = (TTree*) input->Get( "Cd116_2b0n_m18_tree"  ) ;    
   if ( Model[ "M1"    ] ) sig_tree = (TTree*) input->Get( "Cd116_2b0n_m5_tree"  ) ; 
   if ( Model[ "M2"    ] ) sig_tree = (TTree*) input->Get( "Cd116_2b0n_m15_tree" ) ; 
   if ( Model[ "M3"    ] ) sig_tree = (TTree*) input->Get( "Cd116_2b0n_m6_tree"  ) ; 
   if ( Model[ "M7"    ] ) sig_tree = (TTree*) input->Get( "Cd116_2b0n_m7_tree"  ) ; 

   factory->AddSignalTree( sig_tree , sig_weight );     

   //Double_t Cd116_2b0n_m1_weight  = 1.; 
   //TTree *  Cd116_2b0n_m1_tree    = (TTree*) input->Get("Cd116_2b0n_m1_tree"  ) ; 
   //factory->AddSignalTree( Cd116_2b0n_m1_tree , Cd116_2b0n_m1_weight     );     
   
   TTree *  Cd116_Tl208_tree       = (TTree*) input->Get("Cd116_Tl208_tree"    ) ; Double_t Cd116_Tl208_weight       = 6.52838           ; if( Cd116_Tl208_tree    -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Cd116_Tl208_tree       , Cd116_Tl208_weight       ); };
   TTree *  Cd116_Ac228_tree       = (TTree*) input->Get("Cd116_Ac228_tree"    ) ; Double_t Cd116_Ac228_weight       = 7.62351           ; if( Cd116_Ac228_tree    -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Cd116_Ac228_tree       , Cd116_Ac228_weight       ); };
   TTree *  Cd116_Bi212_tree       = (TTree*) input->Get("Cd116_Bi212_tree"    ) ; Double_t Cd116_Bi212_weight       = 3.00708           ; if( Cd116_Bi212_tree    -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Cd116_Bi212_tree       , Cd116_Bi212_weight       ); };
   TTree *  Cd116_Bi214_tree       = (TTree*) input->Get("Cd116_Bi214_tree"    ) ; Double_t Cd116_Bi214_weight       = 18.1504           ; if( Cd116_Bi214_tree    -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Cd116_Bi214_tree       , Cd116_Bi214_weight       ); };
   TTree *  Cd116_Pb214_tree       = (TTree*) input->Get("Cd116_Pb214_VT_tree" ) ; Double_t Cd116_Pb214_weight       = 0.186417          ; if( Cd116_Pb214_tree    -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Cd116_Pb214_tree       , Cd116_Pb214_weight       ); };
   TTree *  Mylar_Bi214_tree       = (TTree*) input->Get("Mylar_Bi214_tree"    ) ; Double_t Mylar_Bi214_weight       = 11.1346           ; if( Mylar_Bi214_tree    -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Mylar_Bi214_tree       , Mylar_Bi214_weight       ); };
   TTree *  Mylar_Pb214_tree       = (TTree*) input->Get("Mylar_Pb214_tree"    ) ; Double_t Mylar_Pb214_weight       = 0.496238          ; if( Mylar_Pb214_tree    -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Mylar_Pb214_tree       , Mylar_Pb214_weight       ); };
   TTree *  Cd116_K40_tree         = (TTree*) input->Get("Cd116_K40_tree"      ) ; Double_t Cd116_K40_weight         = 8.9841+25.8272    ; if( Cd116_K40_tree      -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Cd116_K40_tree         , Cd116_K40_weight         ); };
   TTree *  Cd116_Pa234m_tree      = (TTree*) input->Get("Cd116_Pa234m_tree"   ) ; Double_t Cd116_Pa234m_weight      = 27.9307+72.4667   ; if( Cd116_Pa234m_tree   -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Cd116_Pa234m_tree      , Cd116_Pa234m_weight      ); };
   TTree *  SFoil_Bi210_tree       = (TTree*) input->Get("SFoil_Bi210_tree"    ) ; Double_t SFoil_Bi210_weight       = 0+23.2438         ; if( SFoil_Bi210_tree    -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SFoil_Bi210_tree       , SFoil_Bi210_weight       ); };
   TTree *  SWire_Bi210_tree       = (TTree*) input->Get("SWire_Bi210_tree"    ) ; Double_t SWire_Bi210_weight       = 0.136147+0.624187 ; if( SWire_Bi210_tree    -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SWire_Bi210_tree       , SWire_Bi210_weight       ); };
   TTree *  SScin_Bi210_tree       = (TTree*) input->Get("SScin_Bi210_tree"    ) ; Double_t SScin_Bi210_weight       = 1.75641           ; if( SScin_Bi210_tree    -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SScin_Bi210_tree       , SScin_Bi210_weight       ); };
   TTree *  SScin_Bi214_tree       = (TTree*) input->Get("SScin_Bi214_tree"    ) ; Double_t SScin_Bi214_weight       = 0.0510754         ; if( SScin_Bi214_tree    -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SScin_Bi214_tree       , SScin_Bi214_weight       ); };
   TTree *  SScin_Pb214_tree       = (TTree*) input->Get("SScin_Pb214_tree"    ) ; Double_t SScin_Pb214_weight       = 0                 ; if( SScin_Pb214_tree    -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SScin_Pb214_tree       , SScin_Pb214_weight       ); };
   TTree *  SWire_Tl208_tree       = (TTree*) input->Get("SWire_Tl208_tree"    ) ; Double_t SWire_Tl208_weight       = 0.217623+1.07641  ; if( SWire_Tl208_tree    -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SWire_Tl208_tree       , SWire_Tl208_weight       ); };
   TTree *  SWire_Bi214_P1_tree    = (TTree*) input->Get("SWire_Bi214_tree"    ) ; Double_t SWire_Bi214_weight       = 21.4188+17.8236   ; if( SWire_Bi214_tree    -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SWire_Bi214_tree       , SWire_Bi214_weight       ); };
   TTree *  SFoil_Bi214_tree       = (TTree*) input->Get("SFoil_Bi214_tree"    ) ; Double_t SFoil_Bi214_weight       = 5.83533+2.80427   ; if( SFoil_Bi214_tree    -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SFoil_Bi214_tree       , SFoil_Bi214_weight       ); };
   TTree *  SWire_Pb214_tree       = (TTree*) input->Get("SWire_Pb214_tree"    ) ; Double_t SWire_Pb214_weight       = 0.458486+0.649167 ; if( SWire_Pb214_tree    -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SWire_Pb214_tree       , SWire_Pb214_weight       ); };
   TTree *  SFoil_Pb214_tree       = (TTree*) input->Get("SFoil_Pb214_tree"    ) ; Double_t SFoil_Pb214_weight       = 0.218761+0.195287 ; if( SFoil_Pb214_tree    -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( SFoil_Pb214_tree       , SFoil_Pb214_weight       ); };
   TTree *  FeShield_Bi214_tree    = (TTree*) input->Get("FeShield_Bi214_tree" ) ; Double_t FeShield_Bi214_weight    = 50.7021           ; if( FeShield_Bi214_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( FeShield_Bi214_tree    , FeShield_Bi214_weight    ); };
   TTree *  FeShield_Tl208_tree    = (TTree*) input->Get("FeShield_Tl208_tree" ) ; Double_t FeShield_Tl208_weight    = 0.859465          ; if( FeShield_Tl208_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( FeShield_Tl208_tree    , FeShield_Tl208_weight    ); };
   TTree *  FeShield_Ac228_tree    = (TTree*) input->Get("FeShield_Ac228_tree" ) ; Double_t FeShield_Ac228_weight    = 0.126868          ; if( FeShield_Ac228_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( FeShield_Ac228_tree    , FeShield_Ac228_weight    ); };
   TTree *  CuTower_Co60_tree      = (TTree*) input->Get("CuTower_Co60_tree"   ) ; Double_t CuTower_Co60_weight      = 3.9407            ; if( CuTower_Co60_tree   -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( CuTower_Co60_tree      , CuTower_Co60_weight      ); };
   TTree *  Air_Bi214_P1_tree      = (TTree*) input->Get("Air_Bi214_tree"      ) ; Double_t Air_Bi214_P1_weight      = 4.19744           ; if( Air_Bi214_P1_tree   -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Air_Bi214_P1_tree      , Air_Bi214_P1_weight      ); };
   TTree *  Air_Tl208_P1_tree      = (TTree*) input->Get("Air_Tl208_tree"      ) ; Double_t Air_Tl208_P1_weight      = 0                 ; if( Air_Tl208_P1_tree   -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Air_Tl208_P1_tree      , Air_Tl208_P1_weight      ); };
   TTree *  PMT_Bi214_tree         = (TTree*) input->Get("PMT_Bi214_tree"      ) ; Double_t PMT_Bi214_weight         = 27.9661           ; if( PMT_Bi214_tree      -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( PMT_Bi214_tree         , PMT_Bi214_weight         ); };
   TTree *  PMT_Tl208_tree         = (TTree*) input->Get("PMT_Tl208_tree"      ) ; Double_t PMT_Tl208_weight         = 22.923            ; if( PMT_Tl208_tree      -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( PMT_Tl208_tree         , PMT_Tl208_weight         ); };
   TTree *  PMT_Ac228_tree         = (TTree*) input->Get("PMT_Ac228_tree"      ) ; Double_t PMT_Ac228_weight         = 3.60712           ; if( PMT_Ac228_tree      -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( PMT_Ac228_tree         , PMT_Ac228_weight         ); };
   TTree *  PMT_K40_tree           = (TTree*) input->Get("PMT_K40_tree"        ) ; Double_t PMT_K40_weight           = 16.813            ; if( PMT_K40_tree        -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( PMT_K40_tree           , PMT_K40_weight           ); };
   TTree *  ScintInn_K40_tree      = (TTree*) input->Get("ScintInn_K40_tree"   ) ; Double_t ScintInn_K40_weight      = 0.333988          ; if( ScintInn_K40_tree   -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( ScintInn_K40_tree      , ScintInn_K40_weight      ); };
   TTree *  ScintOut_K40_tree      = (TTree*) input->Get("ScintOut_K40_tree"   ) ; Double_t ScintOut_K40_weight      = 0.601178          ; if( ScintOut_K40_tree   -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( ScintOut_K40_tree      , ScintOut_K40_weight      ); };
   TTree *  ScintPet_K40_tree      = (TTree*) input->Get("ScintPet_K40_tree"   ) ; Double_t ScintPet_K40_weight      = 1.00195           ; if( ScintPet_K40_tree   -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( ScintPet_K40_tree      , ScintPet_K40_weight      ); };
   TTree *  MuMetal_Pa234m_tree    = (TTree*) input->Get("MuMetal_Pa234m_tree" ) ; Double_t MuMetal_Pa234m_weight    = 0.739038          ; if( MuMetal_Pa234m_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( MuMetal_Pa234m_tree    , MuMetal_Pa234m_weight    ); };
   TTree *  Cd116_2b2n_m14_tree    = (TTree*) input->Get("Cd116_2b2n_m14_tree" ) ; Double_t Cd116_2b2n_m14_weight    = 4977.55           ; if( Cd116_2b2n_m14_tree -> GetEntriesFast() > 0. ) {factory->AddBackgroundTree( Cd116_2b2n_m14_tree    , Cd116_2b2n_m14_weight    ); };

   // --- end of tree registration 

   // Set individual event weights (the variables must exist in the original TTree)
   //    for signal    : factory->SetSignalWeightExpression    ("weight1*weight2");
   //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
   factory->SetBackgroundWeightExpression( "weight" );

   // Apply additional cuts on the signal and background samples (can be different)
	
	// Apply cut on charge
	//TCut mycuts = "min_el_sign < 0 && max_el_sign < 0.";
	//TCut mycutb = "min_el_sign < 0 && max_el_sign < 0.";

	// Apply cut on vertex
	//TCut mycuts = "((max_vertex_x - min_vertex_x)**2 + (max_vertex_y - min_vertex_y)**2 <= 4**2)&&((max_vertex_z-min_vertex_z)**2<8**2)"; 
	//TCut mycutb = "((max_vertex_x - min_vertex_x)**2 + (max_vertex_y - min_vertex_y)**2 <= 4**2)&&((max_vertex_z-min_vertex_z)**2<8**2)"; 

	TCut mycuts = ""; 
	TCut mycutb = "";

   // Tell the factory how to use the training and testing events
   //
   // If no numbers of events are given, half of the events in the tree are used 
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut,
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
   
   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // ---- Book MVA methods
   //
   // Please look up the various method configuration options in the corresponding cxx files, e.g.
   // src/MethodCuts.cxx, etc., or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

   if (Use["CutsSA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                           "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   // Likelihood ("naive Bayes estimator")
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // Decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   // PCA-transformed likelihood
   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 

   // Use a kernel density estimator to approximate the PDFs
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); 

   // Use a variable-dependent mix of splines and kernel density estimator
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

   // Test the multi-dimensional probability density estimator
   // here are the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["PDEFoamBoost"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
                           "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // H-Matrix (chi-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" );

   // Linear discriminant (same as Fisher discriminant)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher discriminant (same as LD)
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", 
                           "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators

   // CF(Clermont-Ferrand)ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...  

   // Tmlp(Root)ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDTG"]) // Gradient Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" );

   if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );

   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );

   if (Use["BDTF"])  // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables
      factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher",
                           "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=-1" );

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

   // ---- STILL EXPERIMENTAL and only implemented for BDT's ! 
   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","FitGA");

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileDir + outfileName );
}
예제 #18
0
int main( int argc, char** argv )
{//main
  std::string folder;

  if (argc > 1) {
    folder = argv[1];
  }
  else {
    folder = "output_tmva/nunu/MET130/";
  }

  bool useQCD = true;
  bool useOthers = false;
  bool useOthersAsSignal = true;

  //List of input signal files
  std::vector<std::string> sigfiles;
  //sigfiles.push_back("MC_VBF_HToZZTo4Nu_M-120");
  sigfiles.push_back("MC_Powheg-Htoinv-mH125");

  if (useOthersAsSignal) {
    sigfiles.push_back("MC_TTJets");
    //powheg samples
    //sigfiles.push_back("MC_TT-v1");
    //sigfiles.push_back("MC_TT-v2");
    //
    sigfiles.push_back("MC_T-tW");
    sigfiles.push_back("MC_Tbar-tW");
    sigfiles.push_back("MC_SingleT-s-powheg-tauola");
    sigfiles.push_back("MC_SingleTBar-s-powheg-tauola");
    sigfiles.push_back("MC_SingleT-t-powheg-tauola");
    sigfiles.push_back("MC_SingleTBar-t-powheg-tauola");
    sigfiles.push_back("MC_WW-pythia6-tauola");
    sigfiles.push_back("MC_WZ-pythia6-tauola");
    sigfiles.push_back("MC_ZZ-pythia6-tauola");
    sigfiles.push_back("MC_W1JetsToLNu_enu");
    sigfiles.push_back("MC_W2JetsToLNu_enu");
    sigfiles.push_back("MC_W3JetsToLNu_enu");
    sigfiles.push_back("MC_W4JetsToLNu_enu");
    sigfiles.push_back("MC_WJetsToLNu-v1_enu");
    sigfiles.push_back("MC_WJetsToLNu-v2_enu");
    sigfiles.push_back("MC_W1JetsToLNu_munu");
    sigfiles.push_back("MC_W2JetsToLNu_munu");
    sigfiles.push_back("MC_W3JetsToLNu_munu");
    sigfiles.push_back("MC_W4JetsToLNu_munu");
    sigfiles.push_back("MC_WJetsToLNu-v1_munu");
    sigfiles.push_back("MC_WJetsToLNu-v2_munu");
    sigfiles.push_back("MC_W1JetsToLNu_taunu");
    sigfiles.push_back("MC_W2JetsToLNu_taunu");
    sigfiles.push_back("MC_W3JetsToLNu_taunu");
    sigfiles.push_back("MC_W4JetsToLNu_taunu");
    sigfiles.push_back("MC_WJetsToLNu-v1_taunu");
    sigfiles.push_back("MC_WJetsToLNu-v2_taunu");
    sigfiles.push_back("MC_DYJetsToLL");
    sigfiles.push_back("MC_DY1JetsToLL");
    sigfiles.push_back("MC_DY2JetsToLL");
    sigfiles.push_back("MC_DY3JetsToLL");
    sigfiles.push_back("MC_DY4JetsToLL");
    sigfiles.push_back("MC_ZJetsToNuNu_100_HT_200");
    sigfiles.push_back("MC_ZJetsToNuNu_200_HT_400");
    sigfiles.push_back("MC_ZJetsToNuNu_400_HT_inf");
    sigfiles.push_back("MC_ZJetsToNuNu_50_HT_100");
    sigfiles.push_back("MC_GJets-HT-200To400-madgraph");
    sigfiles.push_back("MC_GJets-HT-400ToInf-madgraph");
    sigfiles.push_back("MC_WGamma");
    sigfiles.push_back("MC_EWK-Z2j");
    sigfiles.push_back("MC_EWK-Z2jiglep");
    sigfiles.push_back("MC_EWK-W2jminus_enu");
    sigfiles.push_back("MC_EWK-W2jplus_enu");
    sigfiles.push_back("MC_EWK-W2jminus_munu");
    sigfiles.push_back("MC_EWK-W2jplus_munu");
    sigfiles.push_back("MC_EWK-W2jminus_taunu");
    sigfiles.push_back("MC_EWK-W2jplus_taunu");
  }

  //List of input files
  std::vector<std::string> bkgfiles;
  if (useQCD){
    bkgfiles.push_back("MC_QCD-Pt-30to50-pythia6");
    bkgfiles.push_back("MC_QCD-Pt-50to80-pythia6");
    bkgfiles.push_back("MC_QCD-Pt-80to120-pythia6");
    bkgfiles.push_back("MC_QCD-Pt-120to170-pythia6");
    bkgfiles.push_back("MC_QCD-Pt-170to300-pythia6");
    bkgfiles.push_back("MC_QCD-Pt-300to470-pythia6");
    bkgfiles.push_back("MC_QCD-Pt-470to600-pythia6");
    bkgfiles.push_back("MC_QCD-Pt-600to800-pythia6");
    bkgfiles.push_back("MC_QCD-Pt-800to1000-pythia6");
    bkgfiles.push_back("MC_QCD-Pt-1000to1400-pythia6");
    bkgfiles.push_back("MC_QCD-Pt-1400to1800-pythia6");
    bkgfiles.push_back("MC_QCD-Pt-1800-pythia6");
  }
  if (useOthers) {
    bkgfiles.push_back("MC_TTJets");
    //powheg samples
    //bkgfiles.push_back("MC_TT-v1");
    //bkgfiles.push_back("MC_TT-v2");
    //
    bkgfiles.push_back("MC_T-tW");
    bkgfiles.push_back("MC_Tbar-tW");
    bkgfiles.push_back("MC_SingleT-s-powheg-tauola");
    bkgfiles.push_back("MC_SingleTBar-s-powheg-tauola");
    bkgfiles.push_back("MC_SingleT-t-powheg-tauola");
    bkgfiles.push_back("MC_SingleTBar-t-powheg-tauola");
    bkgfiles.push_back("MC_WW-pythia6-tauola");
    bkgfiles.push_back("MC_WZ-pythia6-tauola");
    bkgfiles.push_back("MC_ZZ-pythia6-tauola");
    bkgfiles.push_back("MC_W1JetsToLNu_enu");
    bkgfiles.push_back("MC_W2JetsToLNu_enu");
    bkgfiles.push_back("MC_W3JetsToLNu_enu");
    bkgfiles.push_back("MC_W4JetsToLNu_enu");
    bkgfiles.push_back("MC_WJetsToLNu-v1_enu");
    bkgfiles.push_back("MC_WJetsToLNu-v2_enu");
    bkgfiles.push_back("MC_W1JetsToLNu_munu");
    bkgfiles.push_back("MC_W2JetsToLNu_munu");
    bkgfiles.push_back("MC_W3JetsToLNu_munu");
    bkgfiles.push_back("MC_W4JetsToLNu_munu");
    bkgfiles.push_back("MC_WJetsToLNu-v1_munu");
    bkgfiles.push_back("MC_WJetsToLNu-v2_munu");
    bkgfiles.push_back("MC_W1JetsToLNu_taunu");
    bkgfiles.push_back("MC_W2JetsToLNu_taunu");
    bkgfiles.push_back("MC_W3JetsToLNu_taunu");
    bkgfiles.push_back("MC_W4JetsToLNu_taunu");
    bkgfiles.push_back("MC_WJetsToLNu-v1_taunu");
    bkgfiles.push_back("MC_WJetsToLNu-v2_taunu");
    bkgfiles.push_back("MC_DYJetsToLL");
    bkgfiles.push_back("MC_DY1JetsToLL");
    bkgfiles.push_back("MC_DY2JetsToLL");
    bkgfiles.push_back("MC_DY3JetsToLL");
    bkgfiles.push_back("MC_DY4JetsToLL");
    bkgfiles.push_back("MC_ZJetsToNuNu_100_HT_200");
    bkgfiles.push_back("MC_ZJetsToNuNu_200_HT_400");
    bkgfiles.push_back("MC_ZJetsToNuNu_400_HT_inf");
    bkgfiles.push_back("MC_ZJetsToNuNu_50_HT_100");
    bkgfiles.push_back("MC_GJets-HT-200To400-madgraph");
    bkgfiles.push_back("MC_GJets-HT-400ToInf-madgraph");
    bkgfiles.push_back("MC_WGamma");
    bkgfiles.push_back("MC_EWK-Z2j");
    bkgfiles.push_back("MC_EWK-Z2jiglep");
    bkgfiles.push_back("MC_EWK-W2jminus_enu");
    bkgfiles.push_back("MC_EWK-W2jplus_enu");
    bkgfiles.push_back("MC_EWK-W2jminus_munu");
    bkgfiles.push_back("MC_EWK-W2jplus_munu");
    bkgfiles.push_back("MC_EWK-W2jminus_taunu");
    bkgfiles.push_back("MC_EWK-W2jplus_taunu");
  }

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
  TFile *output_tmva = TFile::Open((folder+"/TMVA_QCDrej.root").c_str(),"RECREATE");

  // Create the factory object. Later you can choose the methods
  // whose performance you'd like to investigate. The factory is 
  // the only TMVA object you have to interact with
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", output_tmva,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );


  //fill the variables with event weight from the trees
  //const unsigned nVars = 4;

   
   factory->AddSpectator("jet1_pt","Jet 1 p_{T}", "GeV", 'F');
   factory->AddSpectator("jet2_pt","Jet 2 p_{T}", "GeV", 'F');
   factory->AddSpectator("jet1_eta","Jet 1 #eta", "", 'F');
   factory->AddVariable("jet2_eta","Jet 2 #eta", "", 'F');// **
   factory->AddSpectator("jet1_phi","Jet 1 #phi", "", 'F');
   factory->AddSpectator("jet2_phi","Jet 2 #phi", "", 'F');
   factory->AddSpectator("dijet_M","M_{jj}", " GeV", 'F');
   factory->AddSpectator("dijet_deta","#Delta#eta_{jj}", "", 'F');
   factory->AddSpectator("dijet_sumeta","#eta_{j1}+#eta_{j2}", "", 'F');
   factory->AddSpectator("dijet_dphi","#Delta#phi_{jj}", "", 'F');
   factory->AddSpectator("met","MET", "GeV", 'F');// **
   factory->AddSpectator("met_phi","MET #phi", "", 'F');
   factory->AddVariable("met_significance","MET significance", "", 'F');// **
   factory->AddSpectator("sumet","#Sum E_{T}", "GeV", 'F');
   factory->AddSpectator("ht","H_{T}", "GeV", 'F');
   factory->AddVariable("mht","MH_{T}", "GeV", 'F');// **
   factory->AddSpectator("sqrt_ht","#sqrt{H_{T}}", "GeV^{0.5}", 'F');
   factory->AddSpectator("unclustered_et","Unclustered E_{T}", "GeV", 'F');
   factory->AddSpectator("unclustered_phi","Unclustered #phi", "GeV", 'F');
   factory->AddSpectator("jet1met_dphi","#Delta#phi(MET,jet1)", "", 'F');
   factory->AddVariable("jet2met_dphi","#Delta#phi(MET,jet2)", "", 'F');// **
   factory->AddVariable("jetmet_mindphi","minimum #Delta#phi(MET,jet)", "", 'F');// **
   factory->AddVariable("jetunclet_mindphi","minimum #Delta#phi(unclustered,jet)", "",  'F');// **
   factory->AddVariable("metunclet_dphi","#Delta#phi(MET,unclustered)", "",  'F');// **
   factory->AddVariable("dijetmet_scalarSum_pt", "p_{T}^{jet1}+p_{T}^{jet2}+MET", "GeV", 'F');// **
   factory->AddSpectator("dijetmet_vectorialSum_pt","p_{T}(#vec{j1}+#vec{j2}+#vec{MET})", "GeV", 'F');
   factory->AddVariable("dijetmet_ptfraction","p_{T}^{dijet}/(p_{T}^{dijet}+MET)", "", 'F');// **
   //factory->AddVariable("jet1met_scalarprod := (jet1_pt*cos(jet1_phi)*met_x+jet1_pt*sin(jet1_phi)*met_y)/met", "#vec{p_{T}^{jet1}}.#vec{MET}/MET", "GeV" , 'F');
   //factory->AddVariable("jet2met_scalarprod := (jet2_pt*cos(jet2_phi)*met_x+jet2_pt*sin(jet2_phi)*met_y)/met", "#vec{p_{T}^{jet2}}.#vec{MET}/MET", "GeV" , 'F');
   factory->AddVariable("jet1met_scalarprod", "#vec{p_{T}^{jet1}}.#vec{MET}/MET", "GeV" , 'F');// **
   factory->AddVariable("jet2met_scalarprod", "#vec{p_{T}^{jet2}}.#vec{MET}/MET", "GeV" , 'F');// **
   factory->AddVariable("jet1met_scalarprod_frac := jet1met_scalarprod/met", "#vec{p_{T}^{jet1}}.#vec{MET}/MET^{2}", "" , 'F');// **
   factory->AddVariable("jet2met_scalarprod_frac := jet2met_scalarprod/met", "#vec{p_{T}^{jet2}}.#vec{MET}/MET^{2}", "" , 'F');// **
   factory->AddSpectator("n_jets_cjv_30","CJV jets (30 GeV)", "" , 'I');
   factory->AddSpectator("n_jets_cjv_20EB_30EE","CJV jets (|#eta|<2.4 and 20 GeV, or 30 GeV)", "" , 'I');
   

   //test with only VBF variables used in cut-based analysis
   //factory->AddVariable("dijet_M","M_{jj}", " GeV", 'F');
   //factory->AddVariable("dijet_deta","#Delta#eta_{jj}", "", 'F');
   //factory->AddVariable("dijet_dphi","#Delta#phi_{jj}", "", 'F');
   //factory->AddVariable("met","MET", "GeV", 'F');
   //factory->AddVariable("n_jets_cjv_30","CJV jets (30 GeV)", "" , 'I');


  //get input files
  //signal
  //TFile *signalfile = TFile::Open((folder+"/"+"MC_VBF_HToZZTo4Nu_M-120.root").c_str());
  //TTree *signal = (TTree*)signalfile->Get("TmvaInputTree");
  //Double_t signalWeight     = 1.0;
  //factory->AddSignalTree(signal,signalWeight);
  //Set individual event weights (the variables must exist in the original TTree)
  //factory->SetSignalWeightExpression("total_weight");

  //background
  std::map<std::string, TFile *> tfiles;
  for (unsigned i = 0; i < bkgfiles.size(); ++i) {
    std::string filename = (bkgfiles[i]+".root");
    TFile * tmp = new TFile((folder+"/"+filename).c_str());
    if (!tmp) {
      std::cerr << "Warning, file " << filename << " could not be opened." << std::endl;
    } else {
      tfiles[bkgfiles[i]] = tmp;      
    }
  }
  TTree *background[bkgfiles.size()];

  //signal
  std::map<std::string, TFile *> sfiles;
  for (unsigned i = 0; i < sigfiles.size(); ++i) {
    std::string filename = (sigfiles[i]+".root");
    TFile * tmp = new TFile((folder+"/"+filename).c_str());
    if (!tmp) {
      std::cerr << "Warning, file " << filename << " could not be opened." << std::endl;
    } else {
      sfiles[sigfiles[i]] = tmp;      
    }
  }
  TTree *signal[sigfiles.size()];

  for (unsigned i = 0; i < bkgfiles.size(); ++i) {

    std::string f = bkgfiles[i];
    if (tfiles[f]){
      background[i] = (TTree*)tfiles[f]->Get("TmvaInputTree");
      //if (f.find("QCD-Pt")!=f.npos){
      //}
      Double_t backgroundWeight = 1.0;
      factory->AddBackgroundTree(background[i],backgroundWeight);
      factory->SetBackgroundWeightExpression("total_weight");

    }//if file exist
    else {
      std::cout << " Cannot find background file " << f << std::endl;
    }
  }//loop on files

  for (unsigned i = 0; i < sigfiles.size(); ++i) {

    std::string f = sigfiles[i];
    if (sfiles[f]){
      signal[i] = (TTree*)sfiles[f]->Get("TmvaInputTree");
      //if (f.find("QCD-Pt")!=f.npos){
      //}
      Double_t signalWeight = 1.0;
      factory->AddSignalTree(signal[i],signalWeight);
      factory->SetSignalWeightExpression("total_weight");

    }//if file exist
    else {
      std::cout << " Cannot find signal file " << f << std::endl;
    }
  }//loop on files


   // Apply additional cuts on the signal and background samples (can be different)
  TCut mycuts = "";//dijet_deta>3.8 && dijet_M > 1100 && met > 100 && met_significance>5";
  TCut mycutb = "";//dijet_deta>3.8 && dijet_M > 1100 && met > 100 && met_significance>5";

  factory->PrepareTrainingAndTestTree( mycuts, mycutb,
				       "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
  


   // Likelihood ("naive Bayes estimator")
  //factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
  //"H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

  // Linear discriminant (same as Fisher discriminant)
  //factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

  // Fisher discriminant (same as LD)
  factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

  // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
  //factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=60:HiddenLayers=N+5:TestRate=5:!UseRegulator" );

  // Boosted Decision Trees
  // Gradient Boost
  //factory->BookMethod( TMVA::Types::kBDT, "BDTG",
  //"!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" );
  //factory->BookMethod( TMVA::Types::kBDT, "BDTG",
  //                       "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:nCuts=20:MaxDepth=2" );


  // Adaptive Boost
  //factory->BookMethod( TMVA::Types::kBDT, "BDT1000",
  //	       "!H:!V:NTrees=1000:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );

  factory->BookMethod( TMVA::Types::kBDT, "BDT",
		       "!H:!V:NTrees=1000:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.2:SeparationType=GiniIndex:nCuts=20" );

  // Bagging
  //factory->BookMethod( TMVA::Types::kBDT, "BDTB",
  //                       "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" );

  // Decorrelation + Adaptive Boost
  //factory->BookMethod( TMVA::Types::kBDT, "BDTD",
  //                       "!H:!V:NTrees=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );

  // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables
  //factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher",
  //       "!H:!V:NTrees=50:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   output_tmva->Close();

   std::cout << "==> Wrote root file: " << output_tmva->GetName() << std::endl
             << "==> TMVAClassification is done!" << std::endl
             << std::endl
             << "==> To view the results, launch the GUI: \"root -l ./TMVAGui.C\"" << std::endl
             << std::endl;

   // Clean up
   delete factory;

  return 0;
}//main
예제 #19
0
/// Train, test and evaluate one gradient-boosted decision tree with TMVA.
///
/// @param inputVariables  Text file of whitespace-separated tokens. The first
///                        token is skipped (it is the name of the BDT); every
///                        following token is registered with the factory as a
///                        float ('F') input variable.
/// @param signalName      ROOT file from which the signal "DMSTree" is read.
/// @param backName        ROOT file from which the background "DMSTree" is read.
///
/// TMVA output goes to "TMVA/TMVA.root". If the variable list, the output file,
/// or either input tree is unusable, an error is printed to std::cerr and no
/// training is run.
void classifyBDT(TString inputVariables = "trainingVars.txt",
                 TString signalName = "/mnt/hscratch/dabercro/skims2/BDT_Signal.root",
                 TString backName = "/mnt/hscratch/dabercro/skims2/BDT_Background.root") {
   TMVA::Tools::Instance();
   std::cout << "==> Start TMVAClassification" << std::endl;

   // Check the variable list before touching the output file: an unopened
   // stream never reaches eof, so looping on eof() alone would spin forever,
   // and "RECREATE" below would needlessly truncate an existing result.
   std::ifstream configFile(inputVariables.Data());
   if (!configFile.is_open()) {
      std::cerr << "ERROR: could not open variable list " << inputVariables << std::endl;
      return;
   }

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName( "TMVA/TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
   if (!outputFile) {
      std::cerr << "ERROR: could not create output file " << outfileName << std::endl;
      return;
   }
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationCategory", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;N" );

   // A very simple MVA => as a rule of thumb 10-20 variables is where people
   // start to get worried about the total number.
   std::string token;
   configFile >> token;  // first token is the name of the BDT, not a variable
   Int_t nVariables = 0;
   // Extraction itself is the loop condition, so a failed read is never
   // mistaken for a variable and a trailing newline adds nothing.
   while (configFile >> token) {
      factory->AddVariable(TString(token.c_str()), 'F');
      ++nVariables;
   }
   if (nVariables == 0) {
      std::cerr << "ERROR: no input variables found in " << inputVariables << std::endl;
   }

   // Background selection: leading jet not matched to a W (24) or Z (23).
   // Earlier alternatives, kept for reference:
   //   "jet1qg2<2.&&jet1pt>250.&&jet1pullAngle>-5."
   //   "passZ > 3  && fjet1pt > 250 && fjet1MassPruned < 120 && fatjetid < 2"
   TCut lCut   = "abs(fjet1PartonId)!=24&&abs(fjet1PartonId)!=23";

   // Signal selection: leading jet matched to a W or Z.
   // Earlier alternative, kept for reference:
   //   "passT > 0   && fjet1pt > 250 && fjet1MassPruned < 120 && abs(fjet1PartonId) == 24&& fatjetid < 2"
   TCut lSCut   = "abs(fjet1PartonId)==24||abs(fjet1PartonId)==23";

   // Applied on top of both selections.
   TCut cleanCut = "fjet1QGtagSub2 > -10 && fjet1PullAngle > -4 && abs(fjet1pt/fjet1MassTrimmed)<200 && abs(fjet1pt/fjet1MassPruned)<200";

   // TFile::Open yields a null pointer on failure and Get() does so for a
   // missing key; guard both instead of dereferencing blindly.
   TFile *lSAInput = TFile::Open(signalName);
   TTree *lSASignal = lSAInput ? (TTree*)lSAInput->Get("DMSTree") : 0;
   if (!lSASignal) {
      std::cerr << "ERROR: could not read tree \"DMSTree\" from signal file " << signalName << std::endl;
   }
   TFile *lSBInput = TFile::Open(backName);
   TTree *lSBSignal = lSBInput ? (TTree*)lSBInput->Get("DMSTree") : 0;
   if (!lSBSignal) {
      std::cerr << "ERROR: could not read tree \"DMSTree\" from background file " << backName << std::endl;
   }

   const Bool_t ready = (nVariables > 0) && lSASignal && lSBSignal;
   if (ready) {
      Double_t lSWeight = 1.0;
      Double_t lBWeight = 1.0;
      // Opening the inputs changed the current directory; point it back at the
      // output file before handing each tree to the factory.
      gROOT->cd( outfileName+TString(":/") );
      factory->AddSignalTree    ( lSASignal, lSWeight );

      gROOT->cd( outfileName+TString(":/") );
      factory->AddBackgroundTree( lSBSignal, lBWeight );

      factory->SetWeightExpression("weight");
      // nTrain_*=0 leaves the train/test split sizes to TMVA.
      factory->PrepareTrainingAndTestTree(lSCut&&cleanCut,lCut&&cleanCut,"nTrain_Signal=0:nTrain_Background=0:SplitMode=Alternate:NormMode=NumEvents:!V");

      // NOTE(review): UseBaggedGrad / NNodesMax / nEventsMin are TMVA-version
      // dependent option names -- confirm against the TMVA release in use.
      // Alternative definition tried previously:
      //   "!H:!V:NTrees=400:BoostType=Grad:Shrinkage=0.1:UseBaggedGrad=F:nCuts=2000:MaxDepth=5:UseYesNoLeaf=F:MinNodeSize=0.086:NegWeightTreatment=IgnoreNegWeightsInTraining"
      TString lBDTDef   = "!H:!V:NTrees=400:BoostType=Grad:Shrinkage=0.1:UseBaggedGrad=F:nCuts=2000:NNodesMax=10000:MaxDepth=5:UseYesNoLeaf=F:nEventsMin=200";
      factory->BookMethod(TMVA::Types::kBDT,"BDT_simple_alpha",lBDTDef);
      factory->TrainAllMethods();
      factory->TestAllMethods();
      factory->EvaluateAllMethods();
   }

   outputFile->Close();

   if (ready) {
      std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
      std::cout << "==> TMVAClassification is done!" << std::endl;
   }
   delete factory;

   // The factory is gone, so nothing refers to the input trees any more.
   if (lSAInput) lSAInput->Close();
   if (lSBInput) lSBInput->Close();
   //if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
예제 #20
0
/// Multiclass TMVA example.
///
/// Reads tree "TreeR" from ./data.root (generating the file with
/// createData.C when it is not present), registers three classes selected by
/// the "cls" branch, books a gradient-boosted BDT, two MLPs and an FDA, then
/// trains, tests and evaluates them. Output goes to "TMVAMulticlass.root".
/// Terminates the process with exit(1) if a required file or tree is missing.
void TMVAMulticlass(){
   TString outfileName = "TMVAMulticlass.root";
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
   if (!outputFile) {
      // Without this check the factory and the final Close() would be handed
      // a null file.
      std::cout << "ERROR: could not create output file " << outfileName << std::endl;
      exit(1);
   }
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=multiclass" );
   factory->AddVariable( "var0", 'F' );
   factory->AddVariable( "var1", 'F' );

   TString fname = "./data.root";
   // AccessPathName() returns true when the path is NOT accessible.
   if (gSystem->AccessPathName( fname )) {
      // no data.root in the local directory yet: generate it
      gROOT->LoadMacro( "./createData.C");
      create_multiclassdata(20000);
      std::cout << " created data.root for tests of the multiclass features"<<std::endl;
   }
   else {
      std::cout << "--- TMVAMulticlass   : Accessing " << fname << std::endl;
   }
   TFile *input = TFile::Open( fname );
   if (!input) {
      std::cout << "ERROR: could not open data file" << std::endl;
      exit(1);
   }
   TTree *tree     = (TTree*)input->Get("TreeR");
   if (!tree) {
      // Get() yields a null pointer for a missing key; AddTree must not see it.
      std::cout << "ERROR: could not find tree \"TreeR\" in " << fname << std::endl;
      exit(1);
   }

   // Opening the input changed the current directory; switch back to the
   // output file before registering the trees.
   gROOT->cd( outfileName+TString(":/") );
   // The same tree feeds all three classes; the cut on "cls" selects each one.
   factory->AddTree    ( tree, "Signal1",    1. , "cls==0"   );
   factory->AddTree    ( tree, "Signal2",    1. , "cls==1"   );
   factory->AddTree    ( tree, "Background",    1., "cls==2" );
   factory->PrepareTrainingAndTestTree( "", "SplitMode=Random:NormMode=NumEvents:!V" );

   factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5");
   factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:VarTransform=N:NCycles=100:HiddenLayers=N+5,3:TestRate=5"); // testing vartransforms
   factory->BookMethod( TMVA::Types::kMLP, "MLP2", "!H:!V:NeuronType=tanh:NCycles=100:HiddenLayers=N+5,3:TestRate=5");
   factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                        "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x0*x1:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
예제 #21
0
void ZTMVAClassification( TString myMethodList = "" )
{
   

   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Cut optimisation
   Use["Cuts"]            = 0;
   Use["CutsD"]           = 0;
   Use["CutsPCA"]         = 0;
   Use["CutsGA"]          = 0;
   Use["CutsSA"]          = 0;
   // 
   // --- 1-dimensional likelihood ("naive Bayes estimator")
   Use["Likelihood"]      = 0;
   Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"]   = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"]   = 0;
   Use["LikelihoodMIX"]   = 0;
   //
   // --- Mutidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]           = 0;
   Use["PDERSD"]          = 0;
   Use["PDERSPCA"]        = 0;
   Use["PDEFoam"]         = 0;
   Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
   Use["KNN"]             = 0; // k-nearest neighbour method
   //
   // --- Linear Discriminant Analysis
   Use["LD"]              = 0; // Linear Discriminant identical to Fisher
   Use["Fisher"]          = 0;
   Use["FisherG"]         = 0;
   Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
   Use["HMatrix"]         = 0;
   //
   // --- Function Discriminant analysis
   Use["FDA_GA"]          = 0; // minimisation of user-defined function using Genetics Algorithm
   Use["FDA_SA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   Use["FDA_MCMT"]        = 0;
   //
   // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
   Use["MLP"]             = 1; // Recommended ANN
   Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
   Use["MLPBNN"]          = 0; // Recommended ANN with BFGS training method and bayesian regulator
   Use["CFMlpANN"]        = 0; // Depreciated ANN from ALEPH
   Use["TMlpANN"]         = 0; // ROOT's own ANN
   //
   // --- Support Vector Machine 
   Use["SVM"]             = 0;
   // 
   // --- Boosted Decision Trees
   Use["BDT"]             = 1; // uses Adaptive Boost
   Use["BDTG"]            = 1; // uses Gradient Boost
   Use["BDTB"]            = 0; // uses Bagging
   Use["BDTD"]            = 1; // decorrelation + Adaptive Boost
   Use["BDTF"]            = 0; // allow usage of fisher discriminant for node splitting 
   // 
   // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
   Use["RuleFit"]         = 0;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;



   // --------------------------------------------------------------------------------------------------

   // --- Here the preparation phase begins

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );


   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

   //factory->AddVariable( "maxpioneta", "maxpioneta", "", 'F' );
   //factory->AddVariable( "minpioneta", "minpioneta", "", 'F' );
   factory->AddVariable( "nTT", "nTT", "", 'F' );
   // factory->AddVariable( "pidpimin", "pidpimin", "", 'F' );
   // factory->AddVariable( "pidpimax", "pidpimax", "", 'F' );
   factory->AddVariable( "normxpt", "normxpt", "", 'F' );
   factory->AddVariable( "eta", "eta", "", 'F' );
   //factory->AddVariable( "phi", "phi", "", 'F' );
   //  factory->AddVariable( "normptsum", "normptsum", "", 'F' );
    //factory->AddVariable( "ptAsym", "ptAsym", "", 'F' );
   //factory->AddVariable( "dphimax", "dphimax", "", 'F' );
 //factory->AddVariable( "dphimin", "dphimin", "", 'F' );
   //factory->AddVariable( "drmax", "drmax", "", 'F' );
   // factory->AddVariable( "drmin", "drmin", "", 'F' );
    //    factory->AddVariable( "normpionp", "normpionp", "", 'F' );
    factory->AddVariable( "normminpionpt", "normminpionpt", "", 'F' );
    //factory->AddVariable( "normminpionp", "normminpionp", "", 'F' );
    factory->AddVariable( "normmaxpionpt", "normmaxpionpt", "", 'F' );
    //  factory->AddVariable( "normptj", "normptj", "", 'F' );
    //factory->AddVariable( "jmasspull", "jmasspull", "", 'F' );
    //factory->AddVariable( "vchi2dof", "vchi2dof", "", 'F' );
    //    factory->AddVariable("maxchi2","maxchi2","", 'F');  
    // factory->AddVariable("normr","normr","", 'F');    
    //    factory->AddVariable("normq","normq","", 'F'); 
    //factory->AddVariable("normminm","normminm","", 'F'); 
    factory->AddVariable("logipmax","logipmax","", 'F'); 
    factory->AddVariable("logipmin","logipmin","", 'F'); 
    factory->AddVariable("logfd","logfd",'F');
    factory->AddVariable("logvd","logvd",'F');
    //factory->AddVariable("pointAngle","pointingAngle",'F');
    factory->AddVariable("logvpi","",'F');
    //factory->AddVariable("logmaxprob","",'F');
    //factory->AddVariable("logminprob","",'F');
 
    factory->AddSpectator( "mReFit", "mReFit", "", 'D' );
    //    factory->AddSpectator( "Qdecay", "Qdecay", "",'F' );
    //  factory->AddSpectator( "m23", "m23", "",'F' );
    
    //   TFile * input_Background = new TFile("../back.root");
   TFile * input_Signal = new TFile("../cmx12.root");
   TFile * input_Background = new TFile("../background12.root");
   std::cout << "--- TMVAClassification       : Using input file for signal    : " << input_Signal->GetName() << std::endl;
   std::cout << "--- TMVAClassification       : Using input file for backgound : " << input_Background->GetName() << std::endl;
   
   // --- Register the training and test trees

   TTree *signal     = (TTree*)input_Signal->Get("psiCand");
   TTree *background = (TTree*)input_Background->Get("psiCand");
   
   // global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;
   
   // You can add an arbitrary number of signal or background trees
   factory->AddSignalTree    ( signal,     signalWeight     );
   factory->AddBackgroundTree( background, backgroundWeight );
   

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = "QDecay < 300&&fdchi2 > 300"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = "QDecay < 300&&fdchi2> 300"; // for example: TCut mycutb = "abs(var1)<0.5";

   
   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

   if (Use["CutsSA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                           "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   // Likelihood ("naive Bayes estimator")
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // Decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   // PCA-transformed likelihood
   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 

   // Use a kernel density estimator to approximate the PDFs
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); 

   // Use a variable-dependent mix of splines and kernel density estimator
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

   // Test the multi-dimensional probability density estimator
   // here are the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["PDEFoamBoost"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
                           "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // H-Matrix (chi2-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" );

   // Linear discriminant (same as Fisher discriminant)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher discriminant (same as LD)
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", 
                           "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );


   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"]){
     // factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );
     //factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=Norm:NCycles=600:HiddenLayers=N+5:TestRate=5" );
     // factory->BookMethod( TMVA::Types::kMLP, "MLPCE", "H:!V:NeuronType=sigmoid:VarTransform=Norm:NCycles=600:HiddenLayers=N+5:TestRate=5:EstimatorType=CE" );
     factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=sigmoid:VarTransform=Norm:NCycles=600:HiddenLayers=9:TestRate=5:EstimatorType=CE" );
     //   factory->BookMethod( TMVA::Types::kMLP, "MLPCE83", "H:!V:NeuronType=tanh:VarTransform=Norm:NCycles=600:HiddenLayers=8,3:TestRate=5:EstimatorType=CE" );
   }

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators

   // CF(Clermont-Ferrand)ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...  

   // Tmlp(Root)ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDTG"]) { // Gradient Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5" );
      //  factory->BookMethod( TMVA::Types::kBDT, "BDTGI",
      //                     "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5:SeparationType=GiniIndexWithLaplace" );

      //     factory->BookMethod( TMVA::Types::kBDT, "BDTG6",
      //                      "!H:!V:NTrees=600:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.4:nCuts=20:NNodesMax=6" );
      //factory->BookMethod( TMVA::Types::kBDT, "BDTG2",
      //                    "!H:!V:NTrees=800:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.4:nCuts=20:NNodesMax=6" );
      factory->BookMethod( TMVA::Types::kBDT, "BDTG3",
                           "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.4:nCuts=20:NNodesMax=6" );
      // factory->BookMethod( TMVA::Types::kBDT, "BDTG4",
      //                     "!H:!V:NTrees=1200:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.4:nCuts=20:NNodesMax=6" );
      // factory->BookMethod( TMVA::Types::kBDT, "BDTG5",
      //                     "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.4:nCuts=20:NNodesMax=5" );

  }
   if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );


   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );

   if (Use["BDTF"])  // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables
      factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher",
                           "!H:!V:NTrees=50:nEventsMin=150:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","GA");

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   //  if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
예제 #22
0
// Train, test and evaluate TMVA classifiers on the "ntuple" trees found in
// "SigElectrons.root" (signal) and "BkgElectrons.root" (background).
//
// myMethodList : comma-separated list of method names to run. When empty, the
//                defaults set in the `Use` map below are used. A name that is
//                not registered in `Use` aborts the macro with a message
//                listing the known names.
//
// All TMVA results are written to "TMVA.root". After training, the macro runs
// the TMVA "variables.C" plotting macro from a hard-coded installation path.
void TMVAClassification( TString myMethodList = "" ) 
{
   // NOTE: library loading (gSystem->Load("libTMVA"), TMVA::Tools::Instance())
   // is not done here; the macro relies on TMVA being available already.

   //---------------------------------------------------------------
   // MVA methods known to this macro: 1 = run by default, 0 = available on request.
   // Every method that is booked further down must be registered here,
   // otherwise it cannot be selected through myMethodList.
   std::map<std::string,int> Use;

   Use["Cuts"]            = 1;
   Use["Likelihood"]      = 0;

   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   // An explicit method list replaces the defaults: switch everything off,
   // then switch on exactly the requested methods.
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); ++it) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); ++it) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // Create a new root output file.
   TString outfileName( "TMVA.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
   if (!outputFile || outputFile->IsZombie()) {
      std::cout << "--- TMVAClassification : ERROR: could not create output file: " << outfileName << std::endl;
      delete outputFile;
      return;
   }

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory will
   // then run the performance analysis for you.
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/ 
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in 
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, 
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" );

   // If you wish to modify default settings 
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training.
   // Variable expressions are allowed ("label := expression"), i.e. anything
   // that can also be parsed by TTree::Draw( "expression" ).
   factory->AddVariable("deltaEta := deta", 'F');
   factory->AddVariable("deltaPhi := dphi", 'F');
   factory->AddVariable("sigmaIetaIeta := sieie", 'F');
   factory->AddVariable("HoverE := hoe", 'F');
   factory->AddVariable("trackIso := trackiso", 'F');
   factory->AddVariable("ecalIso := ecaliso", 'F');
   factory->AddVariable("hcalIso := hcaliso", 'F');
   //factory->AddVariable("nMissingHits := misshits", 'I');


   // You can add so-called "Spectator variables", which are not used in the MVA training, 
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
   // input variables, the response values of all trained MVAs, and the spectator variables
   factory->AddSpectator( "et",  'F' );
   factory->AddSpectator( "eta",  'F' );
   factory->AddSpectator( "phi",  'F' );


   // read training and test data
   const TString sigFileName( "SigElectrons.root" );
   const TString bkgFileName( "BkgElectrons.root" );
   TFile *input  = TFile::Open( sigFileName );
   TFile *inputB = TFile::Open( bkgFileName );

   // Bail out cleanly when an input file is missing or unreadable instead of
   // dereferencing a null pointer below.
   if (!input || input->IsZombie() || !inputB || inputB->IsZombie()) {
      std::cout << "--- TMVAClassification : ERROR: could not open input file(s): "
                << sigFileName << " / " << bkgFileName << std::endl;
      outputFile->Close();
      delete factory;
      delete input;
      delete inputB;
      return;
   }

   std::cout << "--- TMVAClassification : Using input file: " << input->GetName() << std::endl;
   
   TTree *signal     = (TTree*)input->Get("ntuple");
   TTree *background = (TTree*)inputB->Get("ntuple");

   // The factory must not be handed a null tree.
   if (!signal || !background) {
      std::cout << "--- TMVAClassification : ERROR: tree \"ntuple\" not found in "
                << (signal ? bkgFileName : sigFileName) << std::endl;
      outputFile->Close();
      delete factory;
      delete input;
      delete inputB;
      return;
   }
   
   // Global (per-tree) event weights of 1.0 for both samples.
   factory->AddSignalTree    ( signal,     1.0 );
   factory->AddBackgroundTree( background, 1.0 );

   
   // This would set individual event weights (the variables defined in the 
   // expression need to exist in the original TTree)
   //    for signal    : factory->SetSignalWeightExpression("weight1*weight2");
   //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
   //factory->SetBackgroundWeightExpression("weight");

   // Apply additional cuts on the signal and background samples (can be different).
   // Both are empty here, i.e. no preselection is applied.
   TCut mycuts = ""; 
   TCut mycutb = ""; 

   // Split the events randomly into training and test samples; the
   // nTrain_* options are left at 0 (see the TMVA option reference for
   // the exact splitting rule this implies).
   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // To also specify the number of testing events, use e.g.:
   //    factory->PrepareTrainingAndTestTree( mycut, 
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );  

   // ---- Book MVA methods
   //
   // please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts", 
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );
   
   // Likelihood
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", 
                           "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); 

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();    

   // --------------------------------------------------------------
   
   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;      

   delete factory;

   // Run the TMVA variable-plotting macro (hard-coded installation path).
//   gROOT->ProcessLine(".x /usr/local/bin/root/tmva/test/correlations.C");
   gROOT->ProcessLine(".x /usr/local/bin/root/tmva/test/variables.C");
}
//require mumucl>0.6
//opening angle >10
//coplanarity >90
//pang<90
void TMVAClassification_cc1pcoh_bdt_ver6noveract( TString myMethodList = "" )
{
    //---------------------------------------------------------------
    // This loads the library
    TMVA::Tools::Instance();

    // to get access to the GUI and all tmva macros
    TString thisdir = gSystem->DirName(gInterpreter->GetCurrentMacroName());
    gROOT->SetMacroPath(thisdir + ":" + gROOT->GetMacroPath());
    gROOT->ProcessLine(".L TMVAGui.C");

    // Default MVA methods to be trained + tested
    std::map<std::string,int> Use;

    // --- Cut optimisation
    Use["Cuts"]            = 1;
    Use["CutsD"]           = 1;
    Use["CutsPCA"]         = 0;
    Use["CutsGA"]          = 0;
    Use["CutsSA"]          = 0;
    //
    // --- 1-dimensional likelihood ("naive Bayes estimator")
    Use["Likelihood"]      = 1;
    Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
    Use["LikelihoodPCA"]   = 1; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
    Use["LikelihoodKDE"]   = 0;
    Use["LikelihoodMIX"]   = 0;
    //
    // --- Mutidimensional likelihood and Nearest-Neighbour methods
    Use["PDERS"]           = 1;
    Use["PDERSD"]          = 0;
    Use["PDERSPCA"]        = 0;
    Use["PDEFoam"]         = 1;
    Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
    Use["KNN"]             = 1; // k-nearest neighbour method
    //
    // --- Linear Discriminant Analysis
    Use["LD"]              = 1; // Linear Discriminant identical to Fisher
    Use["Fisher"]          = 0;
    Use["FisherG"]         = 0;
    Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
    Use["HMatrix"]         = 0;
    //
    // --- Function Discriminant analysis
    Use["FDA_GA"]          = 1; // minimisation of user-defined function using Genetics Algorithm
    Use["FDA_SA"]          = 0;
    Use["FDA_MC"]          = 0;
    Use["FDA_MT"]          = 0;
    Use["FDA_GAMT"]        = 0;
    Use["FDA_MCMT"]        = 0;
    //
    // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
    Use["MLP"]             = 0; // Recommended ANN
    Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
    Use["MLPBNN"]          = 1; // Recommended ANN with BFGS training method and bayesian regulator
    Use["CFMlpANN"]        = 0; // Depreciated ANN from ALEPH
    Use["TMlpANN"]         = 0; // ROOT's own ANN
    //
    // --- Support Vector Machine
    Use["SVM"]             = 1;
    //
    // --- Boosted Decision Trees
    Use["BDT"]             = 1; // uses Adaptive Boost
    Use["BDTG"]            = 0; // uses Gradient Boost
    Use["BDTB"]            = 0; // uses Bagging
    Use["BDTD"]            = 0; // decorrelation + Adaptive Boost
    Use["BDTF"]            = 0; // allow usage of fisher discriminant for node splitting
    //
    // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
    Use["RuleFit"]         = 1;
    
    // ---------------------------------------------------------------
    // Choose method
    std::cout << std::endl;
    std::cout << "==> Start TMVAClassification" << std::endl;

    // Select methods (don't look at this code - not of interest)
    if (myMethodList != "") {
        for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

        std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
        for (UInt_t i=0; i<mlist.size(); i++) {
            std::string regMethod(mlist[i]);

            if (Use.find(regMethod) == Use.end()) {
                std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
                for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
                std::cout << std::endl;
                return;
            }
            Use[regMethod] = 1;
        }
    }

    // ---------------------------------------------------------------
    // --- Here the preparation phase begins

    // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
    TString outfileName( "TMVA_cc1pcoh_bdt_ver6noveract.root" );//newchange
    TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

    // Create the factory object.
    TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification_ver6noveract", outputFile,//newchange
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

    
    // Add variable
    //sprintf(select,  "Ntrack==2&&mumucl>0.6&&pmucl>0.25&&pang<90&&muang_t<15 && veract*7.66339869e-2<34");
    //factory->AddVariable( "Ntrack", 'F' );
    factory->AddVariable( "mumucl", 'F' );
    factory->AddVariable( "pmucl", 'F' );
    factory->AddVariable( "pang_t", 'F' );//use pang instead of pang_t
    factory->AddVariable( "muang_t", 'F' );
    //factory->AddVariable( "veract", 'F' );
    factory->AddVariable( "ppe", 'F');
    factory->AddVariable( "mupe", 'F');
    factory->AddVariable( "range", 'F');
    factory->AddVariable( "coplanarity", 'F');
    factory->AddVariable( "opening", 'F');//newadd

    // Add spectator
    factory->AddSpectator( "fileIndex", 'I' );
    factory->AddSpectator( "nuE", 'F' );
    factory->AddSpectator( "inttype", 'I' );
    factory->AddSpectator( "norm", 'F' );
    factory->AddSpectator( "totcrsne", 'F' );
    factory->AddSpectator( "veract", 'F' );
    factory->AddSpectator( "pang", 'F' );
    factory->AddSpectator( "mupdg", 'I' );
    factory->AddSpectator( "ppdg", 'I' );

    // ---------------------------------------------------------------
    // --- Get weight
    TString fratioStr="/home/kikawa/macros/nd34_tuned_11bv3.1_250ka.root";
    
    
    
    // ---------------------------------------------------------------
    // --- Add sample
    TString fsignalStr="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmva/pm_merged_ccqe_tot.root";
    TString fbarStr="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmva/pmbar_merged_ccqe.root";
    TString fbkgStr="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmva/wall_merged_ccqe_tot.root";
    TString fbkg2Str="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmva/ingrid_merged_nd3_ccqe_tot.root";
    /*TString fsignalStr="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmvafix/pm_merged_ccqe_tot.root";
    TString fbarStr="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmvafix/pmbar_merged_ccqe.root";
    TString fbkgStr="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmvafix/wall_merged_ccqe_tot.root";
    TString fbkg2Str="/home/cvson/cc1picoh/frkikawa/meAna/ip4tmvafix/ingrid_merged_nd3_ccqe_tot.root";*/

    
    TFile *pfileSignal = new TFile(fsignalStr);
    TFile *pfileBar = new TFile(fbarStr);
    TFile *pfileBkg = new TFile(fbkgStr);
    TFile *pfileBkg2 = new TFile(fbkg2Str);
    TFile *pfileRatio = new TFile(fratioStr);
    
    TTree *ptree_sig  = (TTree*)pfileSignal->Get("tree");
    TTree *ptree_bar  = (TTree*)pfileBar->Get("tree");
    TTree *ptree_bkg   = (TTree*)pfileBkg->Get("tree");
    TTree *ptree_bkg2  = (TTree*)pfileBkg2->Get("tree");
    
    // POT normalization
    const int   nmcFile  = 3950;
    const int   nbarFile  = 986;
    const int   nbkgFile  = 55546;//(31085+24461);
    const int   nbkg2File  = 7882;//(3941+3941);
    

   
    // global event weights per tree (see below for setting event-wise weights)
    // adding for signal sample
    // using this as standard and add other later
    Double_t signalWeight_sig     = 1.0;
    Double_t backgroundWeight_sig = 1.0;
   
    factory->AddSignalTree    ( ptree_sig,     signalWeight_sig );
    factory->AddBackgroundTree( ptree_sig, backgroundWeight_sig );
    
    // Add Numubar sample
    //Double_t signalWeight_bar     = nmcFile/float(nbarFile);
    Double_t backgroundWeight_bar = nmcFile/float(nbarFile);
    
    //factory->AddSignalTree    ( ptree_bar,     signalWeight_bar );
    factory->AddBackgroundTree( ptree_bar, backgroundWeight_bar );
    
    // Add wall background
    //Double_t signalWeight_bkg     = nmcFile/float(nbkgFile);
    Double_t backgroundWeight_bkg = nmcFile/float(nbkgFile);
    
    //factory->AddSignalTree    ( ptree_bkg,     signalWeight_bkg );
    factory->AddBackgroundTree( ptree_bkg, backgroundWeight_bkg );
    
    // Add INGRID background
    //Double_t signalWeight_bkg2     = nmcFile/float(nbkg2File);
    Double_t backgroundWeight_bkg2 = nmcFile/float(nbkg2File);
    
    //factory->AddSignalTree    ( ptree_bkg2,     signalWeight_bkg2 );
    factory->AddBackgroundTree( ptree_bkg2, backgroundWeight_bkg2 );
    
   
   
    //factory->SetSignalWeightExpression    ("norm*totcrsne*2.8647e-13");
    //factory->SetBackgroundWeightExpression( "norm*totcrsne*2.8647e-13" );

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = "Ntrack==2 && abs(inttype)==16 && fileIndex==1 && pang<90 && mumucl>0.6 && opening>10 && coplanarity>90 && pmucl>0.2"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = "Ntrack==2 && (abs(inttype)!=16 || fileIndex>1) && pang<90 && mumucl>0.6 && opening>10 && coplanarity>90 && pmucl>0.2"; // for example: TCut mycutb = "abs(var1)<0.5";

   // Tell the factory how to use the training and testing events
   //
   // If no numbers of events are given, half of the events in the tree are used 
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut,
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

   if (Use["CutsSA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                           "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   // Likelihood ("naive Bayes estimator")
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // Decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   // PCA-transformed likelihood
   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 

   // Use a kernel density estimator to approximate the PDFs
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); 

   // Use a variable-dependent mix of splines and kernel density estimator
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

   // Test the multi-dimensional probability density estimator
   // here are the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["PDEFoamBoost"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
                           "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // H-Matrix (chi2-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" );

   // Linear discriminant (same as Fisher discriminant)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher discriminant (same as LD)
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", 
                           "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators

   // CF(Clermont-Ferrand)ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...  

   // Tmlp(Root)ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDTG"]) // Gradient Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" );

   /*if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );*/
    if (Use["BDT"])  // Adaptive Boost
        factory->BookMethod( TMVA::Types::kBDT, "BDT",
                            "!H:!V:NTrees=850:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20" );

   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );

   if (Use["BDTF"])  // Allow Using Fisher discriminant in node splitting for (strong) linearly correlated variables
      factory->BookMethod( TMVA::Types::kBDT, "BDTMitFisher",
                           "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

   // ---- STILL EXPERIMENTAL and only implemented for BDT's ! 
   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","FitGA");

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

    
   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;

   // Launch the GUI for the root macros
   //if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
예제 #24
0
void TMVA_stop( TString signal_name = "T2tt", int train_region = 1, float x_parameter = 0.25)
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVA_stop.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   // if you like to use a method via the plugin mechanism, we recommend using
   //
   // mylinux~> root -l TMVA_stop.C\(\"P_myMethod\"\)
   // (an example is given for using the BDT as plugin (see below),
   // but of course the real application is when you write your own
   // method based)

  //-----------------------------------------------------
  // define event selection (store in TCut sel)
  //-----------------------------------------------------

  TCut njets4("mini_njets>=4");
  TCut met100("mini_met>=100");
  TCut mt120("mini_mt>=120");
  TCut nb1("mini_nb>=1");
  TCut isotrk("mini_passisotrk==1");
  TCut lep_pt30("mini_nlep>=1 && mini_lep1pt>30.0");
  TCut sig("mini_sig==1");
   
  TCut  sel0  = njets4 + met100 + mt120 + nb1 + isotrk + lep_pt30 + sig;

  cout << "Using selection      : " << sel0.GetTitle() << endl;
  cout << "Doing signal point   : " << train_region       << endl;

  //-----------------------------------------------------
  // choose which variables to include in MVA training
  //-----------------------------------------------------
  
  std::map<std::string,int> mvaVar;
  mvaVar[ "met" ]			= 1;
  mvaVar[ "lep1pt" ]  	    = 0;
  mvaVar[ "mt2w" ]	  		= 1;
  mvaVar[ "htratiom" ]	    = 1;
  mvaVar[ "chi2" ]	        = 1;
  mvaVar[ "dphimjmin" ]		= 1;
  mvaVar[ "pt_b" ]			= 0;
  mvaVar[ "nb" ]			= 0;
  mvaVar[ "pt_J1" ]			= 0;
  mvaVar[ "pt_J2" ]			= 0;
  mvaVar[ "rand" ]			= 0;

  mvaVar[ "mt" ]			= 0;
  mvaVar[ "mt2bl" ]			= 0;
  mvaVar[ "mt2b" ]			= 0;
  mvaVar[ "lep1eta" ]			= 0;
  mvaVar[ "thrjetlm" ]			= 0;
  mvaVar[ "apljetlm" ]			= 0;
  mvaVar[ "sphjetlm" ]			= 0;
  mvaVar[ "cirjetlm" ]			= 0;
  mvaVar[ "chi2min" ]			= 0;
  mvaVar[ "chi2min_mt2b" ]		= 0;
  mvaVar[ "chi2min_mt2bl" ]		= 0;
  mvaVar[ "chi2min_mt2w" ]		= 0;
  mvaVar[ "mt2bmin" ]			= 0;
  mvaVar[ "mt2blmin" ]			= 0;
  mvaVar[ "mt2wmin_chi2" ]		= 0;
  mvaVar[ "mt2bmin_chi2" ]		= 0;
  mvaVar[ "mt2blmin_chi2" ]		= 0;
  mvaVar[ "mt2wmin_chi2prob" ]		= 0;
  mvaVar[ "mt2bmin_chi2prob" ]		= 0;
  mvaVar[ "mt2blmin_chi2prob" ]		= 0;
  mvaVar[ "htratiol" ]              	= 0;
  mvaVar[ "dphimj1" ]			= 0;
  mvaVar[ "dphimj2" ]			= 0;
  mvaVar[ "metsig" ]			= 0;

  //---------------------------------
  //choose bkg samples to include
  //---------------------------------
  cout << "Background trees: " << endl;
  int n_backgrounds = 8;

  TString backgrounds[] = {"ttdl_powheg", "ttsl_powheg", "w1to4jets", "tW_lep", "triboson", "diboson", "ttV", "DY1to4Jtot" };

  TString bkgPath = "/nfs-3/userdata/stop/Train/V00-02-18__V00-03-00_4jetsMET100_bkg/";

  TChain* chBackground = new TChain("t");
 
  for (int i = 0; i < n_backgrounds; i++) {
     TString backgroundChain = bkgPath + "/" + backgrounds[i] + ".root";
     cout << "    " << backgroundChain << endl;
     chBackground ->Add(backgroundChain );
  }

  //---------------------------------
  //choose signal sample to include
  //---------------------------------
  cout << "Signal trees: " << endl;
  TString s_train_region = "";
  s_train_region += train_region;
  TString s_x_parameter = "";
  s_x_parameter = Form("%.2f",x_parameter);

  TString signalPath = "/nfs-3/userdata/stop/Train/";
  TString signalVersion = "V00-02-18__V00-03-00_4jetsMET100_";

  TChain *chSignal = new TChain("t");

  TString base_name = signalPath + "/" + signalVersion + signal_name + "/" + signal_name + "_" + s_train_region;
  if (signal_name == "T2bw") base_name = base_name + "_" + s_x_parameter;
  TString signalChain  = base_name + ".root" ;

  cout << "    " << signalChain << endl;

  chSignal->Add(signalChain);

  //-----------------------------------------------------
  // choose backgrounds to include for multiple outputs
  //-----------------------------------------------------
  
  // bool doMultipleOutputs = false;

  // TChain *chww = new TChain("Events");
  // chww->Add(Form("%s/WWTo2L2Nu_PU_testFinal_baby.root",babyPath));
  // chww->Add(Form("%s/GluGluToWWTo4L_PU_testFinal_baby.root",babyPath));
  
  // TChain *chwjets = new TChain("Events");
  // chwjets->Add(Form("%s/WJetsToLNu_PU_testFinal_baby.root",babyPath));
  
  // TChain *chtt = new TChain("Events");
  // chtt->Add(Form("%s/TTJets_PU_testFinal_baby.root",babyPath));
  
  // std::map<std::string,int> includeBkg;
  // includeBkg["ww"]      = 1;
  // includeBkg["wjets"]   = 0;
  // includeBkg["tt"]      = 0;

   //---------------------------------------------------------------
   // This loads the library
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // --- Cut optimisation
   Use["Cuts"]            = 0;
   Use["CutsD"]           = 0;
   Use["CutsPCA"]         = 0;
   Use["CutsGA"]          = 0;
   Use["CutsSA"]          = 0;
   // 
   // --- 1-dimensional likelihood ("naive Bayes estimator")
   Use["Likelihood"]      = 0;
   Use["LikelihoodD"]     = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"]   = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"]   = 0;
   Use["LikelihoodMIX"]   = 0;
   //
   // --- Multidimensional likelihood and Nearest-Neighbour methods
   Use["PDERS"]           = 0;
   Use["PDERSD"]          = 0;
   Use["PDERSPCA"]        = 0;
   Use["PDEFoam"]         = 0;
   Use["PDEFoamBoost"]    = 0; // uses generalised MVA method boosting
   Use["KNN"]             = 0; // k-nearest neighbour method
   //
   // --- Linear Discriminant Analysis
   Use["LD"]              = 0; // Linear Discriminant identical to Fisher
   Use["Fisher"]          = 0;
   Use["FisherG"]         = 0;
   Use["BoostedFisher"]   = 0; // uses generalised MVA method boosting
   Use["HMatrix"]         = 0;
   //
   // --- Function Discriminant analysis
   Use["FDA_GA"]          = 0; // minimisation of user-defined function using Genetics Algorithm
   Use["FDA_SA"]          = 0;
   Use["FDA_MC"]          = 0;
   Use["FDA_MT"]          = 0;
   Use["FDA_GAMT"]        = 0;
   Use["FDA_MCMT"]        = 0;
   //
   // --- Neural Networks (all are feed-forward Multilayer Perceptrons)
   Use["MLP"]             = 0; // Recommended ANN
   Use["MLPBFGS"]         = 0; // Recommended ANN with optional training method
   Use["MLPBNN"]          = 0; // Recommended ANN with BFGS training method and bayesian regulator
   Use["CFMlpANN"]        = 0; // Depreciated ANN from ALEPH
   Use["TMlpANN"]         = 0; // ROOT's own ANN
   //
   // --- Support Vector Machine 
   Use["SVM"]             = 0;
   // 
   // --- Boosted Decision Trees
   Use["BDT"]             = 1; // uses Adaptive Boost
   Use["BDT1"]            = 0; // uses Adaptive Boost
   Use["BDTG"]            = 0; // uses Gradient Boost
   Use["BDTB"]            = 0; // uses Bagging
   Use["BDTD"]            = 0; // decorrelation + Adaptive Boost
   // 
   // --- Friedman's RuleFit method, ie, an optimised series of cuts ("rules")
   Use["RuleFit"]         = 0;
   //
   // --- multi-output MVA's
   Use["multi_BDTG"]      = 0;
   Use["multi_MLP"]       = 0;
   Use["multi_FDA_GA"]    = 0;

   //
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   // --- Here the preparation phase begins

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName = "TMVA_" + signal_name + "_" + s_train_region;
   if (signal_name == "T2bw") outfileName = outfileName +"_" + s_x_parameter;
   outfileName += ".root";
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   TString classification_name = "classification_" + signal_name + "_" + s_train_region;
   if (signal_name == "T2bw") classification_name = classification_name +"_" + s_x_parameter;

   /*
   TString multioutfileName( "TMVA_HWW_multi.root" );
   TFile* multioutputFile;

   if( doMultipleOutputs )
     multioutputFile = TFile::Open( multioutfileName, "RECREATE" );
   */

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory is 
   // the only TMVA object you have to interact with
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( classification_name, outputFile,
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );
   /*
   TMVA::Factory *multifactory;
   if( doMultipleOutputs )
     multifactory= new TMVA::Factory( "TMVAMulticlass", multioutputFile,
                                      "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=multiclass" );
   */
   
   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   //factory->AddVariable( "myvar1 := var1+var2", 'F' );
   //factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' );
   //factory->AddVariable( "var3",                "Variable 3", "units", 'F' );
   //factory->AddVariable( "var4",                "Variable 4", "units", 'F' );

   //--------------------------------------------------------
   // choose which variables to include in training
   //--------------------------------------------------------

   if( mvaVar[ "met"           ]  == 1 ) factory->AddVariable( "mini_met"                    ,  "E_{T}^{miss}"               ,       "GeV", 'F' );
   if( mvaVar[ "mt"            ]  == 1 ) factory->AddVariable( "mini_mt"                     ,  "M_{T}"                      ,       "GeV", 'F' );
   if( mvaVar[ "mt2w"          ]  == 1 ) factory->AddVariable( "mini_mt2w"                   ,  "MT2W"                       ,       "GeV", 'F' );
   if( mvaVar[ "mt2bl"         ]  == 1 ) factory->AddVariable( "mini_mt2bl"                  ,  "MT2bl"                      ,       "GeV", 'F' );
   if( mvaVar[ "mt2b"          ]  == 1 ) factory->AddVariable( "mini_mt2b"                   ,  "MT2b"                       ,       "GeV", 'F' );
   if( mvaVar[ "chi2"          ]  == 1 ) factory->AddVariable( "mini_chi2"                   ,  "chi2"                       ,       ""   , 'F' );
   if( mvaVar[ "lep1pt"        ]  == 1 ) factory->AddVariable( "mini_lep1pt"                 ,  "lepton pt"                  ,       ""   , 'F' );
   if( mvaVar[ "lep1eta"       ]  == 1 ) factory->AddVariable( "mini_lep1eta"                ,  "lepton eta"                 ,       ""   , 'F' );
   if( mvaVar[ "thrjetlm"      ]  == 1 ) factory->AddVariable( "mini_thrjetlm"               ,  "thrust"                     ,       ""   , 'F' );
   if( mvaVar[ "apljetlm"      ]  == 1 ) factory->AddVariable( "mini_apljetlm"               ,  "aplanarity"                 ,       ""   , 'F' );
   if( mvaVar[ "sphjetlm"      ]  == 1 ) factory->AddVariable( "mini_sphjetlm"               ,  "sphericity"                 ,       ""   , 'F' );
   if( mvaVar[ "cirjetlm"      ]  == 1 ) factory->AddVariable( "mini_cirjetlm"               ,  "circularity"                ,       ""   , 'F' );
   if( mvaVar[ "chi2min"       ]  == 1 ) factory->AddVariable( "mini_min(chi2min,100)"       ,  "#chi^{2}_{min}"             ,       ""   , 'F' );
   if( mvaVar[ "chi2minprob"   ]  == 1 ) factory->AddVariable( "mini_chi2minprob"            ,  "Prob(#chi^{2}_{min})"       ,       ""   , 'F' );
   if( mvaVar[ "chi2min_mt2b"  ]  == 1 ) factory->AddVariable( "mini_chi2min_mt2b"           ,  "MT2b(#chi^{2}_{min})"       ,       ""   , 'F' );
   if( mvaVar[ "chi2min_mt2bl" ]  == 1 ) factory->AddVariable( "mini_chi2min_mt2bl"          ,  "MT2bl(#chi^{2}_{min})"      ,       ""   , 'F' );
   if( mvaVar[ "chi2min_mt2w"  ]  == 1 ) factory->AddVariable( "mini_chi2min_mt2w"           ,  "MT2W(#chi^{2}_{min})"       ,       ""   , 'F' );
   if( mvaVar[ "mt2bmin"       ]  == 1 ) factory->AddVariable( "mini_mt2bmin"                ,  "MT2b_{min}"                 ,       ""   , 'F' );
   if( mvaVar[ "mt2blmin"      ]  == 1 ) factory->AddVariable( "mini_mt2blmin"               ,  "MT2bl_{min}"                ,       ""   , 'F' );
   if( mvaVar[ "mt2wmin"       ]  == 1 ) factory->AddVariable( "mini_mt2wmin"                ,  "MT2W_{min}"                 ,       ""   , 'F' );
   if( mvaVar[ "mt2bmin_chi2"  ]  == 1 ) factory->AddVariable( "min(mt2bmin_chi2,100)"  ,  "#chi^{2}(MT2b_{min})"       ,       ""   , 'F' );
   if( mvaVar[ "mt2blmin_chi2" ]  == 1 ) factory->AddVariable( "min(mt2blmin_chi2,100)" ,  "#chi^{2}(MT2bl_{min})"      ,       ""   , 'F' );
   if( mvaVar[ "mt2wmin_chi2"  ]  == 1 ) factory->AddVariable( "min(mt2wmin_chi2,100)"  ,  "#chi^{2}(MT2W_{min})"       ,       ""   , 'F' );
   if( mvaVar[ "mt2bmin_chi2prob"  ]  == 1 ) factory->AddVariable( "mt2bmin_chi2prob"   ,  "Prob(#chi^{2}(MT2b_{min}))"       ,       ""   , 'F' );
   if( mvaVar[ "mt2blmin_chi2prob" ]  == 1 ) factory->AddVariable( "mt2blmin_chi2prob"  ,  "Prob(#chi^{2}(MT2bl_{min}))"      ,       ""   , 'F' );
   if( mvaVar[ "mt2wmin_chi2prob"  ]  == 1 ) factory->AddVariable( "mt2wmin_chi2prob"   ,  "Prob(#chi^{2}(MT2W_{min}))"       ,       ""   , 'F' );
   if( mvaVar[ "htratiol"      ]  == 1 ) factory->AddVariable( "mini_htssl/(mini_htosl+mini_htssl)"    ,  "H_{T}^{SSL}/H_{T}"          ,       ""   , 'F' );
   if( mvaVar[ "htratiom"      ]  == 1 ) factory->AddVariable( "mini_htssm/(mini_htosm+mini_htssm)"    ,  "H_{T}^{SSM}/H_{T}"          ,       ""   , 'F' );
   if( mvaVar[ "dphimj1"       ]  == 1 ) factory->AddVariable( "mini_dphimj1"                ,  "#Delta#phi(j1,E_{T}^{miss})",       ""   , 'F' );
   if( mvaVar[ "dphimj2"       ]  == 1 ) factory->AddVariable( "mini_dphimj2"                ,  "#Delta#phi(j2,E_{T}^{miss})",       ""   , 'F' );
   if( mvaVar[ "dphimjmin"     ]  == 1 ) factory->AddVariable( "mini_dphimjmin"              ,  "min(#Delta#phi(j_{1,2},E_{T}^{miss}))",       ""   , 'F' );
   if( mvaVar[ "rand"          ]  == 1 ) factory->AddVariable( "mini_rand"                   ,  "random(0,1)"                ,       ""   , 'F' );
   if( mvaVar[ "metsig"        ]  == 1 ) factory->AddVariable( "met/sqrt(htosl+htssl)"  ,  "E_{T}^{miss}/#sqrt{H_{T}}"  ,       "#sqrt{GeV}"   , 'F' )
;
   if( mvaVar[ "pt_b"          ]  == 1 ) factory->AddVariable( "mini_pt_b"  ,       "P_T(b) GeV"   , 'F' );
   if( mvaVar[ "nb"            ]  == 1 ) factory->AddVariable( "mini_nb"  ,       "P_T(b) GeV"   , 'F' );
   if( mvaVar[ "pt_J1"          ]  == 1 ) factory->AddVariable( "pt_J1"  ,       "P_T(J1) GeV"   , 'F' );
   if( mvaVar[ "pt_J2"          ]  == 1 ) factory->AddVariable( "pt_J2"  ,       "P_T(J2) GeV"   , 'F' );
   
   /*
   if( doMultipleOutputs ){
     if (mvaVar["lephard_pt"])       multifactory->AddVariable( "lephard_pt",                 "1st lepton pt",                "GeV", 'F' );
     if (mvaVar["lepsoft_pt"])       multifactory->AddVariable( "lepsoft_pt",                 "2nd lepton pt",                "GeV", 'F' );
     if (mvaVar["dil_dphi"])         multifactory->AddVariable( "dil_dphi",                   "dphi(ll)",                     "",    'F' );
     if (mvaVar["dil_mass"])         multifactory->AddVariable( "dil_mass",                   "M(ll)",                        "GeV", 'F' );
     if (mvaVar["event_type"])       multifactory->AddVariable( "event_type",                 "Dil Flavor Type",              "",    'F' );
     if (mvaVar["met_projpt"])       multifactory->AddVariable( "met_projpt",                 "Proj. MET",                    "GeV", 'F' );
     if (mvaVar["met_pt"])           multifactory->AddVariable( "met_pt",                     "MET",                          "GeV", 'F' );
     if (mvaVar["mt_lephardmet"])    multifactory->AddVariable( "mt_lephardmet",              "MT(lep1,MET)",                 "GeV", 'F' );
     if (mvaVar["mt_lepsoftmet"])    multifactory->AddVariable( "mt_lepsoftmet",              "MT(lep2,MET)",                 "GeV", 'F' );
     if (mvaVar["mthiggs"])          multifactory->AddVariable( "mthiggs",                    "MT(Higgs)",                    "GeV", 'F' );
     if (mvaVar["dphi_lephardmet"])  multifactory->AddVariable( "dphi_lephardmet",            "dphi(lep1,MET)",               "GeV", 'F' );
     if (mvaVar["dphi_lepsoftmet"])  multifactory->AddVariable( "dphi_lepsoftmet",            "dphi(lep2,MET)",               "GeV", 'F' );
     if (mvaVar["lepsoft_fbrem"])    multifactory->AddVariable( "lepsoft_fbrem",              "2nd lepton f_{brem}",          "",    'F' );
     if (mvaVar["lepsoft_eOverPIn"]) multifactory->AddVariable( "lepsoft_eOverPIn",           "2nd lepton E/p",               "",    'F' );
     if (mvaVar["lepsoft_qdphi"])    multifactory->AddVariable( "lepsoft_q * lepsoft_dPhiIn", "2nd lepton q#times#Delta#phi", "",    'F' );
   }
   */

   // You can add so-called "Spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables
   //factory->AddSpectator( "spec1 := var1*2",  "Spectator 1", "units", 'F' );
   //factory->AddSpectator( "spec2 := var1*3",  "Spectator 2", "units", 'F' );

//   TTree* signalTrainingTree =  (TTree*) chSignalTrain;
//   TTree* signalTestTree =  (TTree*) chSignalTest;
//
//   TTree* bkgTrainingTree =  (TTree*) chBkgTrain;
//   TTree* bkgTestTree =  (TTree*) chBkgTest;
   
//    std::cout << "--- TMVAClassification       : Using bkg input files: -------------------" <<  std::endl;
// 
//    TObjArray *listOfBkgFiles = chbackground->GetListOfFiles();
//    TIter bkgFileIter(listOfBkgFiles);
//    TChainElement* currentBkgFile = 0;
// 
//    while((currentBkgFile = (TChainElement*)bkgFileIter.Next())) {
//      std::cout << currentBkgFile->GetTitle() << std::endl;
//    }
// 
//    std::cout << "--- TMVAClassification       : Using sig input files: -------------------" <<  std::endl;
//    
//    TObjArray *listOfSigFiles = chsignal->GetListOfFiles();
//    TIter sigFileIter(listOfSigFiles);
//    TChainElement* currentSigFile = 0;
// 
//    while((currentSigFile = (TChainElement*)sigFileIter.Next())) {
//      std::cout << currentSigFile->GetTitle() << std::endl;
//    }

   // global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;

   // You can add an arbitrary number of signal or background trees
//   factory->AddSignalTree    ( chSignal,     signalWeight     );
//   factory->AddBackgroundTree( chBackground, backgroundWeight );

   factory->AddTree(chSignal, "Signal", signalWeight, sel0+"mini_rand < 0.5", "train");
   factory->AddTree(chSignal, "Signal", signalWeight, sel0+"mini_rand >= 0.5", "test");
   factory->AddTree(chBackground, "Background", backgroundWeight, sel0+"mini_rand < 0.5", "train");
   factory->AddTree(chBackground, "Background", backgroundWeight, sel0+"mini_rand >= 0.5", "test");
   
   // To give different trees for training and testing, do as follows:
   //factory->AddSignalTree( signalTrainingTree, signalWeight, "Training" );
   //factory->AddSignalTree( signalTestTree,     signalWeight,  "Test" );

   //factory->AddBackgroundTree( bkgTrainingTree, backgroundWeight, "Training" );
   //factory->AddBackgroundTree( bkgTestTree,     backgroundWeight,  "Test" );
   
   // Use the following code instead of the above two or four lines to add signal and background
   // training and test events "by hand"
   // NOTE that in this case one should not give expressions (such as "var1+var2") in the input
   //      variable definition, but simply compute the expression before adding the event
   //
   //     // --- begin ----------------------------------------------------------
   //     std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
   //     Float_t  treevars[4], weight;
   //     
   //     // Signal
   //     for (UInt_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   //     for (UInt_t i=0; i<signal->GetEntries(); i++) {
   //        signal->GetEntry(i);
   //        for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //        // add training and test events; here: first half is training, second is testing
   //        // note that the weight can also be event-wise
   //        if (i < signal->GetEntries()/2.0) factory->AddSignalTrainingEvent( vars, signalWeight );
   //        else                              factory->AddSignalTestEvent    ( vars, signalWeight );
   //     }
   //   
   //     // Background (has event weights)
   //     background->SetBranchAddress( "weight", &weight );
   //     for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   //     for (UInt_t i=0; i<background->GetEntries(); i++) {
   //        background->GetEntry(i);
   //        for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //        // add training and test events; here: first half is training, second is testing
   //        // note that the weight can also be event-wise
   //        if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight*weight );
   //        else                                factory->AddBackgroundTestEvent    ( vars, backgroundWeight*weight );
   //     }
   //      // --- end ------------------------------------------------------------
   //
   // --- end of tree registration 
   
   // Set individual event weights (the variables must exist in the original TTree)
   factory->SetSignalWeightExpression    ("mini_weight");
   factory->SetBackgroundWeightExpression("mini_weight");

   /*
   if( doMultipleOutputs ){
     multifactory->AddTree(signal,"Signal");
     multifactory->SetSignalWeightExpression    ("event_scale1fb");
     multifactory->SetBackgroundWeightExpression("event_scale1fb");
     multifactory->SetWeightExpression("event_scale1fb");
     
     if( includeBkg["ww"] ){
       TTree* ww = (TTree*) chww;
       multifactory->AddTree(ww,"WW");
       cout << "Added WW to multi-MVA" << endl;
     }
     if( includeBkg["wjets"] ){
       TTree* wjets = (TTree*) chwjets;
       multifactory->AddTree(wjets,"WJets");
       cout << "Added W+jets to multi-MVA" << endl;
     }
     if( includeBkg["tt"] ){
       TTree* tt = (TTree*) chtt;
       multifactory->AddTree(tt,"tt");
       cout << "Added ttbar multi-MVA" << endl;
     }
   }
   */

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = sel0; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = sel0; // for example: TCut mycutb = "abs(var1)<0.5";

   // Tell the factory how to use the training and testing events
   //
   // If no numbers of events are given, half of the events in the tree are used 
   // for training, and the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut,
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
   
   //Use random splitting
//   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
//                                        "nTrain_Signal=100000:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
   factory->PrepareTrainingAndTestTree( "", "",
                                        "nTrain_Signal=0:nTrain_Background=0:NormMode=None:!V" );

   // if( doMultipleOutputs ){
   //   multifactory->PrepareTrainingAndTestTree( mycuts, mycutb,
   //                                             "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
   // }

   //Use alternate splitting 
   //(this is preferable since it's easier to track which events were used for training, but the job crashes! need to fix this...)
   //factory->PrepareTrainingAndTestTree( mycuts, mycutb,
   //                                     "nTrain_Signal=0:nTrain_Background=0:SplitMode=Alternate:NormMode=NumEvents:!V" );

   // ---- Book MVA methods
   //
   // Please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

   if (Use["CutsSA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                           "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   // Likelihood ("naive Bayes estimator")
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                           "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // Decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD",
                           "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   // PCA-transformed likelihood
   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 

   // Use a kernel density estimator to approximate the PDFs
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); 

   // Use a variable-dependent mix of splines and kernel density estimator
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

   // Test the multi-dimensional probability density estimator
   // here are the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam",
                           "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["PDEFoamBoost"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoamBoost",
                           "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN",
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // H-Matrix (chi2-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" );

   // Linear discriminant (same as Fisher discriminant)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher discriminant (same as LD)
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", 
                           "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" );

   // Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" );

//      factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:VarTransform=N:NCycles=1000:HiddenLayers=N+N:TestRate=5:!UseRegulator:LearningRate=0.2:DecayRate=0.001:BPMode=batch:BatchSize=500"); 

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

   if (Use["MLPBNN"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ); // BFGS training with bayesian regulators

   // CF(Clermont-Ferrand)ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...  

   // Tmlp(Root)ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDTG"]) // Gradient Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTG",
                           "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:NNodesMax=5" );

   if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT",
                           "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   if (Use["BDT1"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT1",
                             "!H:!V:NTrees=200:nEventsMin=300:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=4:PruneMethod=NoPruning" );

   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD",
                           "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // if( doMultipleOutputs ){
   //   if (Use["multi_BDTG"]) // gradient boosted decision trees
   //     multifactory->BookMethod( TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.50:nCuts=20:NNodesMax=8");
   //   if (Use["multi_MLP"]) // neural network
   //     multifactory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:NCycles=1000:HiddenLayers=N+5,5:TestRate=5:EstimatorType=MSE");
   //   if (Use["multi_FDA_GA"]) // functional discriminant with GA minimizer
   //     multifactory->BookMethod( TMVA::Types::kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );
   // }
   
   // For an example of the category classifier usage, see: TMVAClassificationCategory

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events

   // factory->OptimizeAllMethods("SigEffAt001","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","GA");

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs
  
   // Train MVAs using the set of training events
   factory->TrainAllMethods();
  
   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();
  
   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();
  
   // if( doMultipleOutputs ){
   //   // Train multi-MVAs using the set of training events
   //   multifactory->TrainAllMethods();
     
   //   // ---- Evaluate all multi-MVAs using the set of test events
   //   multifactory->TestAllMethods();
     
   //   // ----- Evaluate and compare performance of all configured multi-MVAs
   //   multifactory->EvaluateAllMethods();
   // }
   
   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();
   //if( doMultipleOutputs )  multioutputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;
  
   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
예제 #25
0
void Classification()
{
   TMVA::Tools::Instance();
   TMVA::PyMethodBase::PyInitialize();

   TString outfileName("TMVA.root");
   TFile *outputFile = TFile::Open(outfileName, "RECREATE");

   TMVA::Factory *factory = new TMVA::Factory("TMVAClassification", outputFile,
         "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification");


   factory->AddVariable("myvar1 := var1+var2", 'F');
   factory->AddVariable("myvar2 := var1-var2", "Expression 2", "", 'F');
   factory->AddVariable("var3",                "Variable 3", "units", 'F');
   factory->AddVariable("var4",                "Variable 4", "units", 'F');


   factory->AddSpectator("spec1 := var1*2",  "Spectator 1", "units", 'F');
   factory->AddSpectator("spec2 := var1*3",  "Spectator 2", "units", 'F');


   TString fname = "./tmva_class_example.root";

   if (gSystem->AccessPathName(fname))    // file does not exist in local directory
      gSystem->Exec("curl -O http://root.cern.ch/files/tmva_class_example.root");

   TFile *input = TFile::Open(fname);

   std::cout << "--- TMVAClassification       : Using input file: " << input->GetName() << std::endl;

   // --- Register the training and test trees

   TTree *tsignal     = (TTree *)input->Get("TreeS");
   TTree *tbackground = (TTree *)input->Get("TreeB");

   // global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 1.0;
   Double_t backgroundWeight = 1.0;

   // You can add an arbitrary number of signal or background trees
   factory->AddSignalTree(tsignal,     signalWeight);
   factory->AddBackgroundTree(tbackground, backgroundWeight);


   // Set individual event weights (the variables must exist in the original TTree)
   factory->SetBackgroundWeightExpression("weight");


   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";

   // Tell the factory how to use the training and testing events
   factory->PrepareTrainingAndTestTree(mycuts, mycutb,
                                       "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=0:nTest_Background=0:SplitMode=Random:NormMode=NumEvents:!V");


   ///////////////////
   //Booking         //
   ///////////////////
   // Boosted Decision Trees

   //PyMVA methods
   factory->BookMethod(TMVA::Types::kPyRandomForest, "PyRandomForest",
                       "!V:NEstimators=150:Criterion=gini:MaxFeatures=auto:MaxDepth=3:MinSamplesLeaf=1:MinWeightFractionLeaf=0:Bootstrap=kTRUE");
   factory->BookMethod(TMVA::Types::kPyAdaBoost, "PyAdaBoost",
                       "!V:BaseEstimator=None:NEstimators=100:LearningRate=1:Algorithm=SAMME.R:RandomState=None");
   factory->BookMethod(TMVA::Types::kPyGTB, "PyGTB",
                       "!V:NEstimators=150:Loss=deviance:LearningRate=0.1:Subsample=1:MaxDepth=6:MaxFeatures='auto'");


   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();
   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

}
예제 #26
0
void TMVAClassificationCategory()
{
    //---------------------------------------------------------------
    // Example for usage of different event categories with classifiers

    std::cout << std::endl << "==> Start TMVAClassificationCategory" << std::endl;

    bool batchMode = false;

    // Create a new root output file.
    TString outfileName( "TMVA.root" );
    TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

    // Create the factory object (see TMVAClassification.C for more information)

    std::string factoryOptions( "!V:!Silent:Transformations=I;D;P;G,D" );
    if (batchMode) factoryOptions += ":!Color:!DrawProgressBar";

    TMVA::Factory *factory = new TMVA::Factory( "TMVAClassificationCategory", outputFile, factoryOptions );

    // Define the input variables used for the MVA training
    factory->AddVariable( "var1", 'F' );
    factory->AddVariable( "var2", 'F' );
    factory->AddVariable( "var3", 'F' );
    factory->AddVariable( "var4", 'F' );

    // You can add so-called "Spectator variables", which are not used in the MVA training,
    // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
    // input variables, the response values of all trained MVAs, and the spectator variables
    factory->AddSpectator( "eta" );

    // Load the signal and background event samples from ROOT trees
    TFile *input(0);
    TString fname( "" );
    if (UseOffsetMethod) fname = "data/toy_sigbkg_categ_offset.root";
    else                 fname = "data/toy_sigbkg_categ_varoff.root";
    if (!gSystem->AccessPathName( fname )) {
        // first we try to find tmva_example.root in the local directory
        std::cout << "--- TMVAClassificationCategory: Accessing " << fname << std::endl;
        input = TFile::Open( fname );
    }

    if (!input) {
        std::cout << "ERROR: could not open data file: " << fname << std::endl;
        exit(1);
    }

    TTree *signal     = (TTree*)input->Get("TreeS");
    TTree *background = (TTree*)input->Get("TreeB");

    /// Global event weights per tree (see below for setting event-wise weights)
    Double_t signalWeight     = 1.0;
    Double_t backgroundWeight = 1.0;

    /// You can add an arbitrary number of signal or background trees
    factory->AddSignalTree    ( signal,     signalWeight     );
    factory->AddBackgroundTree( background, backgroundWeight );

    // Apply additional cuts on the signal and background samples (can be different)
    TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
    TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";

    // Tell the factory how to use the training and testing events
    factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                         "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

    // ---- Book MVA methods

    // Fisher discriminant
    factory->BookMethod( TMVA::Types::kFisher, "Fisher", "!H:!V:Fisher" );

    // Likelihood
    factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood",
                         "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

    // --- Categorised classifier
    TMVA::MethodCategory* mcat = 0;

    // The variable sets
    TString theCat1Vars = "var1:var2:var3:var4";
    TString theCat2Vars = (UseOffsetMethod ? "var1:var2:var3:var4" : "var1:var2:var3");

    // Fisher with categories
    TMVA::MethodBase* fiCat = factory->BookMethod( TMVA::Types::kCategory, "FisherCat","" );
    mcat = dynamic_cast<TMVA::MethodCategory*>(fiCat);
    mcat->AddMethod( "abs(eta)<=1.3", theCat1Vars, TMVA::Types::kFisher, "Category_Fisher_1","!H:!V:Fisher" );
    mcat->AddMethod( "abs(eta)>1.3",  theCat2Vars, TMVA::Types::kFisher, "Category_Fisher_2","!H:!V:Fisher" );

    // Likelihood with categories
    TMVA::MethodBase* liCat = factory->BookMethod( TMVA::Types::kCategory, "LikelihoodCat","" );
    mcat = dynamic_cast<TMVA::MethodCategory*>(liCat);
    mcat->AddMethod( "abs(eta)<=1.3",theCat1Vars, TMVA::Types::kLikelihood,
                     "Category_Likelihood_1","!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );
    mcat->AddMethod( "abs(eta)>1.3", theCat2Vars, TMVA::Types::kLikelihood,
                     "Category_Likelihood_2","!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

    // ---- Now you can tell the factory to train, test, and evaluate the MVAs

    // Train MVAs using the set of training events
    factory->TrainAllMethods();

    // ---- Evaluate all MVAs using the set of test events
    factory->TestAllMethods();

    // ----- Evaluate and compare performance of all configured MVAs
    factory->EvaluateAllMethods();

    // --------------------------------------------------------------

    // Save the output
    outputFile->Close();

    std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
    std::cout << "==> TMVAClassificationCategory is done!" << std::endl;

    // Clean up
    delete factory;

    // Launch the GUI for the root macros
    if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
//void TMVAClassification( TString myMethodList = "" ) 
void Example_Eric( TString myMethodList = "" ) 
{
   // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc
   // if you use your private .rootrc, or run from a different directory, please copy the 
   // corresponding lines from .rootrc

   // methods to be processed can be given as an argument; use format:
   //
   // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\)
   //
   // if you like to use a method via the plugin mechanism, we recommend using
   // 
   // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\)
   // (an example is given for using the BDT as plugin (see below),
   // but of course the real application is when you write your own
   // method based)

   // this loads the library
   TMVA::Tools::Instance();

   //---------------------------------------------------------------
   // default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   Use["Cuts"]            =0;
   Use["CutsD"]           =0;
   Use["CutsPCA"]         =0;
   Use["CutsGA"]          =0;
   Use["CutsSA"]          =0;
   // ---
   Use["Likelihood"]      =0;
   Use["LikelihoodD"]     =0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"]   =1; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"]   =0;
   Use["LikelihoodMIX"]   =0;
   // ---
   Use["PDERS"]           =0;
   Use["PDERSD"]          =0;
   Use["PDERSPCA"]        =0;
   Use["PDERSkNN"]        =0; // depreciated until further notice
   Use["PDEFoam"]         =0;
   // --
   Use["KNN"]             =0;
   // ---
   Use["HMatrix"]         =0;
   Use["Fisher"]          =0;
   Use["FisherG"]         =0;
   Use["BoostedFisher"]   =0;
   Use["LD"]              =0;
   // ---
   Use["FDA_GA"]          =0;
   Use["FDA_SA"]          =0;
   Use["FDA_MC"]          =0;
   Use["FDA_MT"]          =0;
   Use["FDA_GAMT"]        =0;
   Use["FDA_MCMT"]        =0;
   // ---
   Use["MLP"]             = 1; // this is the recommended ANN
   Use["MLPBFGS"]         = 0; // recommended ANN with optional training method
   Use["CFMlpANN"]        =0; // *** missing
   Use["TMlpANN"]         =0; 
   // ---
   Use["SVM"]             =1;
   // ---
   Use["BDT"]             =1;
   Use["BDTD"]            =0;
   Use["BDTG"]            =0;
   Use["BDTB"]            =0;
   // ---
   Use["RuleFit"]         =1;
   // ---
   Use["Plugin"]          =0;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // Create a new root output file.
   TString outfileName( "TMVA_Eric2.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory will
   // then run the performance analysis for you.
   //
   // The first argument is the base of the name of all the
   // weightfiles in the directory weight/ 
   //
   // The second argument is the output file for the training results
   // All TMVA output can be suppressed by removing the "!" (not) in 
   // front of the "Silent" argument in the option string
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, 
                                               "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D" );

   // If you wish to modify default settings 
   // (please check "src/Config.h" to see all available global options)
   //    (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   //    (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training
   // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
 //   factory->AddVariable( "myvar1 := var1+var2", 'F' );
//    factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' );
//    factory->AddVariable( "var3",                "Variable 3", "units", 'F' );
//    factory->AddVariable( "var4",                "Variable 4", "units", 'F' );

   factory->AddVariable( "Mqq := Mqq", 'F' );
   factory->AddVariable( "Pt_qq := Pt_qq", 'F' );
   factory->AddVariable( "Eta_qq := Eta_qq", 'F' );
   factory->AddVariable( "Charge_qq := Charge_qq", 'F' );
   factory->AddVariable( "DPhi_ll := DPhi_ll", 'F' );
   factory->AddVariable( "DPt_ll := DPt_ll", 'F' );
   //factory->AddVariable( "MinDPhi_lMET := MinDPhi_lMET", 'F' );
   //factory->AddVariable( "Aplanarity := aplanarity", 'F' );
   //factory->AddVariable( "chargeEta := chargeEta",  'F' );
   //factory->AddVariable( "MET := Met",  'F' );
   //factory->AddVariable( "MtauJet := MtauJet",  'F' );
   //factory->AddVariable( "HT := Ht",  'F' );
   //factory->AddVariable( "Chi2 := kinFitChi2",  'F' );
   //factory->AddVariable( "DeltaPhiTauMET := DeltaPhiTauMet",  'F' );
   //factory->AddVariable( "Mt := Mt",  'F' );



   // You can add so-called "Spectator variables", which are not used in the MVA training, 
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
   // input variables, the response values of all trained MVAs, and the spectator variables
   //  factory->AddSpectator( "spec1:=var1*2",  "Spectator 1", "units", 'F' );
   // factory->AddSpectator( "spec2:=var1*3",  "Spectator 2", "units", 'F' );

   // read training and test data
   if (ReadDataFromAsciiIFormat) {
      // load the signal and background event samples from ascii files
      // format in file must be:
      // var1/F:var2/F:var3/F:var4/F
      // 0.04551   0.59923   0.32400   -0.19170
      // ...

      TString datFileS = "tmva_example_sig.dat";
      TString datFileB = "tmva_example_bkg.dat";

      factory->SetInputTrees( datFileS, datFileB );
   }
   else {

  
    //TFile* f0 = new TFile("/opt/sbg/data/data1/cms/lebihan/clean_january_2012_2/CMSSW_4_2_8_patch7/src/MiniTreeAnalysis/NTupleAnalysis/macros/TopTauJets/TMVA_sig_newLumi.root");
    //TFile* f1 = new TFile("/opt/sbg/data/data1/cms/lebihan/clean_january_2012_2/CMSSW_4_2_8_patch7/src/MiniTreeAnalysis/NTupleAnalysis/macros/TopTauJets/TMVA_bkg_newLumi.root");
    TFile* f0 = TFile::Open("/opt/sbg/data/data1/cms/echabert/ttbarMET/ProdAlexMars13/CMSSW_5_3_2_patch4/src/NTuple/NTupleAnalysis/macros/TTbarMET/backup_outputProof10-04-13_16-00-57/proof_ttW.root");
    TFile* f1 = TFile::Open("/opt/sbg/data/data1/cms/echabert/ttbarMET/ProdAlexMars13/CMSSW_5_3_2_patch4/src/NTuple/NTupleAnalysis/macros/TTbarMET/backup_outputProof10-04-13_16-00-57/proof_tt-dilepton.root");
  
    TTree *signal     = (TTree*)f0->Get("theTree2");
    TTree *background = (TTree*)f1->Get("theTree2");
    cout<<"trees: "<<signal<<" "<<background<<endl;

    //Double_t backgroundWeight = 1.0;
    //Double_t signalWeight     = 1.0;
    Double_t signalWeight     = 0.30*20/185338;
    Double_t backgroundWeight = 222.*0.1*20/9982625;
    // ====== register trees ====================================================
    //
    // the following method is the prefered one:
    // you can add an arbitrary number of signal or background trees

    factory->AddSignalTree    ( signal,     signalWeight     );
    factory->AddBackgroundTree( background, backgroundWeight );

     //   factory->AddSignalTree    ( signal );
     //factory->AddBackgroundTree( background );


      // To give different trees for training and testing, do as follows:
      //    factory->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
      //    factory->AddSignalTree( signalTestTree,     signalTestWeight,  "Test" );

      // Use the following code instead of the above two or four lines to add signal and background 
      // training and test events "by hand"
      // NOTE that in this case one should not give expressions (such as "var1+var2") in the input 
      //      variable definition, but simply compute the expression before adding the event
      // 
      //    // --- begin ----------------------------------------------------------
      //    std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
      //    Float_t  treevars[4];
      //    for (Int_t ivar=0; ivar<4; ivar++) signal->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
      //    for (Int_t i=0; i<signal->GetEntries(); i++) {
      //       signal->GetEntry(i);
      //       for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
      //       // add training and test events; here: first half is training, second is testing
      //       // note that the weight can also be event-wise	
      //       if (i < signal->GetEntries()/2) factory->AddSignalTrainingEvent( vars, signalWeight ); 
      //       else                            factory->AddSignalTestEvent    ( vars, signalWeight ); 
      //    }
      //
      //    for (Int_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
      //    for (Int_t i=0; i<background->GetEntries(); i++) {
      //       background->GetEntry(i); 
      //       for (Int_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
      //       // add training and test events; here: first half is training, second is testing
      //       // note that the weight can also be event-wise	
      //       if (i < background->GetEntries()/2) factory->AddBackgroundTrainingEvent( vars, backgroundWeight ); 
      //       else                                factory->AddBackgroundTestEvent    ( vars, backgroundWeight ); 
      //    }
      //    // --- end ------------------------------------------------------------
      //
      // ====== end of register trees ==============================================
   }
   
   // This would set individual event weights (the variables defined in the 
   // expression need to exist in the original TTree)
   //    for signal    : factory->SetSignalWeightExpression("weight1*weight2");
   //    for background: factory->SetBackgroundWeightExpression("weight1*weight2");
   //factory->SetBackgroundWeightExpression("weight_BTAG");
   //factory->SetSignalWeightExpression("weight*weight_BTAG");
   // Apply additional cuts on the signal and background samples (can be different)
  
   // TCut mycuts = "MHt >=0  && MMTauJet >=0 && MM3 >= 0"; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   // TCut mycutb = "MHt >=0  && MMTauJet >=0 && MM3 >= 0"; // for example: TCut mycutb = "abs(var1)<0.5";
   //TCut mycuts = "Met>=20 "; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   //TCut mycutb = "Met>=20 "; // for example: TCut mycutb = "abs(var1)<0.5";
   TCut mycuts;
   TCut mycutb;

   // tell the factory to use all remaining events in the trees after training for testing:
   factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                        "nTrain_Signal=3000:nTrain_Background=5000:SplitMode=Random:NormMode=NumEvents:!V" );

   // If no numbers of events are given, half of the events in the tree are used for training, and 
   // the other half for testing:
   //    factory->PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );  
   // To also specify the number of testing events, use:
   //    factory->PrepareTrainingAndTestTree( mycut, 
   //                                         "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );  

   // ---- Book MVA methods
   //
   // please lookup the various method configuration options in the corresponding cxx files, eg:
   // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
   // it is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( TMVA::Types::kCuts, "Cuts", 
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsD", 
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsPCA", 
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );
   
   if (Use["CutsSA"])
      factory->BookMethod( TMVA::Types::kCuts, "CutsSA",
                           "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );
   
   // Likelihood
   if (Use["Likelihood"])
      factory->BookMethod( TMVA::Types::kLikelihood, "Likelihood", 
                           "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ); 

   // test the decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodD", 
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ); 

   if (Use["LikelihoodPCA"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodPCA", 
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ); 
 
   // test the new kernel density estimator
   if (Use["LikelihoodKDE"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodKDE", 
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ); 

   // test the mixed splines and kernel density estimator (depending on which variable)
   if (Use["LikelihoodMIX"])
      factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", 
                           "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); 

   // test the multi-dimensional probability density estimator
   // here are the options strings for the MinMax and RMS methods, respectively:
   //      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );   
   //      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );   
   if (Use["PDERS"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERS", 
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSkNN"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSkNN", 
                           "!H:!V:VolumeRangeMode=kNN:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSD", 
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( TMVA::Types::kPDERS, "PDERSPCA", 
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", 
                           "H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0333:nActiveCells=500:nSampl=2000:nBin=5:CutNmin=T:Nmin=100:Kernel=None:Compress=T" );

   // K-Nearest Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( TMVA::Types::kKNN, "KNN", 
                           "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );
   // H-Matrix (chi2-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( TMVA::Types::kHMatrix, "HMatrix", "!H:!V" ); 

   // Fisher discriminant   
   if (Use["Fisher"])
      factory->BookMethod( TMVA::Types::kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=60:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2");

   // Linear discriminant (same as Fisher)
   if (Use["LD"])
      factory->BookMethod( TMVA::Types::kLD, "LD", "H:!V:VarTransform=None" );

	// Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
   if (Use["FDA_MC"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );
   
   if (Use["FDA_GA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // can also use Simulated Annealing (SA) algorithm (see Cuts_SA options])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_SA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( TMVA::Types::kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"])
      factory->BookMethod( TMVA::Types::kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5" );

   if (Use["MLPBFGS"])
      factory->BookMethod( TMVA::Types::kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS" );


   // CF(Clermont-Ferrand)ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( TMVA::Types::kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ); // n_cycles:#nodes:#nodes:...  
  
   // Tmlp(Root)ANN
   if (Use["TMlpANN"])
      factory->BookMethod( TMVA::Types::kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ); // n_cycles:#nodes:#nodes:...
  
   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );
   
   // Boosted Decision Trees
   if (Use["BDTG"]) // Gradient Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTG", 
                           "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" );

   if (Use["BDT"])  // Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDT", 
                           "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );
   
   if (Use["BDTB"]) // Bagging
      factory->BookMethod( TMVA::Types::kBDT, "BDTB", 
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( TMVA::Types::kBDT, "BDTD", 
                           "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" );
   
   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( TMVA::Types::kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // --------------------------------------------------------------------------------------------------

   // As an example how to use the ROOT plugin mechanism, book BDT via
   // plugin mechanism
   if (Use["Plugin"]) {
         //
         // first the plugin has to be defined, which can happen either through the following line in the local or global .rootrc:
         //
         // # plugin handler          plugin name(regexp) class to be instanciated library        constructor format
         // Plugin.TMVA@@MethodBase:  ^BDT                TMVA::MethodBDT          TMVA.1         "MethodBDT(TString,TString,DataSet&,TString)"
         // 
         // or by telling the global plugin manager directly
      gPluginMgr->AddHandler("TMVA@@MethodBase", "BDT", "TMVA::MethodBDT", "TMVA.1", "MethodBDT(TString,TString,DataSet&,TString)");
      factory->BookMethod( TMVA::Types::kPlugins, "BDT",
                           "!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=50" );
   }

   // --------------------------------------------------------------------------------------------------

   // ---- Now you can tell the factory to train, test, and evaluate the MVAs

   // Train MVAs using the set of training events
   factory->TrainAllMethodsForClassification();

   // ---- Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // ----- Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();    

   // --------------------------------------------------------------
   
   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;      

   delete factory;

   // Launch the GUI for the root macros
   if (!gROOT->IsBatch()) TMVAGui( outfileName );
}
예제 #28
0
// Trains one AdaBoost BDT with TMVA on the "ntp1" chain and stores the result in
// "mva[Dss]<Type><Channel>.root" (the same base name is used for the factory).
//
//   Type     - tag appended to the output name. "Dl" selects the MCType-based
//              background definition from bkgCuts; any other value uses "MCType==0".
//   iChannel - 1-based channel index, also used in the "candType==" cut:
//              1 = D0, 2 = Ds0, 3 = Dp, 4 = Dsp.
//   Sample   - "Dss" switches to the pi0 variable list (Dpi0Vari), the "pmisspi0"
//              cut and the "MCType>12" signal definition; any other value uses the
//              default list (sigVari) and "candPMiss".
//
// On success the macro finishes by sending ".q" to the interpreter, i.e. it ends the
// ROOT session. On invalid arguments or an unwritable output file it prints a message
// and returns without training (and without quitting).
void mvaDonut(TString Type = "Dl", int iChannel = 1, TString Sample = "Sig") {

  TString Channels[] = {"D0","Ds0","Dp","Dsp"};
  const int nChannels = 4;

  // iChannel indexes Channels[], sigCuts[] and bkgCuts[][] below; reject anything
  // outside the table instead of reading past the end of the arrays.
  if(iChannel < 1 || iChannel > nChannels){
    std::cout << "mvaDonut: iChannel must be between 1 and " << nChannels
              << ", got " << iChannel << std::endl;
    return;
  }

  const int isDss = (Sample=="Dss") ? 1 : 0;

  TString fname = "mva";
  if(isDss) fname += Sample;
  fname += Type;
  fname += Channels[iChannel-1];
  TString outfileName = fname;
  outfileName += ".root";

  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
  if(!outputFile || outputFile->IsZombie()){
    std::cout << "mvaDonut: could not create output file " << outfileName << std::endl;
    return;
  }

  // Colour output is disabled when ROOT runs in batch mode.
  TMVA::Factory *factory = new TMVA::Factory( fname, outputFile,
                                              Form("!V:!Silent:%sColor", gROOT->IsBatch()?"!":"") );

  TChain c("ntp1");
  c.Add("~/releases/ntuplePID50/workdir/AWG82/ntuples/small/Add_R24MVA_RunAll.root");

  // Signal definition per channel (entries 0-3); entry 4 is used for the Dss sample.
  TString sigCuts[] = {"(MCType==1||MCType==3||MCType==5)", "(MCType==2||MCType==4||MCType==6)",
                       "(MCType==7||MCType==9||MCType==11)", "(MCType==8||MCType==10||MCType==12)",
                       "MCType>12"};
  // Background definition, indexed as [isDss][(iChannel-1)/2].
  TString bkgCuts[2][2] = {{"MCType>6", "(MCType>0&&MCType<7||MCType>12)"},
                           {"MCType>0&&MCType<13","MCType>0&&MCType<13"}};

  TString sigStr = "candLepTru==1&&";
  if(isDss) sigStr += "pmisspi0"; else sigStr += "candPMiss";
  sigStr += ">0.2&&candType==";
  sigStr += iChannel;
  sigStr += "&&";
  if(isDss) sigStr += sigCuts[4]; else sigStr += sigCuts[iChannel-1];

  TString bkgStr = "candType==";
  bkgStr += iChannel;
  bkgStr += "&&";
  if(isDss) bkgStr += "pmisspi0"; else bkgStr += "candPMiss";
  bkgStr += ">0.2&&";
  if(Type=="Dl") bkgStr += bkgCuts[isDss][(iChannel-1)/2];
  else bkgStr += "MCType==0";

  TCut sigCut = "1", bkgCut = "1", mycuts = "", mycutb = "";
  sigCut += sigStr;
  bkgCut += bkgStr;

  // Alternative variable sets; exactly one block (counts + both arrays) must be active.
  // nSig / nDpi0 have to match the number of entries in sigVari / Dpi0Vari.

//   --- Base ---
//   int nSig = 9, nDpi0 = 10;
//   TString sigVari[] = {"candEExtra","candMES","candDmass","candDeltam","candTagChargedMult","candBTagDeltam",
//                        "candBTagDmass","candDeltaE","candCosT"};
//   TString Dpi0Vari[] = {"mpi0","candDmass","dmpi0","eextrapi0","ppi0","e1pi0","candCosT","candDeltam",
//                         "candMES","candDeltaE"};

//   --- NoDmNoMp0 ---
//   int nSig = 8, nDpi0 = 9;
//   TString sigVari[] = {"candEExtra","candMES","candDmass","candDeltam","candTagChargedMult",
//                        "candBTagDmass","candDeltaE","candCosT"};
//   TString Dpi0Vari[] = {"candDmass","dmpi0","eextrapi0","ppi0","e1pi0","candCosT","candDeltam",
//                         "candMES","candDeltaE"};
//   sigCuts[4] = "MCType>12&&mpi0>.125&&mpi0<.145";

//  ---  NoMes ---
//   int nSig = 8, nDpi0 = 9;
//   TString sigVari[] = {"candEExtra","candDmass","candDeltam","candTagChargedMult","candBTagDeltam",
//                        "candBTagDmass","candDeltaE","candCosT"};
//   TString Dpi0Vari[] = {"mpi0","candDmass","dmpi0","eextrapi0","ppi0","e1pi0","candCosT","candDeltam",
//                         "candDeltaE"};

//   --- NoMulYesDm ---
  int nSig = 8, nDpi0 = 11;
  TString sigVari[] = {"candEExtra","candMES","candDmass","candDeltam","candBTagDeltam",
                       "candBTagDmass","candDeltaE","candCosT"};
  TString Dpi0Vari[] = {"mpi0","candDmass","dmpi0","eextrapi0","ppi0","e1pi0","candCosT","candDeltam",
                        "candMES","candDeltaE","candBTagDeltam"};

  factory->SetInputTrees(&c, sigCut, bkgCut);
  if(isDss==0){
    for(int vari = 0; vari < nSig; vari++){
      // candDeltam is not used as an input for the odd channels (1 and 3).
      if(sigVari[vari]=="candDeltam" && iChannel%2==1) continue;
      // The multiplicity is registered as an integer, everything else as a float.
      char variChar = 'F';
      if(sigVari[vari]=="candTagChargedMult") variChar = 'I';
      factory->AddVariable(sigVari[vari], variChar);
    }
  } else {
    for(int vari = 0; vari < nDpi0; vari++){
      if(Dpi0Vari[vari]=="candDeltam" && iChannel%2==1) continue;
      factory->AddVariable(Dpi0Vari[vari], 'F');
    }
  }

  factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                       "NSigTest=100:NBkgTest=100:SplitMode=Random:NormMode=NumEvents:!V" );

  factory->BookMethod( TMVA::Types::kBDT, "BDT",
                       "!H:!V:NTrees=500:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=2.5" );

  factory->TrainAllMethods();
  factory->TestAllMethods();
  factory->EvaluateAllMethods();

  // Save the output
  outputFile->Close();
  std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
  delete factory;

  // Launch the GUI for the root macros
  //if (!gROOT->IsBatch()) TMVAGui( outfileName );
  gROOT->ProcessLine(".q");
}
예제 #29
0
파일: MVA.C 프로젝트: amanjong/AnalysisCMS
//------------------------------------------------------------------------------
// MVATrain
//
// Trains, tests and evaluates one TMVA MLP for the given signal sample.
//
//   signal - name of the signal process; it is passed to AddProcess("signal", ...),
//            used as the TMVA factory (job) name and as the base name of the
//            output file  trainingdir + signal + ".root".
//
// The function clears the file-level _mctree container, refills it (and
// _signaltree) through AddProcess, and registers every tree with weight 1.0 and
// the per-event weight expression "eventW".
//
// If the output file cannot be created the function prints a message and returns
// before touching _mctree or creating the factory.
//------------------------------------------------------------------------------
void MVATrain(TString signal)
{
  const TString outputname = trainingdir + signal + ".root";

  TFile* outputfile = TFile::Open(outputname, "recreate");

  // TFile::Open returns a null pointer on failure; without this guard the factory
  // would be handed a null file and outputfile->Close() below would crash.
  if (!outputfile || outputfile->IsZombie())
    {
      printf(" [MVATrain] Cannot create output file %s\n", outputname.Data());

      return;
    }


  // Factory
  //----------------------------------------------------------------------------
  TMVA::Factory* factory = new TMVA::Factory(signal, outputfile,
                                             "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification");


  // Get the trees
  //----------------------------------------------------------------------------
  _mctree.clear();

  AddProcess("signal", signal);
  AddProcess("background", "HZJ_HToWW_M125");
  AddProcess("background", "ggZH_HToWW_M125");

  //  AddProcess("background", "14_HZ");
  //  AddProcess("background", "10_HWW");
  //  AddProcess("background", "06_WW");
  //  AddProcess("background", "02_WZTo3LNu");
  //  AddProcess("background", "03_ZZ");
  //  AddProcess("background", "11_Wg");
  //  AddProcess("background", "07_ZJets");
  //  AddProcess("background", "09_TTV");
  //  AddProcess("background", "05_ST");
  //  AddProcess("background", "00_Fakes");

  // Global per-tree weight; the event-by-event weight comes from "eventW" below
  Double_t weight = 1.0;

  factory->AddSignalTree(_signaltree, weight);

  for (UInt_t i=0; i<_mctree.size(); i++) factory->AddBackgroundTree(_mctree[i], weight);

  factory->SetWeightExpression("eventW");


  // Add variables
  //----------------------------------------------------------------------------
  // Be careful with the order: it must be respected at the reading step
  // factory->AddVariable("<var1>+<var2>", "pretty title", "unit", 'F');

  //  factory->AddVariable("channel",        "", "", 'F');
  factory->AddVariable("metPfType1",     "", "", 'F');
  factory->AddVariable("m2l",            "", "", 'F');
  //  factory->AddVariable("njet",           "", "", 'F');
  //  factory->AddVariable("nbjet20cmvav2l", "", "", 'F');
  factory->AddVariable("lep1pt",         "", "", 'F');
  factory->AddVariable("lep2pt",         "", "", 'F');
  //  factory->AddVariable("jet1pt",         "", "", 'F');
  factory->AddVariable("jet2pt",         "", "", 'F');
  factory->AddVariable("mtw1",           "", "", 'F');
  factory->AddVariable("dphill",         "", "", 'F');
  factory->AddVariable("dphilep1jet1",   "", "", 'F');
  //  factory->AddVariable("dphilep1jet2",   "", "", 'F');
  //  factory->AddVariable("dphilmet1",      "", "", 'F');
  //  factory->AddVariable("dphilep2jet1",   "", "", 'F');
  //  factory->AddVariable("dphilep2jet2",   "", "", 'F');
  //  factory->AddVariable("dphilmet2",      "", "", 'F');
  //  factory->AddVariable("dphijj",         "", "", 'F');
  //  factory->AddVariable("dphijet1met",    "", "", 'F');
  //  factory->AddVariable("dphijet2met",    "", "", 'F');
  factory->AddVariable("dphillmet",      "", "", 'F');


  // Preselection cuts and preparation
  //----------------------------------------------------------------------------
  // No preselection cut; all four event counts are left at 0 in the option string
  factory->PrepareTrainingAndTestTree("", ":nTrain_Signal=0:nTest_Signal=0:nTrain_Background=0:nTest_Background=0:SplitMode=Alternate:MixMode=Random:!V");


  // Book MVA
  //----------------------------------------------------------------------------
  factory->BookMethod(TMVA::Types::kMLP, "MLP",
                      "H:!V:NeuronType=sigmoid:VarTransform=N:NCycles=600:HiddenLayers=25,10:TestRate=5:!UseRegulator");


  // Train, test and evaluate MVA
  //----------------------------------------------------------------------------
  factory->TrainAllMethods();     // Train using the set of training events
  factory->TestAllMethods();      // Evaluate using the set of test events
  factory->EvaluateAllMethods();  // Evaluate and compare performance


  // Save the output
  //----------------------------------------------------------------------------
  outputfile->Close();

  delete factory;
}
예제 #30
0
// TMVA round-trip check.
//
//  1. Trains a BDT on TreeS/TreeB of ./tmva_class_exampleD.root through a
//     TMVA::Factory (results go to Outputfile.root).
//  2. Re-opens the input, applies the trained classifier to TreeS with a
//     TMVA::Reader and writes the result to AppliedFile.root.
//  3. Reads AppliedFile.root back and prints its sorted "BDT_response" values next
//     to the sorted values of the "AppliedTree" tree in the TMVA reference file.
//
// Returns 0 when all steps ran, 1 as soon as a file, a tree or a classifier step
// is unavailable.
int main(){
  TMVA::Tools::Instance();
  std::cout<<"Hello world"<<std::endl;

  // Single definition of the input path; the file is opened once for training
  // and once more (fresh TFile) for the application step.
  const std::string InputName= "./tmva_class_exampleD.root";

  TFile* OutputFile = TFile::Open("Outputfile.root","RECREATE");
  if(!OutputFile){
    std::cout<<"Could not create Outputfile.root"<<std::endl;
    return 1;
  }

  TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", OutputFile,
                                              "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

  // Input variables: var3 and var4 are used directly, var1 and var2 only enter
  // through the two combined variables. The order in Variables is the order in
  // which they are registered with both the factory and the reader.
  std::vector<VMVariable*> Variables;
  MVariable* Var3= new MVariable("var3",F,none);
  MVariable* Var4 = new MVariable("var4",F,none);
  Variables.push_back(Var3);
  Variables.push_back(Var4);
  MVariable* Var1 = new MVariable("var1",F,none);
  MVariable* Var2 = new MVariable("var2",F,none);

  MultiVariable* MyVar1 = new MultiVariable("Var1+Var2",sum);
  MyVar1->AddVariable(Var1);
  MyVar1->AddVariable(Var2);
  Variables.push_back(MyVar1);

  MultiVariable* MyVar2 = new MultiVariable("Minus",subtract);
  MyVar2->AddVariable(Var1);
  MyVar2->AddVariable(Var2);
  Variables.push_back(MyVar2);

  TFile *input = TFile::Open(InputName.c_str());
  if(!input){
    std::cout<<"Could not open "<<InputName<<std::endl;
    return 1;
  }

  TTree *signal = (TTree*)input->Get("TreeS");
  TTree *background=(TTree*)input->Get("TreeB");
  if(!signal || !background){
    std::cout<<"TreeS or TreeB not found in "<<InputName<<std::endl;
    return 1;
  }

  Double_t signalWeight     = 1.0;
  Double_t backgroundWeight = 1.0;

  factory->AddSignalTree    ( signal,     signalWeight     );
  factory->AddBackgroundTree( background, backgroundWeight );

  for(auto v:Variables){
    factory->AddVariable(v->GetFactoryName(),v->GetType());
  }

  factory->SetBackgroundWeightExpression( "weight" );

  TCut mycuts = "";
  TCut mycutb = "";

  factory->PrepareTrainingAndTestTree( mycuts, mycutb,
                                       "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );

  std::vector<MClassifier*> Classifiers;

  Classifiers.push_back(new MClassifier(TMVA::Types::kBDT, "BDT",
                                        "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20"));

  for(auto C:Classifiers){
    if(!(C->AddMethodToFactory(factory))){
      std::cout<<"Booking classifier failed"<<std::endl;
      return 1;
    }
  }

  factory->TrainAllMethods();

  factory->TestAllMethods();

  factory->EvaluateAllMethods();

  OutputFile->Close();

  delete factory;

  // ---- Application step -------------------------------------------------------
  TMVA::Reader *reader = new TMVA::Reader( "!Color:!Silent" );

  for(auto v: Variables){
    reader->AddVariable(v->GetFactoryName(),v->GetReaderAddress());
  }

  for(auto C:Classifiers){
    if(!(C->AddMethodToReader(reader,"./weights/","TMVAClassification"))){
      std::cout<<"Failed adding classifer to reader"<<std::endl;
      return 1;
    }
  }

  TFile* Input =  TFile::Open(InputName.c_str());
  if(!Input){
    std::cout<<"Could not open "<<InputName<<std::endl;
    return 1;
  }
  TTree* TreeToEvaluate= (TTree*)Input->Get("TreeS");
  if(!TreeToEvaluate){
    std::cout<<"TreeS not found in "<<InputName<<std::endl;
    return 1;
  }

  // The output file is created before cloning so that the (empty) clone is
  // attached to it.
  TFile* AppliedFile =  new TFile("AppliedFile.root","RECREATE");
  if(AppliedFile->IsZombie()){
    std::cout<<"Could not create AppliedFile.root"<<std::endl;
    return 1;
  }
  TTree* AppliedTree=TreeToEvaluate->CloneTree(0);

  for(auto C:Classifiers){
    if(!(C->MakeBranch(AppliedTree)))return 1;
  }

  for(auto Var:Variables){
    if(!(Var->SetBA(TreeToEvaluate))){
      std::cout<<"Problem Setting Branch addresses"<<std::endl;
      return 1;
    }
  }

  Long64_t N=TreeToEvaluate->GetEntries();
  LoopTimer LT(0.05);
  int vetoedeventcounter=0;
  Long64_t iStart=0;
  Long64_t iEnd=N;

  for(Long64_t i=iStart;i<iEnd;++i){
    LT.DeclareLoopStart(iEnd-iStart);
    TreeToEvaluate->GetEntry(i);

    // Every variable is evaluated for every event; the event is vetoed as soon as
    // any of them reports failure (previously only the last result was kept).
    bool useevent=true;
    for(auto Var:Variables){
      if(!Var->DoOperation()) useevent=false;
    }
    if(!useevent){
      vetoedeventcounter++;
      continue;
    }

    for(auto C:Classifiers){
      if(!(C->Apply(reader)))return 1;
    }

    AppliedTree->Fill();
  }
  std::cout<<"Vetoed events= "<<vetoedeventcounter<<std::endl;

  AppliedTree->Write();
  AppliedFile->Close();
  std::cout<<"Got here"<<std::endl;

  // ---- Compare Applied file from here with applied file from TMVA tests -------
  TFile* ReadAppliedFile =  TFile::Open("AppliedFile.root");
  if(!ReadAppliedFile){
    std::cout<<"File open faild"<<std::endl;
    return 1;
  }

  TTree* AppliedTreeRead=(TTree*)ReadAppliedFile->Get("TreeS");
  if(!AppliedTreeRead){
    std::cout<<"NUll pointer to tree"<<std::endl;
    return 1;
  }
  // NOTE(review): assumes the BDT_response branch holds a double in both files;
  // confirm against MClassifier::MakeBranch and the reference file.
  double BDTResponse=0.0;
  AppliedTreeRead->SetBranchAddress("BDT_response",&BDTResponse);

  TFile* ReadTMVATestFile = TFile::Open("/home/tw/root-v5-34/tmva/test/TreeFile.root");
  if(!ReadTMVATestFile){
    std::cout<<"File open faild"<<std::endl;
    return 1;
  }
  TTree* TMVATestTree=(TTree*)ReadTMVATestFile->Get("AppliedTree");
  if(!TMVATestTree){
    std::cout<<"NUll pointer to tree"<<std::endl;
    return 1;
  }
  double TestBDTResponse=0.0;
  TMVATestTree->SetBranchAddress("BDT_response",&TestBDTResponse);

  Long64_t ATRN=AppliedTreeRead->GetEntries();
  Long64_t TTTN=TMVATestTree->GetEntries();
  std::cout<<"Entries in my tree= "<<ATRN<<std::endl;
  std::cout<<"Entries in TMVA tree= "<<TTTN<<std::endl;
  if(ATRN!=TTTN)std::cout<<"SOMETHING WRONG EVENTS NOT EQUAL"<<std::endl;

  // Only entries present in both trees are read, so neither vector receives a
  // stale value and the print loop below cannot run past the end.
  const Long64_t nCompare=std::min(ATRN,TTTN);
  std::vector<double> ATRValues;
  std::vector<double> TTTValues;
  for(Long64_t i=0;i<nCompare;++i){
    TMVATestTree->GetEntry(i);
    AppliedTreeRead->GetEntry(i);
    ATRValues.push_back(BDTResponse);
    TTTValues.push_back(TestBDTResponse);
    //    std::cout<<" MYTree = "<<BDTResponse<<" TMVATREE= "<<TestBDTResponse<<std::endl;
  }

  // The two files need not store the events in the same order, so the responses
  // are compared after sorting.
  std::sort(ATRValues.begin(),ATRValues.end());
  std::sort(TTTValues.begin(),TTTValues.end());

  for(std::size_t i=0;i<ATRValues.size();++i){
    std::cout<<" MY Value= "<<ATRValues[i]<<" TTT Value = "<<TTTValues[i]<<std::endl;
  }

  return 0;
}