Esempio n. 1
0
void TMVAMulticlassApplication( TString myMethodList = "" )
{

   TMVA::Tools::Instance();
   
   //---------------------------------------------------------------
   // default MVA methods to be trained + tested
   std::map<std::string,int> Use;
   Use["MLP"]             = 1;
   Use["BDTG"]            = 1;
   Use["FDA_GA"]          = 0;
   Use["PDEFoam"]         = 0;
   //---------------------------------------------------------------
  
   std::cout << std::endl;
   std::cout << "==> Start TMVAMulticlassApplication" << std::endl; 
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " " << std::endl;
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   
   // create the Reader object
   TMVA::Reader *reader = new TMVA::Reader( "!Color:!Silent" );    

   // create a set of variables and declare them to the reader
   // - the variable names must corresponds in name and type to 
   // those given in the weight file(s) that you use
   Float_t var1, var2, var3, var4;
   reader->AddVariable( "var1", &var1 );
   reader->AddVariable( "var2", &var2 );
   reader->AddVariable( "var3", &var3 );
   reader->AddVariable( "var4", &var4 );

   // book the MVA methods
   TString dir    = "dataset/weights/";
   TString prefix = "TMVAMulticlass";
   
   for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) {
      if (it->second) {
        TString methodName = TString(it->first) + TString(" method");
        TString weightfile = dir + prefix + TString("_") + TString(it->first) + TString(".weights.xml"); 
        reader->BookMVA( methodName, weightfile ); 
      }
   }

   // book output histograms
   UInt_t nbin = 100;
   TH1F *histMLP_signal(0), *histBDTG_signal(0), *histFDAGA_signal(0), *histPDEFoam_signal(0);
   if (Use["MLP"])    
      histMLP_signal    = new TH1F( "MVA_MLP_signal",    "MVA_MLP_signal",    nbin, 0., 1.1 );
   if (Use["BDTG"])
      histBDTG_signal  = new TH1F( "MVA_BDTG_signal",   "MVA_BDTG_signal",   nbin, 0., 1.1 );
   if (Use["FDA_GA"])
      histFDAGA_signal = new TH1F( "MVA_FDA_GA_signal", "MVA_FDA_GA_signal", nbin, 0., 1.1 );
   if (Use["PDEFoam"])
      histPDEFoam_signal = new TH1F( "MVA_PDEFoam_signal", "MVA_PDEFoam_signal", nbin, 0., 1.1 );


   TFile *input(0); 
   TString fname = "./tmva_example_multiple_background.root";
   if (!gSystem->AccessPathName( fname )) {
      input = TFile::Open( fname ); // check if file in local directory exists
   }
   if (!input) {
      std::cout << "ERROR: could not open data file, please generate example data first!" << std::endl;
      exit(1);
   }
   std::cout << "--- TMVAMulticlassApp : Using input file: " << input->GetName() << std::endl;
   
   // prepare the tree
   // - here the variable names have to corresponds to your tree
   // - you can use the same variables as above which is slightly faster,
   //   but of course you can use different ones and copy the values inside the event loop
  
   TTree* theTree = (TTree*)input->Get("TreeS");
   std::cout << "--- Select signal sample" << std::endl;
   theTree->SetBranchAddress( "var1", &var1 );
   theTree->SetBranchAddress( "var2", &var2 );
   theTree->SetBranchAddress( "var3", &var3 );
   theTree->SetBranchAddress( "var4", &var4 );

   std::cout << "--- Processing: " << theTree->GetEntries() << " events" << std::endl;
   TStopwatch sw;
   sw.Start();

   for (Long64_t ievt=0; ievt<theTree->GetEntries();ievt++) {
      if (ievt%1000 == 0){
         std::cout << "--- ... Processing event: " << ievt << std::endl;
      }
      theTree->GetEntry(ievt);
      
      if (Use["MLP"])
         histMLP_signal->Fill((reader->EvaluateMulticlass( "MLP method" ))[0]);
      if (Use["BDTG"])
         histBDTG_signal->Fill((reader->EvaluateMulticlass( "BDTG method" ))[0]);
      if (Use["FDA_GA"])
         histFDAGA_signal->Fill((reader->EvaluateMulticlass( "FDA_GA method" ))[0]);
      if (Use["PDEFoam"])
         histPDEFoam_signal->Fill((reader->EvaluateMulticlass( "PDEFoam method" ))[0]);
      
   }
   
   // get elapsed time
   sw.Stop();
   std::cout << "--- End of event loop: "; sw.Print();
   
   TFile *target  = new TFile( "TMVAMulticlassApp.root","RECREATE" );
   if (Use["MLP"])
      histMLP_signal->Write();
   if (Use["BDTG"])
      histBDTG_signal->Write(); 
   if (Use["FDA_GA"])
      histFDAGA_signal->Write();
   if (Use["PDEFoam"])
      histPDEFoam_signal->Write();

   target->Close();
   std::cout << "--- Created root file: \"TMVMulticlassApp.root\" containing the MVA output histograms" << std::endl;

   delete reader;
   
   std::cout << "==> TMVAClassificationApplication is done!" << std::endl << std::endl;
}
int main(int argc, char** argv) {
 
 if(argc != 2)
 {
  std::cerr << ">>>>> analysis.cpp::usage: " << argv[0] << " configFileName" << std::endl ;
  return 1;
 }
 
 // Parse the config file                                                                                                                                                          
 parseConfigFile (argv[1]) ;
 
 std::string treeName  = gConfigParser -> readStringOption("Input::treeName");
 std::string fileSamples = gConfigParser -> readStringOption("Input::fileSamples");
 std::string inputDirectory = gConfigParser -> readStringOption("Input::inputDirectory");
 
 std::string inputBeginningFile = "out_NtupleProducer_"; 
 try {
  inputBeginningFile = gConfigParser -> readStringOption("Input::inputBeginningFile");
 }
 catch (char const* exceptionString){
  std::cerr << " exception = " << exceptionString << std::endl;
 }
 std::cout << ">>>>> Input::inputBeginningFile  " << inputBeginningFile  << std::endl;  
 
 
 //==== list of methods
 std::string              MVADirectory           = gConfigParser -> readStringOption    ("Options::MVADirectory");
 std::vector<std::string> vectorMyMethodList     = gConfigParser -> readStringListOption("Options::MVAmethods");
 std::vector<std::string> vectorMyMethodMassList = gConfigParser -> readStringListOption("Options::MVAmethodsMass");
 
 std::string outputDirectory = gConfigParser -> readStringOption("Output::outputDirectory");
  
 //---- variables 
 float jetpt1;
 float jetpt2;
 float mjj;
 float detajj;
 float dphilljetjet;
 
 float pt1;
 float pt2;
 float mll;
 float dphill;
 float mth;
 
 float dphillmet;
 float mpmet;
 
 float channel;
 
 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 TFile* outputRootFile[500];
 TTree* cloneTreeJetLepVect[500];
 
 
 TTree *treeJetLepVect[500];
 TFile* file[500];
 
 char *nameSample[1000];
 char *nameHumanReadable[1000];
 char* xsectionName[1000];
 
 char nameFileIn[1000];
 sprintf(nameFileIn,"%s",fileSamples.c_str());
 
 int numberOfSamples = ReadFile(nameFileIn, nameSample, nameHumanReadable, xsectionName);
 
 double Normalization[1000];
 double xsection[1000];
 
 for (int iSample=0; iSample<numberOfSamples; iSample++){ 
  char nameFile[20000];
  sprintf(nameFile,"%s/%s%s.root",inputDirectory.c_str(),inputBeginningFile.c_str(),nameSample[iSample]);  
  
  file[iSample] = new TFile(nameFile, "READ");
  
  treeJetLepVect[iSample] = (TTree*) file[iSample]->Get(treeName.c_str());
  char nameTreeJetLep[100];
  sprintf(nameTreeJetLep,"treeJetLep_%d",iSample); 
  treeJetLepVect[iSample]->SetName(nameTreeJetLep);
  
  treeJetLepVect[iSample] -> SetBranchAddress("jetpt1",  &jetpt1);
  treeJetLepVect[iSample] -> SetBranchAddress("jetpt2",  &jetpt2);
  treeJetLepVect[iSample] -> SetBranchAddress("mjj",   &mjj);
  treeJetLepVect[iSample] -> SetBranchAddress("detajj", &detajj);
  treeJetLepVect[iSample] -> SetBranchAddress("dphilljetjet", &dphilljetjet);
  treeJetLepVect[iSample] -> SetBranchAddress("pt1", &pt1);
  treeJetLepVect[iSample] -> SetBranchAddress("pt2", &pt2);
  treeJetLepVect[iSample] -> SetBranchAddress("mll", &mll);
  treeJetLepVect[iSample] -> SetBranchAddress("dphill", &dphill);
  treeJetLepVect[iSample] -> SetBranchAddress("mth", &mth);
  treeJetLepVect[iSample] -> SetBranchAddress("dphillmet", &dphillmet);
  treeJetLepVect[iSample] -> SetBranchAddress("mpmet", &mpmet);
  treeJetLepVect[iSample] -> SetBranchAddress("channel", &channel);
  
  
  sprintf(nameFile,"%s/%s%s.root",outputDirectory.c_str(),inputBeginningFile.c_str(),nameSample[iSample]);    
  outputRootFile[iSample] = new TFile ( nameFile, "RECREATE") ;
  outputRootFile[iSample] -> cd () ;
  cloneTreeJetLepVect[iSample] = treeJetLepVect[iSample] -> CloneTree (0) ;
 }
 
 
 /**
  * cycle on MVA (method-mass)
  * * cycle on samples
  * * * cycle on events
 */
 
 for (int iMVA = 0; iMVA < vectorMyMethodList.size(); iMVA++) {
  std::cout << " vectorMyMethodList[" << iMVA << "] = " << vectorMyMethodList.at(iMVA) << std::endl;
  TString myMethodList = Form ("%s",vectorMyMethodList.at(iMVA).c_str());
  for (int iMVAMass = 0; iMVAMass < vectorMyMethodMassList.size(); iMVAMass++) {
   std::cout << " vectorMyMethodMassList[" << iMVAMass << "] = " << vectorMyMethodMassList.at(iMVAMass) << std::endl;
   
   TMVA::Reader *TMVAreader = new TMVA::Reader( "!Color:!Silent" );
   
//    TMVAreader->AddVariable("jetpt1",       &jetpt1);
//    TMVAreader->AddVariable("jetpt2",       &jetpt2);
//    TMVAreader->AddVariable("mjj",          &mjj);
//    TMVAreader->AddVariable("detajj",       &detajj);
//    TMVAreader->AddVariable("dphilljetjet", &dphilljetjet);
//    TMVAreader->AddVariable("pt1",          &pt1);
//    TMVAreader->AddVariable("pt2",          &pt2);
//    TMVAreader->AddVariable("mll",          &mll);
//    TMVAreader->AddVariable("dphill",       &dphill);
//    TMVAreader->AddVariable("mth",          &mth);
//    TMVAreader->AddVariable("dphillmet",    &dphillmet);
//    TMVAreader->AddVariable("mpmet",        &mpmet);

   Float_t input_variables[1000];
//    float input_variables[1000];
   
//    TMVAreader->AddVariable("jetpt1",       &(input_variables[0]));
//    TMVAreader->AddVariable("jetpt2",       &(input_variables[1]));
//    TMVAreader->AddVariable("mjj",          &(input_variables[2]));
//    TMVAreader->AddVariable("detajj",       &(input_variables[3]));
//    TMVAreader->AddVariable("dphilljetjet", &(input_variables[4]));
//    TMVAreader->AddVariable("pt1",          &(input_variables[5]));
//    TMVAreader->AddVariable("pt2",          &(input_variables[6]));
//    TMVAreader->AddVariable("mll",          &(input_variables[7]));
//    TMVAreader->AddVariable("dphill",       &(input_variables[8]));
//    TMVAreader->AddVariable("mth",          &(input_variables[9]));
//    TMVAreader->AddVariable("dphillmet",    &(input_variables[10]));
//    TMVAreader->AddVariable("mpmet",        &(input_variables[11]));
   
   TMVAreader->AddVariable("jetpt1",       &input_variables[0]);
   TMVAreader->AddVariable("jetpt2",       &input_variables[1]);
   TMVAreader->AddVariable("mjj",          &input_variables[2]);
   TMVAreader->AddVariable("detajj",       &input_variables[3]);
   TMVAreader->AddVariable("dphilljetjet", &input_variables[4]);
   TMVAreader->AddVariable("pt1",          &input_variables[5]);
   TMVAreader->AddVariable("pt2",          &input_variables[6]);
   TMVAreader->AddVariable("mll",          &input_variables[7]);
   TMVAreader->AddVariable("dphill",       &input_variables[8]);
   TMVAreader->AddVariable("mth",          &input_variables[9]);
   TMVAreader->AddVariable("dphillmet",    &input_variables[10]);
   TMVAreader->AddVariable("mpmet",        &input_variables[11]);
   TMVAreader->AddSpectator("channel",     &input_variables[12]);
   
 
   TString myMethodMassList = Form ("%s",vectorMyMethodMassList.at(iMVAMass).c_str());
   TString weightfile = Form ("%s/weights_%s_testVariables/TMVAMulticlass_%s.weights.xml",MVADirectory.c_str(),myMethodMassList.Data(),myMethodList.Data());
   
   std::cout << " myMethodList = " << myMethodList.Data() << std::endl;
   std::cout << " weightfile   = " << weightfile.Data()   << std::endl;
   
//    TString myMethodListBook = Form ("%s",vectorMyMethodList.at(iMVA).c_str());
   
//    TMVAreader->BookMVA( myMethodListBook, weightfile );
   TMVAreader->BookMVA( myMethodList, weightfile );
   
   
   for (int iSample=0; iSample<numberOfSamples; iSample++){ 
    std::cout << " iSample = " << iSample << " :: " << numberOfSamples << std::endl;
    file[iSample] -> cd();
    Double_t MVA_Value;
    TBranch *newBranch;
    
    TString methodName4Tree = Form ("%s_%s_MVAHiggs",myMethodList.Data(),myMethodMassList.Data());
    TString methodName4Tree2 =  Form ("%s_%s_MVAHiggs/D",myMethodList.Data(),myMethodMassList.Data());
    newBranch = cloneTreeJetLepVect[iSample]->Branch(methodName4Tree,&MVA_Value,methodName4Tree2);
//     newBranch = treeJetLepVect[iSample]->Branch(methodName4Tree,&MVA_Value,methodName4Tree2);
    
    
    ///==== loop ====
    Long64_t nentries = treeJetLepVect[iSample]->GetEntries();
    
    for (Long64_t iEntry = 0; iEntry < nentries; iEntry++){
     if((iEntry%1000) == 0) std::cout << ">>>>> analysis::GetEntry " << iEntry << " : " << nentries << std::endl;   
     
     treeJetLepVect[iSample]->GetEntry(iEntry);
     
     input_variables[0]  = static_cast<Float_t>(jetpt1);
     input_variables[1]  = static_cast<Float_t>(jetpt2);
     input_variables[2]  = static_cast<Float_t>(mjj);
     input_variables[3]  = static_cast<Float_t>(detajj);
     input_variables[4]  = static_cast<Float_t>(dphilljetjet);
     input_variables[5]  = static_cast<Float_t>(pt1);
     input_variables[6]  = static_cast<Float_t>(pt2);
     input_variables[7]  = static_cast<Float_t>(mll);
     input_variables[8]  = static_cast<Float_t>(dphill);
     input_variables[9]  = static_cast<Float_t>(mth);
     input_variables[10] = static_cast<Float_t>(dphillmet);
     input_variables[11] = static_cast<Float_t>(mpmet);
     input_variables[12] = static_cast<Float_t>(channel);
     
     int num = TMVAreader->EvaluateMulticlass(myMethodList).size();
     double max = -1e9;
     double tempmax;
     int numsel = -1;
     for (int inum = 0; inum<(num-2); inum++) { // il -2 è dovuto a Sig e Bkg che mi salva il training! Uffi!
      tempmax = (TMVAreader->EvaluateMulticlass(myMethodList))[inum];
      if (tempmax > max) {
       max = tempmax;
       numsel = inum;
      }
     }
     MVA_Value = max + 3*numsel;
     
     //      newBranch -> Fill();
     cloneTreeJetLepVect[iSample] -> Fill () ; 
    }
   }
  }
 }
 
 
 for (int iSample=0; iSample<numberOfSamples; iSample++){ 
  // save only the new version of the tree
  //   treeJetLepVect[iSample]->Write("", TObject::kOverwrite);
  cloneTreeJetLepVect[iSample] -> SetName (treeName.c_str());
  cloneTreeJetLepVect[iSample] -> AutoSave () ;
  outputRootFile[iSample] -> Close () ;
 }
     
  
}
Esempio n. 3
0
int testPyGTBMulticlass(){
   // Get data file
   std::cout << "Get test data..." << std::endl;
   TString fname = "./tmva_example_multiple_background.root";
   if (gSystem->AccessPathName(fname)){  // file does not exist in local directory
      std::cout << "Create multiclass test data..." << std::endl;
      TString createDataMacro = TString(gROOT->GetTutorialsDir()) + "/tmva/createData.C";
      gROOT->ProcessLine(TString::Format(".L %s",createDataMacro.Data()));
      gROOT->ProcessLine("create_MultipleBackground(200)");
      std::cout << "Created " << fname << " for tests of the multiclass features" << std::endl;
   }
   TFile *input = TFile::Open(fname);

   // Setup PyMVA and factory
   std::cout << "Setup TMVA..." << std::endl;
   TMVA::PyMethodBase::PyInitialize();
   TFile* outputFile = TFile::Open("ResultsTestPyGTBMulticlass.root", "RECREATE");
   TMVA::Factory *factory = new TMVA::Factory("testPyGTBMulticlass", outputFile,
      "!V:Silent:Color:!DrawProgressBar:AnalysisType=multiclass");

   // Load data
   TMVA::DataLoader *dataloader = new TMVA::DataLoader("datasetTestPyGTBMulticlass");

   TTree *signal = (TTree*)input->Get("TreeS");
   TTree *background0 = (TTree*)input->Get("TreeB0");
   TTree *background1 = (TTree*)input->Get("TreeB1");
   TTree *background2 = (TTree*)input->Get("TreeB2");
   dataloader->AddTree(signal, "Signal");
   dataloader->AddTree(background0, "Background_0");
   dataloader->AddTree(background1, "Background_1");
   dataloader->AddTree(background2, "Background_2");

   dataloader->AddVariable("var1");
   dataloader->AddVariable("var2");
   dataloader->AddVariable("var3");
   dataloader->AddVariable("var4");

   dataloader->PrepareTrainingAndTestTree("",
      "SplitMode=Random:NormMode=NumEvents:!V");

   // Book and train method
   factory->BookMethod(dataloader, TMVA::Types::kPyGTB, "PyGTB",
      "!H:!V:VarTransform=None:NEstimators=100:Verbose=0");
   std::cout << "Train classifier..." << std::endl;
   factory->TrainAllMethods();

   // Clean-up
   delete factory;
   delete dataloader;
   delete outputFile;

   // Setup reader
   UInt_t numEvents = 100;
   std::cout << "Run reader and classify " << numEvents << " events..." << std::endl;
   TMVA::Reader *reader = new TMVA::Reader("!Color:Silent");
   Float_t vars[4];
   reader->AddVariable("var1", vars+0);
   reader->AddVariable("var2", vars+1);
   reader->AddVariable("var3", vars+2);
   reader->AddVariable("var4", vars+3);
   reader->BookMVA("PyGTB", "datasetTestPyGTBMulticlass/weights/testPyGTBMulticlass_PyGTB.weights.xml");

   // Get mean response of method on signal and background events
   signal->SetBranchAddress("var1", vars+0);
   signal->SetBranchAddress("var2", vars+1);
   signal->SetBranchAddress("var3", vars+2);
   signal->SetBranchAddress("var4", vars+3);

   background0->SetBranchAddress("var1", vars+0);
   background0->SetBranchAddress("var2", vars+1);
   background0->SetBranchAddress("var3", vars+2);
   background0->SetBranchAddress("var4", vars+3);

   background1->SetBranchAddress("var1", vars+0);
   background1->SetBranchAddress("var2", vars+1);
   background1->SetBranchAddress("var3", vars+2);
   background1->SetBranchAddress("var4", vars+3);

   background2->SetBranchAddress("var1", vars+0);
   background2->SetBranchAddress("var2", vars+1);
   background2->SetBranchAddress("var3", vars+2);
   background2->SetBranchAddress("var4", vars+3);

   Float_t meanMvaSignal = 0;
   Float_t meanMvaBackground0 = 0;
   Float_t meanMvaBackground1 = 0;
   Float_t meanMvaBackground2 = 0;
   for(UInt_t i=0; i<numEvents; i++){
      signal->GetEntry(i);
      meanMvaSignal += reader->EvaluateMulticlass("PyGTB")[0];
      background0->GetEntry(i);
      meanMvaBackground0 += reader->EvaluateMulticlass("PyGTB")[1];
      background1->GetEntry(i);
      meanMvaBackground1 += reader->EvaluateMulticlass("PyGTB")[2];
      background2->GetEntry(i);
      meanMvaBackground2 += reader->EvaluateMulticlass("PyGTB")[3];
   }
   meanMvaSignal = meanMvaSignal/float(numEvents);
   meanMvaBackground0 = meanMvaBackground0/float(numEvents);
   meanMvaBackground1 = meanMvaBackground1/float(numEvents);
   meanMvaBackground2 = meanMvaBackground2/float(numEvents);

   // Check whether the response is obviously better than guessing
   std::cout << "Mean MVA response on signal: " << meanMvaSignal << std::endl;
   if(meanMvaSignal < 0.3){
      std::cout << "[ERROR] Mean response on signal is " << meanMvaSignal << " (<0.3)" << std::endl;
      return 1;
   }
   std::cout << "Mean MVA response on background 0: " << meanMvaBackground0 << std::endl;
   if(meanMvaBackground0 < 0.3){
      std::cout << "[ERROR] Mean response on background 0 is " << meanMvaBackground0 << " (<0.3)" << std::endl;
      return 1;
   }
   std::cout << "Mean MVA response on background 1: " << meanMvaBackground1 << std::endl;
   if(meanMvaBackground0 < 0.3){
      std::cout << "[ERROR] Mean response on background 1 is " << meanMvaBackground1 << " (<0.3)" << std::endl;
      return 1;
   }
   std::cout << "Mean MVA response on background 2: " << meanMvaBackground2 << std::endl;
   if(meanMvaBackground0 < 0.3){
      std::cout << "[ERROR] Mean response on background 2 is " << meanMvaBackground2 << " (<0.3)" << std::endl;
      return 1;
   }

   return 0;
}