/**
 * Apply trained TMVA multiclass classifiers to the signal tree of the example
 * data file and store the signal-class response in histograms.
 *
 * Weight files are read from "dataset/weights/TMVAMulticlass_<method>.weights.xml",
 * events are read from tree "TreeS" of "./tmva_example_multiple_background.root",
 * and one histogram per enabled method is written to "TMVAMulticlassApp.root".
 *
 * @param myMethodList  Comma-separated list of methods to run. When empty, the
 *                      defaults below (MLP and BDTG) are used. An unknown name
 *                      prints the list of known methods and returns.
 */
void TMVAMulticlassApplication( TString myMethodList = "" )
{
   TMVA::Tools::Instance();

   //---------------------------------------------------------------
   // default MVA methods to be evaluated (1 = enabled)
   std::map<std::string,int> Use;
   Use["MLP"]     = 1;
   Use["BDTG"]    = 1;
   Use["FDA_GA"]  = 0;
   Use["PDEFoam"] = 0;
   //---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAMulticlassApplication" << std::endl;

   // An explicit method list replaces the defaults entirely.
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); ++it) it->second = 0;

      std::vector<TString> mlist = gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); ++it) std::cout << it->first << " " << std::endl;
            std::cout << std::endl;
            return;
         }
         Use[regMethod] = 1;
      }
   }

   // create the Reader object
   TMVA::Reader *reader = new TMVA::Reader( "!Color:!Silent" );

   // create a set of variables and declare them to the reader
   // - the variable names must correspond in name and type to
   //   those given in the weight file(s) that you use
   Float_t var1, var2, var3, var4;
   reader->AddVariable( "var1", &var1 );
   reader->AddVariable( "var2", &var2 );
   reader->AddVariable( "var3", &var3 );
   reader->AddVariable( "var4", &var4 );

   // book the MVA methods; each is registered under the tag "<name> method"
   TString dir    = "dataset/weights/";
   TString prefix = "TMVAMulticlass";

   for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); ++it) {
      if (it->second) {
         TString methodName = TString(it->first) + TString(" method");
         TString weightfile = dir + prefix + TString("_") + TString(it->first) + TString(".weights.xml");
         reader->BookMVA( methodName, weightfile );
      }
   }

   // book output histograms; a null pointer means "method not enabled"
   UInt_t nbin = 100;
   TH1F *histMLP_signal(0), *histBDTG_signal(0), *histFDAGA_signal(0), *histPDEFoam_signal(0);
   if (Use["MLP"])
      histMLP_signal = new TH1F( "MVA_MLP_signal", "MVA_MLP_signal", nbin, 0., 1.1 );
   if (Use["BDTG"])
      histBDTG_signal = new TH1F( "MVA_BDTG_signal", "MVA_BDTG_signal", nbin, 0., 1.1 );
   if (Use["FDA_GA"])
      histFDAGA_signal = new TH1F( "MVA_FDA_GA_signal", "MVA_FDA_GA_signal", nbin, 0., 1.1 );
   if (Use["PDEFoam"])
      histPDEFoam_signal = new TH1F( "MVA_PDEFoam_signal", "MVA_PDEFoam_signal", nbin, 0., 1.1 );

   TFile *input(0);
   TString fname = "./tmva_example_multiple_background.root";
   if (!gSystem->AccessPathName( fname )) {
      input = TFile::Open( fname ); // check if file in local directory exists
   }
   if (!input) {
      std::cout << "ERROR: could not open data file, please generate example data first!" << std::endl;
      exit(1);
   }
   std::cout << "--- TMVAMulticlassApp : Using input file: " << input->GetName() << std::endl;

   // prepare the tree
   // - here the variable names have to correspond to your tree
   // - you can use the same variables as above which is slightly faster,
   //   but of course you can use different ones and copy the values inside the event loop
   TTree* theTree = (TTree*)input->Get("TreeS");
   if (!theTree) {
      // Get() returns null when the key is missing; fail loudly instead of crashing below.
      std::cout << "ERROR: tree \"TreeS\" not found in " << input->GetName() << std::endl;
      exit(1);
   }
   std::cout << "--- Select signal sample" << std::endl;
   theTree->SetBranchAddress( "var1", &var1 );
   theTree->SetBranchAddress( "var2", &var2 );
   theTree->SetBranchAddress( "var3", &var3 );
   theTree->SetBranchAddress( "var4", &var4 );

   const Long64_t nEvents = theTree->GetEntries();
   std::cout << "--- Processing: " << nEvents << " events" << std::endl;
   TStopwatch sw;
   sw.Start();

   for (Long64_t ievt=0; ievt<nEvents; ievt++) {
      if (ievt%1000 == 0){
         std::cout << "--- ... Processing event: " << ievt << std::endl;
      }
      theTree->GetEntry(ievt);

      // Element 0 of the multiclass response is the one histogrammed as "signal".
      if (histMLP_signal)
         histMLP_signal->Fill((reader->EvaluateMulticlass( "MLP method" ))[0]);
      if (histBDTG_signal)
         histBDTG_signal->Fill((reader->EvaluateMulticlass( "BDTG method" ))[0]);
      if (histFDAGA_signal)
         histFDAGA_signal->Fill((reader->EvaluateMulticlass( "FDA_GA method" ))[0]);
      if (histPDEFoam_signal)
         histPDEFoam_signal->Fill((reader->EvaluateMulticlass( "PDEFoam method" ))[0]);
   }

   // get elapsed time
   sw.Stop();
   std::cout << "--- End of event loop: "; sw.Print();

   TFile *target = new TFile( "TMVAMulticlassApp.root","RECREATE" );
   if (histMLP_signal)
      histMLP_signal->Write();
   if (histBDTG_signal)
      histBDTG_signal->Write();
   if (histFDAGA_signal)
      histFDAGA_signal->Write();
   if (histPDEFoam_signal)
      histPDEFoam_signal->Write();

   target->Close();
   delete target;
   std::cout << "--- Created root file: \"TMVAMulticlassApp.root\" containing the MVA output histograms" << std::endl;

   delete reader;

   // Releasing the input file also releases theTree, whose branch addresses
   // point at the local var1..var4 and must not outlive this function.
   delete input;

   std::cout << "==> TMVAMulticlassApplication is done!" << std::endl << std::endl;
}
int main(int argc, char** argv) { if(argc != 2) { std::cerr << ">>>>> analysis.cpp::usage: " << argv[0] << " configFileName" << std::endl ; return 1; } // Parse the config file parseConfigFile (argv[1]) ; std::string treeName = gConfigParser -> readStringOption("Input::treeName"); std::string fileSamples = gConfigParser -> readStringOption("Input::fileSamples"); std::string inputDirectory = gConfigParser -> readStringOption("Input::inputDirectory"); std::string inputBeginningFile = "out_NtupleProducer_"; try { inputBeginningFile = gConfigParser -> readStringOption("Input::inputBeginningFile"); } catch (char const* exceptionString){ std::cerr << " exception = " << exceptionString << std::endl; } std::cout << ">>>>> Input::inputBeginningFile " << inputBeginningFile << std::endl; //==== list of methods std::string MVADirectory = gConfigParser -> readStringOption ("Options::MVADirectory"); std::vector<std::string> vectorMyMethodList = gConfigParser -> readStringListOption("Options::MVAmethods"); std::vector<std::string> vectorMyMethodMassList = gConfigParser -> readStringListOption("Options::MVAmethodsMass"); std::string outputDirectory = gConfigParser -> readStringOption("Output::outputDirectory"); //---- variables float jetpt1; float jetpt2; float mjj; float detajj; float dphilljetjet; float pt1; float pt2; float mll; float dphill; float mth; float dphillmet; float mpmet; float channel; //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TFile* outputRootFile[500]; TTree* cloneTreeJetLepVect[500]; TTree *treeJetLepVect[500]; TFile* file[500]; char *nameSample[1000]; char *nameHumanReadable[1000]; char* xsectionName[1000]; char nameFileIn[1000]; sprintf(nameFileIn,"%s",fileSamples.c_str()); int numberOfSamples = ReadFile(nameFileIn, nameSample, nameHumanReadable, xsectionName); double Normalization[1000]; double xsection[1000]; for (int iSample=0; iSample<numberOfSamples; iSample++){ char nameFile[20000]; 
sprintf(nameFile,"%s/%s%s.root",inputDirectory.c_str(),inputBeginningFile.c_str(),nameSample[iSample]); file[iSample] = new TFile(nameFile, "READ"); treeJetLepVect[iSample] = (TTree*) file[iSample]->Get(treeName.c_str()); char nameTreeJetLep[100]; sprintf(nameTreeJetLep,"treeJetLep_%d",iSample); treeJetLepVect[iSample]->SetName(nameTreeJetLep); treeJetLepVect[iSample] -> SetBranchAddress("jetpt1", &jetpt1); treeJetLepVect[iSample] -> SetBranchAddress("jetpt2", &jetpt2); treeJetLepVect[iSample] -> SetBranchAddress("mjj", &mjj); treeJetLepVect[iSample] -> SetBranchAddress("detajj", &detajj); treeJetLepVect[iSample] -> SetBranchAddress("dphilljetjet", &dphilljetjet); treeJetLepVect[iSample] -> SetBranchAddress("pt1", &pt1); treeJetLepVect[iSample] -> SetBranchAddress("pt2", &pt2); treeJetLepVect[iSample] -> SetBranchAddress("mll", &mll); treeJetLepVect[iSample] -> SetBranchAddress("dphill", &dphill); treeJetLepVect[iSample] -> SetBranchAddress("mth", &mth); treeJetLepVect[iSample] -> SetBranchAddress("dphillmet", &dphillmet); treeJetLepVect[iSample] -> SetBranchAddress("mpmet", &mpmet); treeJetLepVect[iSample] -> SetBranchAddress("channel", &channel); sprintf(nameFile,"%s/%s%s.root",outputDirectory.c_str(),inputBeginningFile.c_str(),nameSample[iSample]); outputRootFile[iSample] = new TFile ( nameFile, "RECREATE") ; outputRootFile[iSample] -> cd () ; cloneTreeJetLepVect[iSample] = treeJetLepVect[iSample] -> CloneTree (0) ; } /** * cycle on MVA (method-mass) * * cycle on samples * * * cycle on events */ for (int iMVA = 0; iMVA < vectorMyMethodList.size(); iMVA++) { std::cout << " vectorMyMethodList[" << iMVA << "] = " << vectorMyMethodList.at(iMVA) << std::endl; TString myMethodList = Form ("%s",vectorMyMethodList.at(iMVA).c_str()); for (int iMVAMass = 0; iMVAMass < vectorMyMethodMassList.size(); iMVAMass++) { std::cout << " vectorMyMethodMassList[" << iMVAMass << "] = " << vectorMyMethodMassList.at(iMVAMass) << std::endl; TMVA::Reader *TMVAreader = new TMVA::Reader( 
"!Color:!Silent" ); // TMVAreader->AddVariable("jetpt1", &jetpt1); // TMVAreader->AddVariable("jetpt2", &jetpt2); // TMVAreader->AddVariable("mjj", &mjj); // TMVAreader->AddVariable("detajj", &detajj); // TMVAreader->AddVariable("dphilljetjet", &dphilljetjet); // TMVAreader->AddVariable("pt1", &pt1); // TMVAreader->AddVariable("pt2", &pt2); // TMVAreader->AddVariable("mll", &mll); // TMVAreader->AddVariable("dphill", &dphill); // TMVAreader->AddVariable("mth", &mth); // TMVAreader->AddVariable("dphillmet", &dphillmet); // TMVAreader->AddVariable("mpmet", &mpmet); Float_t input_variables[1000]; // float input_variables[1000]; // TMVAreader->AddVariable("jetpt1", &(input_variables[0])); // TMVAreader->AddVariable("jetpt2", &(input_variables[1])); // TMVAreader->AddVariable("mjj", &(input_variables[2])); // TMVAreader->AddVariable("detajj", &(input_variables[3])); // TMVAreader->AddVariable("dphilljetjet", &(input_variables[4])); // TMVAreader->AddVariable("pt1", &(input_variables[5])); // TMVAreader->AddVariable("pt2", &(input_variables[6])); // TMVAreader->AddVariable("mll", &(input_variables[7])); // TMVAreader->AddVariable("dphill", &(input_variables[8])); // TMVAreader->AddVariable("mth", &(input_variables[9])); // TMVAreader->AddVariable("dphillmet", &(input_variables[10])); // TMVAreader->AddVariable("mpmet", &(input_variables[11])); TMVAreader->AddVariable("jetpt1", &input_variables[0]); TMVAreader->AddVariable("jetpt2", &input_variables[1]); TMVAreader->AddVariable("mjj", &input_variables[2]); TMVAreader->AddVariable("detajj", &input_variables[3]); TMVAreader->AddVariable("dphilljetjet", &input_variables[4]); TMVAreader->AddVariable("pt1", &input_variables[5]); TMVAreader->AddVariable("pt2", &input_variables[6]); TMVAreader->AddVariable("mll", &input_variables[7]); TMVAreader->AddVariable("dphill", &input_variables[8]); TMVAreader->AddVariable("mth", &input_variables[9]); TMVAreader->AddVariable("dphillmet", &input_variables[10]); 
TMVAreader->AddVariable("mpmet", &input_variables[11]); TMVAreader->AddSpectator("channel", &input_variables[12]); TString myMethodMassList = Form ("%s",vectorMyMethodMassList.at(iMVAMass).c_str()); TString weightfile = Form ("%s/weights_%s_testVariables/TMVAMulticlass_%s.weights.xml",MVADirectory.c_str(),myMethodMassList.Data(),myMethodList.Data()); std::cout << " myMethodList = " << myMethodList.Data() << std::endl; std::cout << " weightfile = " << weightfile.Data() << std::endl; // TString myMethodListBook = Form ("%s",vectorMyMethodList.at(iMVA).c_str()); // TMVAreader->BookMVA( myMethodListBook, weightfile ); TMVAreader->BookMVA( myMethodList, weightfile ); for (int iSample=0; iSample<numberOfSamples; iSample++){ std::cout << " iSample = " << iSample << " :: " << numberOfSamples << std::endl; file[iSample] -> cd(); Double_t MVA_Value; TBranch *newBranch; TString methodName4Tree = Form ("%s_%s_MVAHiggs",myMethodList.Data(),myMethodMassList.Data()); TString methodName4Tree2 = Form ("%s_%s_MVAHiggs/D",myMethodList.Data(),myMethodMassList.Data()); newBranch = cloneTreeJetLepVect[iSample]->Branch(methodName4Tree,&MVA_Value,methodName4Tree2); // newBranch = treeJetLepVect[iSample]->Branch(methodName4Tree,&MVA_Value,methodName4Tree2); ///==== loop ==== Long64_t nentries = treeJetLepVect[iSample]->GetEntries(); for (Long64_t iEntry = 0; iEntry < nentries; iEntry++){ if((iEntry%1000) == 0) std::cout << ">>>>> analysis::GetEntry " << iEntry << " : " << nentries << std::endl; treeJetLepVect[iSample]->GetEntry(iEntry); input_variables[0] = static_cast<Float_t>(jetpt1); input_variables[1] = static_cast<Float_t>(jetpt2); input_variables[2] = static_cast<Float_t>(mjj); input_variables[3] = static_cast<Float_t>(detajj); input_variables[4] = static_cast<Float_t>(dphilljetjet); input_variables[5] = static_cast<Float_t>(pt1); input_variables[6] = static_cast<Float_t>(pt2); input_variables[7] = static_cast<Float_t>(mll); input_variables[8] = static_cast<Float_t>(dphill); 
input_variables[9] = static_cast<Float_t>(mth); input_variables[10] = static_cast<Float_t>(dphillmet); input_variables[11] = static_cast<Float_t>(mpmet); input_variables[12] = static_cast<Float_t>(channel); int num = TMVAreader->EvaluateMulticlass(myMethodList).size(); double max = -1e9; double tempmax; int numsel = -1; for (int inum = 0; inum<(num-2); inum++) { // il -2 è dovuto a Sig e Bkg che mi salva il training! Uffi! tempmax = (TMVAreader->EvaluateMulticlass(myMethodList))[inum]; if (tempmax > max) { max = tempmax; numsel = inum; } } MVA_Value = max + 3*numsel; // newBranch -> Fill(); cloneTreeJetLepVect[iSample] -> Fill () ; } } } } for (int iSample=0; iSample<numberOfSamples; iSample++){ // save only the new version of the tree // treeJetLepVect[iSample]->Write("", TObject::kOverwrite); cloneTreeJetLepVect[iSample] -> SetName (treeName.c_str()); cloneTreeJetLepVect[iSample] -> AutoSave () ; outputRootFile[iSample] -> Close () ; } }
int testPyGTBMulticlass(){ // Get data file std::cout << "Get test data..." << std::endl; TString fname = "./tmva_example_multiple_background.root"; if (gSystem->AccessPathName(fname)){ // file does not exist in local directory std::cout << "Create multiclass test data..." << std::endl; TString createDataMacro = TString(gROOT->GetTutorialsDir()) + "/tmva/createData.C"; gROOT->ProcessLine(TString::Format(".L %s",createDataMacro.Data())); gROOT->ProcessLine("create_MultipleBackground(200)"); std::cout << "Created " << fname << " for tests of the multiclass features" << std::endl; } TFile *input = TFile::Open(fname); // Setup PyMVA and factory std::cout << "Setup TMVA..." << std::endl; TMVA::PyMethodBase::PyInitialize(); TFile* outputFile = TFile::Open("ResultsTestPyGTBMulticlass.root", "RECREATE"); TMVA::Factory *factory = new TMVA::Factory("testPyGTBMulticlass", outputFile, "!V:Silent:Color:!DrawProgressBar:AnalysisType=multiclass"); // Load data TMVA::DataLoader *dataloader = new TMVA::DataLoader("datasetTestPyGTBMulticlass"); TTree *signal = (TTree*)input->Get("TreeS"); TTree *background0 = (TTree*)input->Get("TreeB0"); TTree *background1 = (TTree*)input->Get("TreeB1"); TTree *background2 = (TTree*)input->Get("TreeB2"); dataloader->AddTree(signal, "Signal"); dataloader->AddTree(background0, "Background_0"); dataloader->AddTree(background1, "Background_1"); dataloader->AddTree(background2, "Background_2"); dataloader->AddVariable("var1"); dataloader->AddVariable("var2"); dataloader->AddVariable("var3"); dataloader->AddVariable("var4"); dataloader->PrepareTrainingAndTestTree("", "SplitMode=Random:NormMode=NumEvents:!V"); // Book and train method factory->BookMethod(dataloader, TMVA::Types::kPyGTB, "PyGTB", "!H:!V:VarTransform=None:NEstimators=100:Verbose=0"); std::cout << "Train classifier..." 
<< std::endl; factory->TrainAllMethods(); // Clean-up delete factory; delete dataloader; delete outputFile; // Setup reader UInt_t numEvents = 100; std::cout << "Run reader and classify " << numEvents << " events..." << std::endl; TMVA::Reader *reader = new TMVA::Reader("!Color:Silent"); Float_t vars[4]; reader->AddVariable("var1", vars+0); reader->AddVariable("var2", vars+1); reader->AddVariable("var3", vars+2); reader->AddVariable("var4", vars+3); reader->BookMVA("PyGTB", "datasetTestPyGTBMulticlass/weights/testPyGTBMulticlass_PyGTB.weights.xml"); // Get mean response of method on signal and background events signal->SetBranchAddress("var1", vars+0); signal->SetBranchAddress("var2", vars+1); signal->SetBranchAddress("var3", vars+2); signal->SetBranchAddress("var4", vars+3); background0->SetBranchAddress("var1", vars+0); background0->SetBranchAddress("var2", vars+1); background0->SetBranchAddress("var3", vars+2); background0->SetBranchAddress("var4", vars+3); background1->SetBranchAddress("var1", vars+0); background1->SetBranchAddress("var2", vars+1); background1->SetBranchAddress("var3", vars+2); background1->SetBranchAddress("var4", vars+3); background2->SetBranchAddress("var1", vars+0); background2->SetBranchAddress("var2", vars+1); background2->SetBranchAddress("var3", vars+2); background2->SetBranchAddress("var4", vars+3); Float_t meanMvaSignal = 0; Float_t meanMvaBackground0 = 0; Float_t meanMvaBackground1 = 0; Float_t meanMvaBackground2 = 0; for(UInt_t i=0; i<numEvents; i++){ signal->GetEntry(i); meanMvaSignal += reader->EvaluateMulticlass("PyGTB")[0]; background0->GetEntry(i); meanMvaBackground0 += reader->EvaluateMulticlass("PyGTB")[1]; background1->GetEntry(i); meanMvaBackground1 += reader->EvaluateMulticlass("PyGTB")[2]; background2->GetEntry(i); meanMvaBackground2 += reader->EvaluateMulticlass("PyGTB")[3]; } meanMvaSignal = meanMvaSignal/float(numEvents); meanMvaBackground0 = meanMvaBackground0/float(numEvents); meanMvaBackground1 = 
meanMvaBackground1/float(numEvents); meanMvaBackground2 = meanMvaBackground2/float(numEvents); // Check whether the response is obviously better than guessing std::cout << "Mean MVA response on signal: " << meanMvaSignal << std::endl; if(meanMvaSignal < 0.3){ std::cout << "[ERROR] Mean response on signal is " << meanMvaSignal << " (<0.3)" << std::endl; return 1; } std::cout << "Mean MVA response on background 0: " << meanMvaBackground0 << std::endl; if(meanMvaBackground0 < 0.3){ std::cout << "[ERROR] Mean response on background 0 is " << meanMvaBackground0 << " (<0.3)" << std::endl; return 1; } std::cout << "Mean MVA response on background 1: " << meanMvaBackground1 << std::endl; if(meanMvaBackground0 < 0.3){ std::cout << "[ERROR] Mean response on background 1 is " << meanMvaBackground1 << " (<0.3)" << std::endl; return 1; } std::cout << "Mean MVA response on background 2: " << meanMvaBackground2 << std::endl; if(meanMvaBackground0 < 0.3){ std::cout << "[ERROR] Mean response on background 2 is " << meanMvaBackground2 << " (<0.3)" << std::endl; return 1; } return 0; }