void PlotDecisionBoundary( TString weightFile = "weights/TMVAClassification_BDT.weights.xml",TString v0="var0", TString v1="var1", TString dataFileName = "/home/hvoss/TMVA/TMVA_data/data/data_circ.root") { //--------------------------------------------------------------- // default MVA methods to be trained + tested // this loads the library TMVA::Tools::Instance(); std::cout << std::endl; std::cout << "==> Start TMVAClassificationApplication" << std::endl; // // create the Reader object // TMVA::Reader *reader = new TMVA::Reader( "!Color:!Silent" ); // create a set of variables and declare them to the reader // - the variable names must corresponds in name and type to // those given in the weight file(s) that you use Float_t var0, var1; reader->AddVariable( v0, &var0 ); reader->AddVariable( v1, &var1 ); // // book the MVA method // reader->BookMVA( "M1", weightFile ); TFile *f = new TFile(dataFileName); TTree *signal = (TTree*)f->Get("TreeS"); TTree *background = (TTree*)f->Get("TreeB"); //Declaration of leaves types Float_t svar0; Float_t svar1; Float_t bvar0; Float_t bvar1; Float_t sWeight=1.0; // just in case you have weight defined, also set these branchaddresses Float_t bWeight=1.0*signal->GetEntries()/background->GetEntries(); // just in case you have weight defined, also set these branchaddresses // Set branch addresses. signal->SetBranchAddress(v0,&svar0); signal->SetBranchAddress(v1,&svar1); background->SetBranchAddress(v0,&bvar0); background->SetBranchAddress(v1,&bvar1); UInt_t nbin = 50; Float_t xmax = signal->GetMaximum(v0.Data()); Float_t xmin = signal->GetMinimum(v0.Data()); Float_t ymax = signal->GetMaximum(v1.Data()); Float_t ymin = signal->GetMinimum(v1.Data()); xmax = TMath::Max(xmax,background->GetMaximum(v0.Data())); xmin = TMath::Min(xmin,background->GetMinimum(v0.Data())); ymax = TMath::Max(ymax,background->GetMaximum(v1.Data())); ymin = TMath::Min(ymin,background->GetMinimum(v1.Data())); TH2D *hs=new TH2D("hs","",nbin,xmin,xmax,nbin,ymin,ymax); TH2D *hb=new TH2D("hb","",nbin,xmin,xmax,nbin,ymin,ymax); hs->SetXTitle(v0); hs->SetYTitle(v1); hb->SetXTitle(v0); hb->SetYTitle(v1); hs->SetMarkerColor(4); hb->SetMarkerColor(2); TH2F * hist = new TH2F( "MVA", "MVA", nbin,xmin,xmax,nbin,ymin,ymax); // Prepare input tree (this must be replaced by your data source) // in this example, there is a toy tree with signal and one with background events // we'll later on use only the "signal" events for the test in this example. Float_t MinMVA=10000, MaxMVA=-100000; for (Int_t ibin=1; ibin<nbin+1; ibin++){ for (Int_t jbin=1; jbin<nbin+1; jbin++){ var0 = hs->GetXaxis()->GetBinCenter(ibin); var1 = hs->GetYaxis()->GetBinCenter(jbin); Float_t mvaVal=reader->EvaluateMVA( "M1" ) ; if (MinMVA>mvaVal) MinMVA=mvaVal; if (MaxMVA<mvaVal) MaxMVA=mvaVal; hist->SetBinContent(ibin,jbin, mvaVal); } } // creating a fine histograms containing the error rate const Int_t nValBins=100; Double_t sum = 0.; TH1F *mvaS= new TH1F("mvaS","",nValBins,MinMVA,MaxMVA); TH1F *mvaB= new TH1F("mvaB","",nValBins,MinMVA,MaxMVA); TH1F *mvaSC= new TH1F("mvaSC","",nValBins,MinMVA,MaxMVA); TH1F *mvaBC= new TH1F("mvaBC","",nValBins,MinMVA,MaxMVA); Long64_t nentries; nentries = TreeS->GetEntries(); for (Long64_t is=0; is<nentries;is++) { signal->GetEntry(is); sum +=sWeight; var0 = svar0; var1 = svar1; Float_t mvaVal=reader->EvaluateMVA( "M1" ) ; hs->Fill(svar0,svar1); mvaS->Fill(mvaVal,sWeight); } nentries = TreeB->GetEntries(); for (Long64_t ib=0; ib<nentries;ib++) { background->GetEntry(ib); sum +=bWeight; var0 = bvar0; var1 = bvar1; Float_t mvaVal=reader->EvaluateMVA( "M1" ) ; hb->Fill(bvar0,bvar1); mvaB->Fill(mvaVal,bWeight); } //SeparationBase *sepGain = new MisClassificationError(); //SeparationBase *sepGain = new GiniIndex(); SeparationBase *sepGain = new CrossEntropy(); Double_t sTot = mvaS->GetSum(); Double_t bTot = mvaB->GetSum(); mvaSC->SetBinContent(1,mvaS->GetBinContent(1)); mvaBC->SetBinContent(1,mvaB->GetBinContent(1)); Double_t sSel=mvaSC->GetBinContent(1); Double_t bSel=mvaBC->GetBinContent(1); Double_t separationGain=sepGain->GetSeparationGain(sSel,bSel,sTot,bTot); Double_t mvaCut=mvaSC->GetBinCenter(1); Double_t mvaCutOrientation=1; // 1 if mva > mvaCut --> Signal and -1 if mva < mvaCut (i.e. mva*-1 > mvaCut*-1) --> Signal for (Int_t ibin=2;ibin<nValBins;ibin++){ mvaSC->SetBinContent(ibin,mvaS->GetBinContent(ibin)+mvaSC->GetBinContent(ibin-1)); mvaBC->SetBinContent(ibin,mvaB->GetBinContent(ibin)+mvaBC->GetBinContent(ibin-1)); sSel=mvaSC->GetBinContent(ibin); bSel=mvaBC->GetBinContent(ibin); if (separationGain < sepGain->GetSeparationGain(sSel,bSel,sTot,bTot) && mvaSC->GetBinCenter(ibin)<0){ separationGain = sepGain->GetSeparationGain(sSel,bSel,sTot,bTot); mvaCut=mvaSC->GetBinCenter(ibin); if (sSel/bSel > (sTot-sSel)/(bTot-bSel)) mvaCutOrientation=-1; else mvaCutOrientation=1; } } cout << "Min="<<MinMVA << " Max=" << MaxMVA << " sTot=" << sTot << " bTot=" << bTot << " sepGain="<<separationGain << " cut=" << mvaCut << " cutOrientation="<<mvaCutOrientation << endl; delete reader; gStyle->SetPalette(1); plot(hs,hb,hist ,v0,v1,mvaCut); TCanvas *cm=new TCanvas ("cm","",900,1200); cm->cd(); cm->Divide(1,2); cm->cd(1); mvaS->SetLineColor(4); mvaB->SetLineColor(2); mvaS->Draw(); mvaB->Draw("same"); cm->cd(2); mvaSC->SetLineColor(4); mvaBC->SetLineColor(2); mvaBC->Draw(); mvaSC->Draw("same"); // TH1F *add=(TH1F*)mvaBC->Clone("add"); // add->Add(mvaSC); // add->Draw(); // errh->Draw("same"); // // write histograms // TFile *target = new TFile( "TMVAPlotDecisionBoundary.root","RECREATE" ); hs->Write(); hb->Write(); hist->Write(); target->Close(); }
void PlotDecisionBoundary( TString weightFile = "weights/Zprime_vs_QCD_TMVAClassification_BDT.weights.xml",TString v0="lep_pt_ljet", TString v1="met_pt", TString dataFileNameS = "/nfs/dust/cms/user/karavdia/ttbar_semilep_13TeV/RunII_25ns_v1/test_03/uhh2.AnalysisModuleRunner.MC.Zp01w3000.root", TString dataFileNameB = "/nfs/dust/cms/user/karavdia/ttbar_semilep_13TeV/RunII_25ns_v1/test_03/uhh2.AnalysisModuleRunner.MC.QCD_EMEnriched.root") { //--------------------------------------------------------------- // default MVA methods to be trained + tested // this loads the library TMVA::Tools::Instance(); std::cout << std::endl; std::cout << "==> Start TMVAClassificationApplication" << std::endl; // // create the Reader object // TMVA::Reader *reader = new TMVA::Reader( "!Color:!Silent" ); // create a set of variables and declare them to the reader // - the variable names must corresponds in name and type to // those given in the weight file(s) that you use // Float_t var0, var1; // reader->AddVariable( v0, &var0 ); // reader->AddVariable( v1, &var1 ); Float_t lep_pt, lep_fbrem, MwT; Float_t log_ljet_pt, log_met_pt, log_lep_pt_ljet; Float_t log_dR_lep_cljet, log_dR_cljet_ljet; Float_t dPhi_lep_cljet; reader->AddVariable("lep_pt", &lep_pt); reader->AddVariable("lep_fbrem", & lep_fbrem); reader->AddVariable("MwT", &MwT); reader->AddVariable("log(ljet_pt)", &log_ljet_pt); reader->AddVariable("log(met_pt)",&log_met_pt); reader->AddVariable("log(lep_pt_ljet)",&log_lep_pt_ljet); reader->AddVariable("log(dR_lep_cljet)",&log_dR_lep_cljet_trans); reader->AddVariable("log(fabs((dR_cljet_ljet-3.14)/3.14))", &log_dR_cljet_ljet); reader->AddSpectator("dPhi_lep_cljet", &dPhi_lep_cljet); // // book the MVA method // reader->BookMVA( "BDT", weightFile ); TFile *fS = new TFile(dataFileNameS); TTree *signal = (TTree*)fS->Get("AnalysisTree"); TFile *fB = new TFile(dataFileNameS); TTree *background = (TTree*)fB->Get("AnalysisTree"); //Declaration of leaves types Float_t svar0; Float_t svar1; Float_t bvar0; Float_t bvar1; Float_t sWeight=1.0; // just in case you have weight defined, also set these branchaddresses Float_t bWeight=1.0*signal->GetEntries()/background->GetEntries(); // just in case you have weight defined, also set these branchaddresses // Set branch addresses. signal->SetBranchAddress(v0,&svar0); signal->SetBranchAddress(v1,&svar1); background->SetBranchAddress(v0,&bvar0); background->SetBranchAddress(v1,&bvar1); UInt_t nbin = 50; Float_t xmax = signal->GetMaximum(v0.Data()); Float_t xmin = signal->GetMinimum(v0.Data()); Float_t ymax = signal->GetMaximum(v1.Data()); Float_t ymin = signal->GetMinimum(v1.Data()); xmax = TMath::Max(xmax,(Float_t)background->GetMaximum(v0.Data())); xmin = TMath::Min(xmin,(Float_t)background->GetMinimum(v0.Data())); ymax = TMath::Max(ymax,(Float_t)background->GetMaximum(v1.Data())); ymin = TMath::Min(ymin,(Float_t)background->GetMinimum(v1.Data())); TH2D *hs=new TH2D("hs","",nbin,xmin,xmax,nbin,ymin,ymax); TH2D *hb=new TH2D("hb","",nbin,xmin,xmax,nbin,ymin,ymax); hs->SetXTitle(v0); hs->SetYTitle(v1); hb->SetXTitle(v0); hb->SetYTitle(v1); hs->SetMarkerColor(4); hb->SetMarkerColor(2); TH2F * hist = new TH2F( "MVA", "MVA", nbin,xmin,xmax,nbin,ymin,ymax); // Prepare input tree (this must be replaced by your data source) // in this example, there is a toy tree with signal and one with background events // we'll later on use only the "signal" events for the test in this example. Float_t MinMVA=10000, MaxMVA=-100000; for (UInt_t ibin=1; ibin<nbin+1; ibin++){ for (UInt_t jbin=1; jbin<nbin+1; jbin++){ var0 = hs->GetXaxis()->GetBinCenter(ibin); var1 = hs->GetYaxis()->GetBinCenter(jbin); Float_t mvaVal=reader->EvaluateMVA( "BDT" ) ; if (MinMVA>mvaVal) MinMVA=mvaVal; if (MaxMVA<mvaVal) MaxMVA=mvaVal; hist->SetBinContent(ibin,jbin, mvaVal); } } // now you need to try to find the MVA-value at which you cut for the plotting of the decision boundary // (Use the smallest number of misclassifications as criterion) const Int_t nValBins=100; Double_t sum = 0.; TH1F *mvaS= new TH1F("mvaS","",nValBins,MinMVA,MaxMVA); mvaS->SetXTitle("MVA-ouput"); mvaS->SetYTitle("#entries"); TH1F *mvaB= new TH1F("mvaB","",nValBins,MinMVA,MaxMVA); mvaB->SetXTitle("MVA-ouput"); mvaB->SetYTitle("#entries"); TH1F *mvaSC= new TH1F("mvaSC","",nValBins,MinMVA,MaxMVA); mvaSC->SetXTitle("MVA-ouput"); mvaSC->SetYTitle("cummulation"); TH1F *mvaBC= new TH1F("mvaBC","",nValBins,MinMVA,MaxMVA); mvaBC->SetXTitle("MVA-ouput"); mvaBC->SetYTitle("cummulation"); Long64_t nentries; nentries = signal->GetEntries(); for (Long64_t is=0; is<nentries;is++) { signal->GetEntry(is); sum +=sWeight; var0 = svar0; var1 = svar1; Float_t mvaVal=reader->EvaluateMVA( "BDT" ) ; hs->Fill(svar0,svar1); mvaS->Fill(mvaVal,sWeight); } nentries = background->GetEntries(); for (Long64_t ib=0; ib<nentries;ib++) { background->GetEntry(ib); sum +=bWeight; var0 = bvar0; var1 = bvar1; Float_t mvaVal=reader->EvaluateMVA( "BDT" ) ; hb->Fill(bvar0,bvar1); mvaB->Fill(mvaVal,bWeight); } SeparationBase *sepGain = new MisClassificationError(); //SeparationBase *sepGain = new GiniIndex(); //SeparationBase *sepGain = new CrossEntropy(); Double_t sTot = mvaS->GetSum(); Double_t bTot = mvaB->GetSum(); mvaSC->SetBinContent(1,mvaS->GetBinContent(1)); mvaBC->SetBinContent(1,mvaB->GetBinContent(1)); Double_t sSel=mvaSC->GetBinContent(1); Double_t bSel=mvaBC->GetBinContent(1); Double_t sSelBest=0; Double_t bSelBest=0; Double_t separationGain=sepGain->GetSeparationGain(sSel,bSel,sTot,bTot); Double_t mvaCut=mvaSC->GetBinCenter(1); Double_t mvaCutOrientation=1; // 1 if mva > mvaCut --> Signal and -1 if mva < mvaCut (i.e. mva*-1 > mvaCut*-1) --> Signal for (UInt_t ibin=2;ibin<nValBins;ibin++){ mvaSC->SetBinContent(ibin,mvaS->GetBinContent(ibin)+mvaSC->GetBinContent(ibin-1)); mvaBC->SetBinContent(ibin,mvaB->GetBinContent(ibin)+mvaBC->GetBinContent(ibin-1)); sSel=mvaSC->GetBinContent(ibin); bSel=mvaBC->GetBinContent(ibin); if (separationGain < sepGain->GetSeparationGain(sSel,bSel,sTot,bTot)){ separationGain = sepGain->GetSeparationGain(sSel,bSel,sTot,bTot); mvaCut=mvaSC->GetBinCenter(ibin); if (sSel/bSel > (sTot-sSel)/(bTot-bSel)) mvaCutOrientation=-1; else mvaCutOrientation=1; sSelBest=sSel; bSelBest=bSel; } } cout << "Min="<<MinMVA << " Max=" << MaxMVA << " sTot=" << sTot << " bTot=" << bTot << " sSel=" << sSelBest << " bSel=" << bSelBest << " sepGain="<<separationGain << " cut=" << mvaCut << " cutOrientation="<<mvaCutOrientation << endl; delete reader; gStyle->SetPalette(1); plot(hs,hb,hist ,v0,v1,mvaCut); TCanvas *cm=new TCanvas ("cm","",900,1200); cm->cd(); cm->Divide(1,2); cm->cd(1); mvaS->SetLineColor(4); mvaB->SetLineColor(2); mvaS->Draw(); mvaB->Draw("same"); cm->cd(2); mvaSC->SetLineColor(4); mvaBC->SetLineColor(2); mvaBC->Draw(); mvaSC->Draw("same"); // TH1F *add=(TH1F*)mvaBC->Clone("add"); // add->Add(mvaSC); // add->Draw(); // errh->Draw("same"); // // write histograms // TFile *target = new TFile( "TMVAPlotDecisionBoundary.root","RECREATE" ); hs->Write(); hb->Write(); hist->Write(); target->Close(); }