//////////////////////////////////////////////////////////////////////////////// /// Main /// //////////////////////////////////////////////////////////////////////////////// void GrowTree(TString process, std::string regMethod="BDTG", Long64_t beginEntry=0, Long64_t endEntry=-1) { gROOT->SetBatch(1); TH1::SetDefaultSumw2(1); gROOT->LoadMacro("HelperFunctions.h"); //< make functions visible to TTreeFormula if (!TString(gROOT->GetVersion()).Contains("5.34")) { std::cout << "INCORRECT ROOT VERSION! Please use 5.34:" << std::endl; std::cout << "source /uscmst1/prod/sw/cms/slc5_amd64_gcc462/lcg/root/5.34.02-cms/bin/thisroot.csh" << std::endl; std::cout << "Return without doing anything." << std::endl; return; } const TString indir = "/afs/cern.ch/work/d/degrutto/public/MiniAOD/ZnnHbb_Phys14_PU20bx25/skimV11/"; const TString outdir = "/afs/cern.ch/work/d/degrutto/public/MiniAOD/ZnnHbb_Phys14_PU20bx25/skimV11/step3/"; const TString prefix = "skim_"; const TString suffix = ".root"; TFile *input = TFile::Open(indir + prefix + process + suffix); if (!input) { std::cout << "ERROR: Could not open input file." << std::endl; exit(1); } /// Make output directory if it doesn't exist if (gSystem->AccessPathName(outdir)) gSystem->mkdir(outdir); std::cout << "--- GrowTree : Using input file: " << input->GetName() << std::endl; TTree *inTree = (TTree *) input->Get("tree"); TH1F *hcount = (TH1F *) input->Get("Count"); TFile *output(0); if (beginEntry == 0 && endEntry == -1) output = TFile::Open(outdir + "Step3_" + process + suffix, "RECREATE"); else output = TFile::Open(outdir + "Step3_" + process + TString::Format("_%Li_%Li", beginEntry, endEntry) + suffix, "RECREATE"); TTree *outTree = inTree->CloneTree(0); // Do no copy the data yet /// The clone should not delete any shared i/o buffers. 
ResetDeleteBranches(outTree); ///-- Set branch addresses ------------------------------------------------- EventInfo EVENT; double hJet_pt[MAXJ], hJet_eta[MAXJ], hJet_phi[MAXJ], hJet_m[MAXJ], hJet_ptRaw[MAXJ], hJet_genPt[MAXJ]; int hJCidx[2]; inTree->SetBranchStatus("*", 1); inTree->SetBranchStatus("hJCidx",1); inTree->SetBranchStatus("Jet_*",1); inTree->SetBranchAddress("hJCidx", &hJCidx); inTree->SetBranchAddress("Jet_pt", &hJet_pt); inTree->SetBranchAddress("Jet_eta", &hJet_eta); inTree->SetBranchAddress("Jet_phi", &hJet_phi); inTree->SetBranchAddress("Jet_mass", &hJet_m); inTree->SetBranchAddress("Jet_rawPt", &hJet_ptRaw); inTree->SetBranchAddress("Jet_mcPt", &hJet_genPt); ///-- Make new branches ---------------------------------------------------- int EVENT_run, EVENT_event; // set these as TTree index? float lumi_ = lumi, efflumi, efflumi_old, efflumi_UEPS_up, efflumi_UEPS_down; float hJet_ptReg[2]; float HptNorm, HptGen, HptReg; float HmassNorm, HmassGen, HmassReg; outTree->Branch("EVENT_run", &EVENT_run, "EVENT_run/I"); outTree->Branch("EVENT_event", &EVENT_event, "EVENT_event/I"); outTree->Branch("lumi", &lumi_, "lumi/F"); outTree->Branch("efflumi", &efflumi, "efflumi/F"); outTree->Branch("efflumi_old", &efflumi_old, "efflumi_old/F"); outTree->Branch("efflumi_UEPS_up", &efflumi_UEPS_up, "efflumi_UEPS_up/F"); outTree->Branch("efflumi_UEPS_down", &efflumi_UEPS_down, "efflumi_UEPS_down/F"); outTree->Branch("hJet_ptReg", &hJet_ptReg, "hJet_ptReg[2]/F"); outTree->Branch("HptNorm", &HptNorm, "HptNorm/F"); outTree->Branch("HptGen", &HptGen, "HptGen/F"); outTree->Branch("HptReg", &HptReg, "HptReg/F"); outTree->Branch("HmassNorm", &HmassNorm, "HmassNorm/F"); outTree->Branch("HmassGen", &HmassGen, "HmassGen/F"); outTree->Branch("HmassReg", &HmassReg, "HmassReg/F"); /// Get effective lumis std::map < std::string, float > efflumis = GetLumis(); efflumi = efflumis[process.Data()]; assert(efflumi > 0); efflumi_old = efflumi; efflumi_UEPS_up = efflumi * 
hcount->GetBinContent(2) / hcount->GetBinContent(3); efflumi_UEPS_down = efflumi * hcount->GetBinContent(2) / hcount->GetBinContent(4); TTreeFormula* ttf_lheweight = new TTreeFormula("ttf_lheweight", Form("%f", efflumi), inTree); #ifdef STITCH std::map < std::string, std::string > lheweights = GetLHEWeights(); TString process_lhe = process; if (process_lhe.BeginsWith("WJets") && process_lhe != "WJetsHW") process_lhe = "WJets"; else if (process_lhe.BeginsWith("ZJets") && process_lhe != "ZJetsHW") process_lhe = "ZJets"; else process_lhe = ""; TString lheweight = lheweights[process_lhe.Data()]; if (lheweight != "") { delete ttf_lheweight; // Bug fix for ZJetsPtZ100 if (process == "ZJetsPtZ100") lheweight.ReplaceAll("lheV_pt", "999"); std::cout << "BUGFIX: " << lheweight << std::endl; ttf_lheweight = new TTreeFormula("ttf_lheweight", lheweight, inTree); } #endif ttf_lheweight->SetQuickLoad(1); // regression stuff here ///-- Setup TMVA Reader ---------------------------------------------------- TMVA::Tools::Instance(); //< This loads the library TMVA::Reader * reader = new TMVA::Reader("!Color:!Silent"); /// Get the variables const std::vector < std::string > & inputExpressionsReg = GetInputExpressionsReg(); const UInt_t nvars = inputExpressionsReg.size(); Float_t readerVars[nvars]; int idx_rawpt = -1, idx_pt = -1, idx_et = -1, idx_mt = -1; for (UInt_t iexpr = 0; iexpr < nvars; iexpr++) { const TString& expr = inputExpressionsReg.at(iexpr); reader->AddVariable(expr, &readerVars[iexpr]); if (expr.BeginsWith("breg_rawptJER := ")) idx_rawpt = iexpr; else if (expr.BeginsWith("breg_pt := ")) idx_pt = iexpr; else if (expr.BeginsWith("breg_et := ")) idx_et = iexpr; else if (expr.BeginsWith("breg_mt := ")) idx_mt = iexpr; } // assert(idx_rawpt!=-1 && idx_pt!=-1 && idx_et!=-1 && idx_mt!=-1); assert(idx_rawpt!=-1 && idx_pt!=-1 ); /// Setup TMVA regression inputs const std::vector < std::string > & inputExpressionsReg0 = GetInputExpressionsReg0(); const std::vector < std::string > 
& inputExpressionsReg1 = GetInputExpressionsReg1(); assert(inputExpressionsReg0.size() == nvars); assert(inputExpressionsReg1.size() == nvars); /// Load TMVA weights TString weightdir = "weights/"; TString weightfile = weightdir + "TMVARegression_" + regMethod + ".testweights.xml"; reader->BookMVA(regMethod + " method", weightfile); TStopwatch sw; sw.Start(); /// Create TTreeFormulas TTreeFormula *ttf = 0; std::vector < TTreeFormula * >::const_iterator formIt, formItEnd; std::vector < TTreeFormula * > inputFormulasReg0; std::vector < TTreeFormula * > inputFormulasReg1; std::vector < TTreeFormula * > inputFormulasFJReg0; std::vector < TTreeFormula * > inputFormulasFJReg1; std::vector < TTreeFormula * > inputFormulasFJReg2; for (UInt_t iexpr = 0; iexpr < nvars; iexpr++) { ttf = new TTreeFormula(Form("ttfreg%i_0", iexpr), inputExpressionsReg0.at(iexpr).c_str(), inTree); ttf->SetQuickLoad(1); inputFormulasReg0.push_back(ttf); ttf = new TTreeFormula(Form("ttfreg%i_1", iexpr), inputExpressionsReg1.at(iexpr).c_str(), inTree); ttf->SetQuickLoad(1); inputFormulasReg1.push_back(ttf); } ///-- Loop over events ----------------------------------------------------- Int_t curTree = inTree->GetTreeNumber(); const Long64_t nentries = inTree->GetEntries(); if (endEntry < 0) endEntry = nentries; Long64_t ievt = 0; for (ievt=TMath::Max(ievt, beginEntry); ievt<TMath::Min(nentries, endEntry); ievt++) { if (ievt % 2000 == 0) std::cout << "--- ... 
Processing event: " << ievt << std::endl; const Long64_t local_entry = inTree->LoadTree(ievt); // faster, but only for TTreeFormula if (local_entry < 0) break; inTree->GetEntry(ievt); // same event as received by LoadTree() if (inTree->GetTreeNumber() != curTree) { curTree = inTree->GetTreeNumber(); for (formIt=inputFormulasReg0.begin(), formItEnd=inputFormulasReg0.end(); formIt!=formItEnd; formIt++) (*formIt)->UpdateFormulaLeaves(); // if using TChain for (formIt=inputFormulasReg1.begin(), formItEnd=inputFormulasReg1.end(); formIt!=formItEnd; formIt++) (*formIt)->UpdateFormulaLeaves(); // if using TChain for (formIt=inputFormulasFJReg0.begin(), formItEnd=inputFormulasFJReg0.end(); formIt!=formItEnd; formIt++) (*formIt)->UpdateFormulaLeaves(); // if using TChain for (formIt=inputFormulasFJReg1.begin(), formItEnd=inputFormulasFJReg1.end(); formIt!=formItEnd; formIt++) (*formIt)->UpdateFormulaLeaves(); // if using TChain for (formIt=inputFormulasFJReg2.begin(), formItEnd=inputFormulasFJReg2.end(); formIt!=formItEnd; formIt++) (*formIt)->UpdateFormulaLeaves(); // if using TChain ttf_lheweight->UpdateFormulaLeaves(); } /// These need to be called when arrays of variable size are used in TTree. 
for (formIt=inputFormulasReg0.begin(), formItEnd=inputFormulasReg0.end(); formIt!=formItEnd; formIt++) (*formIt)->GetNdata(); for (formIt=inputFormulasReg1.begin(), formItEnd=inputFormulasReg1.end(); formIt!=formItEnd; formIt++) (*formIt)->GetNdata(); for (formIt=inputFormulasFJReg0.begin(), formItEnd=inputFormulasFJReg0.end(); formIt!=formItEnd; formIt++) (*formIt)->GetNdata(); for (formIt=inputFormulasFJReg1.begin(), formItEnd=inputFormulasFJReg1.end(); formIt!=formItEnd; formIt++) (*formIt)->GetNdata(); for (formIt=inputFormulasFJReg2.begin(), formItEnd=inputFormulasFJReg2.end(); formIt!=formItEnd; formIt++) (*formIt)->GetNdata(); ttf_lheweight->GetNdata(); /// Fill branches EVENT_run = EVENT.run; EVENT_event = EVENT.event; #ifdef STITCH efflumi = ttf_lheweight->EvalInstance(); // efflumi_UEPS_up = efflumi * hcount->GetBinContent(2) / hcount->GetBinContent(3); //efflumi_UEPS_down = efflumi * hcount->GetBinContent(2) / hcount->GetBinContent(4); #endif bool verbose = false; for (Int_t ihj = 0; ihj < 2; ihj++) { /// Evaluate TMVA regression output for (UInt_t iexpr = 0; iexpr < nvars; iexpr++) { if (ihj==0) { readerVars[iexpr] = inputFormulasReg0.at(iexpr)->EvalInstance(); } else if (ihj==1) { readerVars[iexpr] = inputFormulasReg1.at(iexpr)->EvalInstance(); } } hJet_ptReg[ihj] = (reader->EvaluateRegression(regMethod + " method"))[0]; if (verbose) std::cout << readerVars[idx_pt] << " " << readerVars[idx_rawpt] << " " << hJet_pt[ihj] << " " << hJet_ptReg[ihj] << " " << hJet_genPt[ihj] << std::endl; const TLorentzVector p4Zero = TLorentzVector(0., 0., 0., 0.); // int idx = hJCidx[0] ; // std::cout << "the regressed pt for jet 0 is " << hJet_ptReg[0] << "; the hJCidx is " << hJCidx[0] << ", hence the origianl pt is " << hJet_pt[idx] << std::endl; const TLorentzVector& hJet_p4Norm_0 = makePtEtaPhiM(hJet_pt[hJCidx[0]] , hJet_pt[hJCidx[0]], hJet_eta[hJCidx[0]], hJet_phi[hJCidx[0]], hJet_m[hJCidx[0]]); const TLorentzVector& hJet_p4Norm_1 = makePtEtaPhiM(hJet_pt[hJCidx[1]] 
, hJet_pt[hJCidx[1]], hJet_eta[hJCidx[1]], hJet_phi[hJCidx[1]], hJet_m[hJCidx[1]]); const TLorentzVector& hJet_p4Gen_0 = hJet_genPt[hJCidx[0]] > 0 ? makePtEtaPhiM(hJet_genPt[hJCidx[0]] , hJet_pt[hJCidx[0]], hJet_eta[hJCidx[0]], hJet_phi[hJCidx[0]], hJet_m[hJCidx[0]]) : p4Zero; const TLorentzVector& hJet_p4Gen_1 = hJet_genPt[hJCidx[1]] > 0 ? makePtEtaPhiM(hJet_genPt[hJCidx[1]] , hJet_pt[hJCidx[1]], hJet_eta[hJCidx[1]], hJet_phi[hJCidx[1]], hJet_m[hJCidx[1]]) : p4Zero; const TLorentzVector& hJet_p4Reg_0 = makePtEtaPhiM(hJet_ptReg[0] , hJet_pt[hJCidx[0]], hJet_eta[hJCidx[0]], hJet_phi[hJCidx[0]], hJet_m[hJCidx[0]]); const TLorentzVector& hJet_p4Reg_1 = makePtEtaPhiM(hJet_ptReg[1] , hJet_pt[hJCidx[1]], hJet_eta[hJCidx[1]], hJet_phi[hJCidx[1]], hJet_m[hJCidx[1]]); HptNorm = (hJet_p4Norm_0 + hJet_p4Norm_1 ).Pt(); HptGen = (hJet_p4Gen_0 + hJet_p4Gen_1 ).Pt(); HptReg = (hJet_p4Reg_0 + hJet_p4Reg_1 ).Pt(); HmassNorm = (hJet_p4Norm_0 + hJet_p4Norm_1 ).M(); HmassGen = (hJet_p4Gen_0 + hJet_p4Gen_1 ).M(); HmassReg = (hJet_p4Reg_0 + hJet_p4Reg_1 ).M(); // std::cout << "HmassReg is " << HmassReg << std::endl; } outTree->Fill(); // fill it! 
} // end loop over TTree entries /// Get elapsed time sw.Stop(); std::cout << "--- End of event loop: "; sw.Print(); output->cd(); outTree->Write(); output->Close(); input->Close(); delete input; delete output; for (formIt=inputFormulasReg0.begin(), formItEnd=inputFormulasReg0.end(); formIt!=formItEnd; formIt++) delete *formIt; for (formIt=inputFormulasReg1.begin(), formItEnd=inputFormulasReg1.end(); formIt!=formItEnd; formIt++) delete *formIt; for (formIt=inputFormulasFJReg0.begin(), formItEnd=inputFormulasFJReg0.end(); formIt!=formItEnd; formIt++) delete *formIt; for (formIt=inputFormulasFJReg1.begin(), formItEnd=inputFormulasFJReg1.end(); formIt!=formItEnd; formIt++) delete *formIt; for (formIt=inputFormulasFJReg2.begin(), formItEnd=inputFormulasFJReg2.end(); formIt!=formItEnd; formIt++) delete *formIt; delete ttf_lheweight; std::cout << "==> GrowTree is done!" << std::endl << std::endl; return; }
/// Split one Step3 sample into training and testing trees for classification.
///
/// Reads the TTree "tree" from `indir + "Step3_" + process + ".root"`. For each
/// of the `nsels` selections, entries passing the selection expression are
/// copied into one of two trees written to the output file:
///   - "<tree>_<selection>_train" for entries where `evt % 2 == 0`,
///   - "<tree>_<selection>"       for all other selected entries.
///
/// @param process  Sample name used to build the input and output file names.
///
/// The macro calls exit(1) if the input file, the input tree or the output file
/// cannot be obtained. If a selection expression is missing or invalid, or an
/// event evaluates as both even and odd, it reports the error, releases its
/// resources and returns without writing the trees of the failing selection.
void SkimClassification(TString process="ZnnH125")
{
    gROOT->LoadMacro("HelperFunctions.h" );  // make functions visible to TTreeFormula
    gROOT->SetBatch(1);

    TString indir  = "/afs/cern.ch/work/d/degrutto/public/MiniAOD/ZnnHbb_Spring15_PU20bx25/skimV12_v2/step3/";
    TString outdir = "/afs/cern.ch/work/d/degrutto/public/MiniAOD/ZnnHbb_Spring15_PU20bx25/skimV12_v2/step3/skim_ZnnH_classification/";
    TString prefix = "Step3_";
    TString suffix = ".root";

    TFile *input = TFile::Open(indir + prefix + process + suffix);
    if (!input) {
        std::cout << "ERROR: Could not open input file." << std::endl;
        exit(1);
    }
    TTree *tree = (TTree *) input->Get("tree");
    if (!tree) {
        std::cout << "ERROR: Could not find TTree \"tree\" in input file." << std::endl;
        exit(1);
    }
    Long64_t entries = tree->GetEntriesFast();

    // Make output directory if it doesn't exist
    if (gSystem->AccessPathName(outdir))
        gSystem->mkdir(outdir);

    TString outname = outdir + prefix + Form("%s.root", process.Data());
    TFile* output = TFile::Open(outname, "RECREATE");
    if (!output) {
        std::cout << "ERROR: Could not open output file." << std::endl;
        exit(1);
    }

    // Get selections
    const std::vector < std::string > & selExpressions = GetSelExpressions("ZnunuHighPt");
    // const UInt_t nsels = 3;  // ZnunuHighPt, ZnunuLowPt, ZnunuLowCSV
    const UInt_t nsels = 1;  // just one now...
    // assert(nsels == selExpressions.size());  <-- fixME

    // even-number events for training, odd-number events for testing
    TCut evenselection = "evt %2 == 0";
    TCut oddselection  = "evt %2 == 1";

    // One entry per selection that was successfully processed; the summary at
    // the end prints exactly these, so it never reads past the end.
    std::vector<Long64_t> ventries;
    std::vector<TString>  vnames;

    bool ok = true;
    if (selExpressions.size() < nsels) {
        std::cerr << "ERROR: Expected at least " << nsels << " selection expression(s), got " << selExpressions.size() << std::endl;
        ok = false;
    }

    // Loop over selections
    for (unsigned int i = 0; ok && i < nsels; i++) {
        TString selname = "ZnunuHighPt";
        if (i == 1) {
            selname = "ZnunuMedPt";
        } else if (i == 2) {
            selname = "ZnunuLowPt";
        }

        TTree *t1 = (TTree*) tree->CloneTree(0);
        TTree *t2 = (TTree*) tree->CloneTree(0);
        t1->SetName(TString::Format("%s_%s_train", tree->GetName(), selname.Data()));
        t2->SetName(TString::Format("%s_%s", tree->GetName(), selname.Data()));
        // The clones should not delete any shared i/o buffers.
        ResetDeleteBranches(t1);
        ResetDeleteBranches(t2);

        TTreeFormula *ttf = new TTreeFormula(Form("ttfsel%i", i), selExpressions.at(i).c_str(), tree);
        ttf->SetQuickLoad(1);
        TTreeFormula *ttf1 = new TTreeFormula(Form("ttfeven%i", i), evenselection, tree);
        ttf1->SetQuickLoad(1);
        TTreeFormula *ttf2 = new TTreeFormula(Form("ttfodd%i", i), oddselection, tree);
        ttf2->SetQuickLoad(1);

        if (!ttf->GetNdim()) {
            std::cerr << "ERROR: Failed to find any TTree variable from the selection: " << selExpressions.at(i) << std::endl;
            ok = false;
        }

        /// Loop over events
        Int_t curTree = tree->GetTreeNumber();
        const Long64_t nentries = tree->GetEntries();
        for (Long64_t ievt = 0; ok && ievt < nentries; ievt++) {
            Long64_t entryNumber = tree->GetEntryNumber(ievt);
            if (entryNumber < 0)  break;
            Long64_t localEntry = tree->LoadTree(entryNumber);
            if (localEntry < 0)  break;
            if (tree->GetTreeNumber() != curTree) {
                curTree = tree->GetTreeNumber();
                ttf ->UpdateFormulaLeaves();  // if using TChain
                ttf1->UpdateFormulaLeaves();  // if using TChain
                ttf2->UpdateFormulaLeaves();  // if using TChain
            }

            // Keep the entry if any instance of the selection evaluates non-zero.
            const Int_t ndata = ttf->GetNdata();
            Bool_t keep = kFALSE;
            for (Int_t current = 0; current < ndata && !keep; current++) {
                keep = (ttf->EvalInstance(current) != 0);
            }
            if (!keep)  continue;

            bool even = (bool) ttf1->EvalInstance();
            bool odd  = (bool) ttf2->EvalInstance();
            if (even && odd) {
                std::cerr << "ERROR: An event cannot be even and odd at the same time." << std::endl;
                ok = false;
                break;
            }

            tree->GetEntry(entryNumber, 1);  // get all branches
            if (even) {
                t1->Fill();
            } else {
                t2->Fill();
            }
        }  // end loop over events

        if (ok) {
            output->cd();
            t1->Write();
            t2->Write();
            ventries.push_back(t1->GetEntriesFast() + t2->GetEntriesFast());
            vnames.push_back(selname);
        }

        // Always release the formulas, including on the error paths.
        delete ttf;
        delete ttf1;
        delete ttf2;
    }

    if (ok) {
        std::clog << process << ": skimmed from " << entries << " to ";
        for (unsigned int k = 0; k < ventries.size(); k++) {
            if (k > 0)  std::clog << ", ";
            std::clog << ventries[k] << " (" << vnames[k] << ")";
        }
        std::clog << " entries." << std::endl;
    }

    output->Close();
    input->Close();
    delete output;
    delete input;
    return;
}