Ejemplo n.º 1
0
////////////////////////////////////////////////////////////////////////////////
/// Main                                                                     ///
////////////////////////////////////////////////////////////////////////////////
void GrowTree(TString process, std::string regMethod="BDTG", Long64_t beginEntry=0, Long64_t endEntry=-1)
{
    gROOT->SetBatch(1);
    TH1::SetDefaultSumw2(1);
    gROOT->LoadMacro("HelperFunctions.h");  //< make functions visible to TTreeFormula

    if (!TString(gROOT->GetVersion()).Contains("5.34")) {
        std::cout << "INCORRECT ROOT VERSION! Please use 5.34:" << std::endl;
        std::cout << "source /uscmst1/prod/sw/cms/slc5_amd64_gcc462/lcg/root/5.34.02-cms/bin/thisroot.csh" << std::endl;
        std::cout << "Return without doing anything." << std::endl;
        return;
    }

    const TString indir   = "/afs/cern.ch/work/d/degrutto/public/MiniAOD/ZnnHbb_Phys14_PU20bx25/skimV11/";
    const TString outdir  = "/afs/cern.ch/work/d/degrutto/public/MiniAOD/ZnnHbb_Phys14_PU20bx25/skimV11/step3/";
    const TString prefix  = "skim_";
    const TString suffix  = ".root";

    TFile *input = TFile::Open(indir + prefix + process + suffix);
    if (!input) {
        std::cout << "ERROR: Could not open input file." << std::endl;
        exit(1);
    }
    /// Make output directory if it doesn't exist
    if (gSystem->AccessPathName(outdir))
        gSystem->mkdir(outdir);

    std::cout << "--- GrowTree                 : Using input file: " << input->GetName() << std::endl;
    TTree *inTree = (TTree *) input->Get("tree");
    TH1F  *hcount = (TH1F *) input->Get("Count");
    TFile *output(0);
    if (beginEntry == 0 && endEntry == -1)
        output = TFile::Open(outdir + "Step3_" + process + suffix, "RECREATE");
    else
        output = TFile::Open(outdir + "Step3_" + process + TString::Format("_%Li_%Li", beginEntry, endEntry) + suffix, "RECREATE");
    TTree *outTree = inTree->CloneTree(0); // Do no copy the data yet
    /// The clone should not delete any shared i/o buffers.
    ResetDeleteBranches(outTree);
    


    ///-- Set branch addresses -------------------------------------------------
    EventInfo EVENT;
    double hJet_pt[MAXJ], hJet_eta[MAXJ], hJet_phi[MAXJ], hJet_m[MAXJ], hJet_ptRaw[MAXJ], hJet_genPt[MAXJ];
    int hJCidx[2];

    inTree->SetBranchStatus("*", 1);

    inTree->SetBranchStatus("hJCidx",1);
    inTree->SetBranchStatus("Jet_*",1);
    inTree->SetBranchAddress("hJCidx", &hJCidx);
    inTree->SetBranchAddress("Jet_pt", &hJet_pt);

    inTree->SetBranchAddress("Jet_eta", &hJet_eta);
    inTree->SetBranchAddress("Jet_phi", &hJet_phi);
    inTree->SetBranchAddress("Jet_mass", &hJet_m);
    inTree->SetBranchAddress("Jet_rawPt", &hJet_ptRaw);

    inTree->SetBranchAddress("Jet_mcPt", &hJet_genPt);


    ///-- Make new branches ----------------------------------------------------
    int EVENT_run, EVENT_event;  // set these as TTree index?
    float lumi_ = lumi, efflumi, efflumi_old, 
        efflumi_UEPS_up, efflumi_UEPS_down;
    float hJet_ptReg[2];
    float HptNorm, HptGen, HptReg;
    float HmassNorm, HmassGen, HmassReg;

    outTree->Branch("EVENT_run", &EVENT_run, "EVENT_run/I");
    outTree->Branch("EVENT_event", &EVENT_event, "EVENT_event/I");
    outTree->Branch("lumi", &lumi_, "lumi/F");
    outTree->Branch("efflumi", &efflumi, "efflumi/F");
    outTree->Branch("efflumi_old", &efflumi_old, "efflumi_old/F");
    outTree->Branch("efflumi_UEPS_up", &efflumi_UEPS_up, "efflumi_UEPS_up/F");
    outTree->Branch("efflumi_UEPS_down", &efflumi_UEPS_down, "efflumi_UEPS_down/F");
    outTree->Branch("hJet_ptReg", &hJet_ptReg, "hJet_ptReg[2]/F");
    
    outTree->Branch("HptNorm", &HptNorm, "HptNorm/F");
    outTree->Branch("HptGen", &HptGen, "HptGen/F");
    outTree->Branch("HptReg", &HptReg, "HptReg/F");
    outTree->Branch("HmassNorm", &HmassNorm, "HmassNorm/F");
    outTree->Branch("HmassGen", &HmassGen, "HmassGen/F");
    outTree->Branch("HmassReg", &HmassReg, "HmassReg/F");

    /// Get effective lumis
    std::map < std::string, float > efflumis = GetLumis();
    efflumi = efflumis[process.Data()];
    assert(efflumi > 0);
    efflumi_old       = efflumi;
    efflumi_UEPS_up   = efflumi * hcount->GetBinContent(2) / hcount->GetBinContent(3);
    efflumi_UEPS_down = efflumi * hcount->GetBinContent(2) / hcount->GetBinContent(4);



    TTreeFormula* ttf_lheweight = new TTreeFormula("ttf_lheweight", Form("%f", efflumi), inTree);
#ifdef STITCH
    std::map < std::string, std::string > lheweights = GetLHEWeights();
    TString process_lhe = process;
    if (process_lhe.BeginsWith("WJets") && process_lhe != "WJetsHW")
        process_lhe = "WJets";
    else if (process_lhe.BeginsWith("ZJets") && process_lhe != "ZJetsHW")
        process_lhe = "ZJets";
    else 
        process_lhe = "";
    TString lheweight = lheweights[process_lhe.Data()];
    if (lheweight != "") {
        delete ttf_lheweight;
        
        // Bug fix for ZJetsPtZ100
        if (process == "ZJetsPtZ100")
            lheweight.ReplaceAll("lheV_pt", "999");
        std::cout << "BUGFIX: " << lheweight << std::endl;
        ttf_lheweight = new TTreeFormula("ttf_lheweight", lheweight, inTree);
    }
#endif
    ttf_lheweight->SetQuickLoad(1);

    // regression stuff here

    
    ///-- Setup TMVA Reader ----------------------------------------------------
    TMVA::Tools::Instance();  //< This loads the library
    TMVA::Reader * reader = new TMVA::Reader("!Color:!Silent");

    /// Get the variables
    const std::vector < std::string > & inputExpressionsReg = GetInputExpressionsReg();
    
   const UInt_t nvars = inputExpressionsReg.size();
   
    Float_t readerVars[nvars];
    int idx_rawpt = -1, idx_pt = -1, idx_et = -1, idx_mt = -1;
   
    for (UInt_t iexpr = 0; iexpr < nvars; iexpr++) {
        const TString& expr = inputExpressionsReg.at(iexpr);
        reader->AddVariable(expr, &readerVars[iexpr]);
        if      (expr.BeginsWith("breg_rawptJER := "))  idx_rawpt = iexpr;
        else if (expr.BeginsWith("breg_pt := "))        idx_pt = iexpr;
        else if (expr.BeginsWith("breg_et := "))        idx_et = iexpr;
        else if (expr.BeginsWith("breg_mt := "))        idx_mt = iexpr;
    }
    //    assert(idx_rawpt!=-1 && idx_pt!=-1 && idx_et!=-1 && idx_mt!=-1);
    assert(idx_rawpt!=-1 && idx_pt!=-1 );

    /// Setup TMVA regression inputs
    const std::vector < std::string > & inputExpressionsReg0 = GetInputExpressionsReg0();
    const std::vector < std::string > & inputExpressionsReg1 = GetInputExpressionsReg1();
    assert(inputExpressionsReg0.size() == nvars);
    assert(inputExpressionsReg1.size() == nvars);

    /// Load TMVA weights
    TString weightdir  = "weights/";
    TString weightfile = weightdir + "TMVARegression_" + regMethod + ".testweights.xml";
    reader->BookMVA(regMethod + " method", weightfile);
    
    TStopwatch sw;
    sw.Start();


    /// Create TTreeFormulas
    TTreeFormula *ttf = 0;
    std::vector < TTreeFormula * >::const_iterator formIt, formItEnd;
    std::vector < TTreeFormula * > inputFormulasReg0;
    std::vector < TTreeFormula * > inputFormulasReg1;
    std::vector < TTreeFormula * > inputFormulasFJReg0;
    std::vector < TTreeFormula * > inputFormulasFJReg1;
    std::vector < TTreeFormula * > inputFormulasFJReg2;

    
    for (UInt_t iexpr = 0; iexpr < nvars; iexpr++) {
        ttf = new TTreeFormula(Form("ttfreg%i_0", iexpr), inputExpressionsReg0.at(iexpr).c_str(), inTree);
        ttf->SetQuickLoad(1);
        inputFormulasReg0.push_back(ttf);
        ttf = new TTreeFormula(Form("ttfreg%i_1", iexpr), inputExpressionsReg1.at(iexpr).c_str(), inTree);
        ttf->SetQuickLoad(1);
        inputFormulasReg1.push_back(ttf);
    }
 


    ///-- Loop over events -----------------------------------------------------
    Int_t curTree = inTree->GetTreeNumber();
    const Long64_t nentries = inTree->GetEntries();
    if (endEntry < 0)  endEntry = nentries;

    Long64_t ievt = 0;
    for (ievt=TMath::Max(ievt, beginEntry); ievt<TMath::Min(nentries, endEntry); ievt++) {
        if (ievt % 2000 == 0)
            std::cout << "--- ... Processing event: " << ievt << std::endl;
    
        const Long64_t local_entry = inTree->LoadTree(ievt);  // faster, but only for TTreeFormula
        if (local_entry < 0)  break;
        inTree->GetEntry(ievt);  // same event as received by LoadTree()

        if (inTree->GetTreeNumber() != curTree) {
            curTree = inTree->GetTreeNumber();

            for (formIt=inputFormulasReg0.begin(), formItEnd=inputFormulasReg0.end(); formIt!=formItEnd; formIt++)
                (*formIt)->UpdateFormulaLeaves();  // if using TChain
            for (formIt=inputFormulasReg1.begin(), formItEnd=inputFormulasReg1.end(); formIt!=formItEnd; formIt++)
                (*formIt)->UpdateFormulaLeaves();  // if using TChain
            for (formIt=inputFormulasFJReg0.begin(), formItEnd=inputFormulasFJReg0.end(); formIt!=formItEnd; formIt++)
                (*formIt)->UpdateFormulaLeaves();  // if using TChain
            for (formIt=inputFormulasFJReg1.begin(), formItEnd=inputFormulasFJReg1.end(); formIt!=formItEnd; formIt++)
                (*formIt)->UpdateFormulaLeaves();  // if using TChain
            for (formIt=inputFormulasFJReg2.begin(), formItEnd=inputFormulasFJReg2.end(); formIt!=formItEnd; formIt++)
                (*formIt)->UpdateFormulaLeaves();  // if using TChain

            ttf_lheweight->UpdateFormulaLeaves();
        }


        /// These need to be called when arrays of variable size are used in TTree.
        for (formIt=inputFormulasReg0.begin(), formItEnd=inputFormulasReg0.end(); formIt!=formItEnd; formIt++)
            (*formIt)->GetNdata();
        for (formIt=inputFormulasReg1.begin(), formItEnd=inputFormulasReg1.end(); formIt!=formItEnd; formIt++)
            (*formIt)->GetNdata();
        for (formIt=inputFormulasFJReg0.begin(), formItEnd=inputFormulasFJReg0.end(); formIt!=formItEnd; formIt++)
            (*formIt)->GetNdata();
        for (formIt=inputFormulasFJReg1.begin(), formItEnd=inputFormulasFJReg1.end(); formIt!=formItEnd; formIt++)
            (*formIt)->GetNdata();
        for (formIt=inputFormulasFJReg2.begin(), formItEnd=inputFormulasFJReg2.end(); formIt!=formItEnd; formIt++)
            (*formIt)->GetNdata();

        ttf_lheweight->GetNdata();
        /// Fill branches
        EVENT_run = EVENT.run;
        EVENT_event = EVENT.event;



#ifdef STITCH        
        efflumi           = ttf_lheweight->EvalInstance();
	//        efflumi_UEPS_up   = efflumi * hcount->GetBinContent(2) / hcount->GetBinContent(3);
        //efflumi_UEPS_down = efflumi * hcount->GetBinContent(2) / hcount->GetBinContent(4);
#endif
    
        bool verbose = false;
 
	for (Int_t ihj = 0; ihj < 2; ihj++) {

   
            /// Evaluate TMVA regression output
            for (UInt_t iexpr = 0; iexpr < nvars; iexpr++) {
                if (ihj==0) {
                    readerVars[iexpr] = inputFormulasReg0.at(iexpr)->EvalInstance();

                } else if (ihj==1) {
                    readerVars[iexpr] = inputFormulasReg1.at(iexpr)->EvalInstance();
                }
            }

	    hJet_ptReg[ihj]               = (reader->EvaluateRegression(regMethod + " method"))[0];
            if (verbose)  std::cout << readerVars[idx_pt] << " " << readerVars[idx_rawpt] <<  " " << hJet_pt[ihj] << " " << hJet_ptReg[ihj] << " " << hJet_genPt[ihj] << std::endl;
        const TLorentzVector p4Zero                     = TLorentzVector(0., 0., 0., 0.);
	//	int idx =  hJCidx[0] ;
	//	std::cout << "the regressed pt for jet 0 is " << hJet_ptReg[0] << "; the hJCidx is " << hJCidx[0] << ", hence the origianl pt is " <<  hJet_pt[idx] << std::endl;

	
       
        const TLorentzVector& hJet_p4Norm_0             = makePtEtaPhiM(hJet_pt[hJCidx[0]]                , hJet_pt[hJCidx[0]], hJet_eta[hJCidx[0]], hJet_phi[hJCidx[0]], hJet_m[hJCidx[0]]);
        const TLorentzVector& hJet_p4Norm_1             = makePtEtaPhiM(hJet_pt[hJCidx[1]]                , hJet_pt[hJCidx[1]], hJet_eta[hJCidx[1]], hJet_phi[hJCidx[1]], hJet_m[hJCidx[1]]);
        const TLorentzVector& hJet_p4Gen_0              = hJet_genPt[hJCidx[0]] > 0 ? 
                                                          makePtEtaPhiM(hJet_genPt[hJCidx[0]]             , hJet_pt[hJCidx[0]], hJet_eta[hJCidx[0]], hJet_phi[hJCidx[0]], hJet_m[hJCidx[0]]) : p4Zero;
        const TLorentzVector& hJet_p4Gen_1              = hJet_genPt[hJCidx[1]] > 0 ? 
                                                          makePtEtaPhiM(hJet_genPt[hJCidx[1]]             , hJet_pt[hJCidx[1]], hJet_eta[hJCidx[1]], hJet_phi[hJCidx[1]], hJet_m[hJCidx[1]]) : p4Zero;
        const TLorentzVector& hJet_p4Reg_0              = makePtEtaPhiM(hJet_ptReg[0]             , hJet_pt[hJCidx[0]], hJet_eta[hJCidx[0]], hJet_phi[hJCidx[0]], hJet_m[hJCidx[0]]);
        const TLorentzVector& hJet_p4Reg_1              = makePtEtaPhiM(hJet_ptReg[1]             , hJet_pt[hJCidx[1]], hJet_eta[hJCidx[1]], hJet_phi[hJCidx[1]], hJet_m[hJCidx[1]]);
        HptNorm             = (hJet_p4Norm_0             + hJet_p4Norm_1            ).Pt();
        HptGen              = (hJet_p4Gen_0              + hJet_p4Gen_1             ).Pt();
        HptReg              = (hJet_p4Reg_0              + hJet_p4Reg_1             ).Pt();
        HmassNorm             = (hJet_p4Norm_0             + hJet_p4Norm_1            ).M();
        HmassGen              = (hJet_p4Gen_0              + hJet_p4Gen_1             ).M();
        HmassReg              = (hJet_p4Reg_0              + hJet_p4Reg_1             ).M();
	//        std::cout << "HmassReg is " << HmassReg << std::endl; 
	
	}
        outTree->Fill();  // fill it!
    }  // end loop over TTree entries

    /// Get elapsed time
    sw.Stop();
    std::cout << "--- End of event loop: ";
    sw.Print();

    output->cd();
    outTree->Write();
    output->Close();
    input->Close();

    delete input;
    delete output;
    for (formIt=inputFormulasReg0.begin(), formItEnd=inputFormulasReg0.end(); formIt!=formItEnd; formIt++)
        delete *formIt;
    for (formIt=inputFormulasReg1.begin(), formItEnd=inputFormulasReg1.end(); formIt!=formItEnd; formIt++)
        delete *formIt;
    for (formIt=inputFormulasFJReg0.begin(), formItEnd=inputFormulasFJReg0.end(); formIt!=formItEnd; formIt++)
        delete *formIt;
    for (formIt=inputFormulasFJReg1.begin(), formItEnd=inputFormulasFJReg1.end(); formIt!=formItEnd; formIt++)
        delete *formIt;
    for (formIt=inputFormulasFJReg2.begin(), formItEnd=inputFormulasFJReg2.end(); formIt!=formItEnd; formIt++)
        delete *formIt;

    delete ttf_lheweight;

    std::cout << "==> GrowTree is done!" << std::endl << std::endl;
    return;
}
Ejemplo n.º 2
0
void SkimClassification(TString process="ZnnH125")
{
    gROOT->LoadMacro("HelperFunctions.h" );  // make functions visible to TTreeFormula
    gROOT->SetBatch(1);

    //TChain * chain  = new TChain("tree");
    //TString fname   = "";
    //TString dijet   = "DiJetPt_";
    //TString dirMC   = "dcache:/pnfs/cms/WAX/resilient/jiafu/ZnunuHbb/" + tagMC + "/";
    //TString dirData = "dcache:/pnfs/cms/WAX/resilient/jiafu/ZnunuHbb/" + tagData + "/";
    TString indir   = "/afs/cern.ch/work/d/degrutto/public/MiniAOD/ZnnHbb_Spring15_PU20bx25/skimV12_v2/step3/";
    TString outdir  = "/afs/cern.ch/work/d/degrutto/public/MiniAOD/ZnnHbb_Spring15_PU20bx25/skimV12_v2/step3/skim_ZnnH_classification/";
    TString prefix  = "Step3_";
    TString suffix  = ".root";

    TFile *input = TFile::Open(indir + prefix + process + suffix);
    if (!input) {
        std::cout << "ERROR: Could not open input file." << std::endl;
        exit(1);
    }
    TTree *tree   = (TTree *) input->Get("tree");
    Long64_t entries = tree->GetEntriesFast();
    
    // Make output directory if it doesn't exist
    if (gSystem->AccessPathName(outdir))
        gSystem->mkdir(outdir);
    TString outname = outdir + prefix + Form("%s.root", process.Data());

    TFile* output = TFile::Open(outname, "RECREATE");

    // Get selections
    const std::vector < std::string > & selExpressions = GetSelExpressions("ZnunuHighPt") ;
    //    const UInt_t nsels = 3;  // ZnunuHighPt, ZnunuLowPt, ZnunuLowCSV
    const UInt_t nsels = 1;  // just one now...  ZnunuHighPt, ZnunuLowPt, ZnunuLowCSV

    //    assert(nsels == selExpressions.size()); <-- fixME
    // even-number events for training, odd-number events for testing
    TCut evenselection = "evt  %2 == 0";
    TCut oddselection  = "evt %2 == 1";

    TTreeFormula *ttf = 0;
    std::vector < TTreeFormula * >::const_iterator formIt, formItEnd;

    // Loop over selections
    std::vector<Long64_t> ventries;
    for (unsigned int i = 0; i < nsels; i++) {
        TString selname = "ZnunuHighPt";
        if (i == 1) {
            selname = "ZnunuMedPt";
	    //	    selExpressions = GetSelExpressions("ZnunuMedPt") ;
        } else if (i == 2) {
            selname = "ZnunuLowPt";
	    // selExpressions = GetSelExpressions("ZnunuLowPt") ;
        }

        TTree *t1 = (TTree*) tree->CloneTree(0);
        TTree *t2 = (TTree*) tree->CloneTree(0);
        
        t1->SetName(TString::Format("%s_%s_train", tree->GetName(), selname.Data()));
        t2->SetName(TString::Format("%s_%s", tree->GetName(), selname.Data()));
        
        // The clones should not delete any shared i/o buffers.
        ResetDeleteBranches(t1);
        ResetDeleteBranches(t2);
        
        ttf = new TTreeFormula(Form("ttfsel%i", i), selExpressions.at(i).c_str(), tree);
        ttf->SetQuickLoad(1);
        TTreeFormula *ttf1 = new TTreeFormula(Form("ttfeven%i", i), evenselection, tree);
        ttf1->SetQuickLoad(1);
        TTreeFormula *ttf2 = new TTreeFormula(Form("ttfodd%i", i), oddselection, tree);
        ttf2->SetQuickLoad(1);
        if (!ttf || !ttf->GetNdim()) {
            std::cerr << "ERROR: Failed to find any TTree variable from the selection: " << selExpressions.at(i) << std::endl;
            return;
        }

        
        /// Loop over events
        Int_t curTree = tree->GetTreeNumber();
        const Long64_t nentries = tree->GetEntries();
        for (Long64_t ievt = 0; ievt < nentries; ievt++) {
            Long64_t entryNumber = tree->GetEntryNumber(ievt);
            if (entryNumber < 0)  break;
            Long64_t localEntry = tree->LoadTree(entryNumber);
            if (localEntry < 0)  break;
            if (tree->GetTreeNumber() != curTree) {
                curTree = tree->GetTreeNumber();
                ttf ->UpdateFormulaLeaves();  // if using TChain
                ttf1->UpdateFormulaLeaves();  // if using TChain
                ttf2->UpdateFormulaLeaves();  // if using TChain
            }
            
            const Int_t ndata = ttf->GetNdata();
            Bool_t keep = kFALSE;
            for(Int_t current = 0; current<ndata && !keep; current++) {
                keep |= (bool(ttf->EvalInstance(current)) != 0);
            }
            if (!keep)  continue;


            bool even  = (bool) ttf1->EvalInstance();
            bool odd   = (bool) ttf2->EvalInstance();
            if (even && odd) {
                std::cerr << "ERROR: An event cannot be even and odd at the same time." << std::cout;
                return;
            }

            tree->GetEntry(entryNumber, 1);  // get all branches
            if (even) {
                t1->Fill();
            } else {
                t2->Fill();
            }
        }  // end loop over events

        t1->Write();
        t2->Write();
        
        ventries.push_back(t1->GetEntriesFast() + t2->GetEntriesFast());
        
        delete ttf;
        delete ttf1;
        delete ttf2;
    }
    
    std::clog << process << ": skimmed from " << entries << " to " << ventries[0] << " (ZnunuHighPt), " << ventries[1] << " (ZnunuLowPt), " << ventries[2] << " (ZnunuLowCSV) " << " entries." << std::endl;

    output->Close();
    input->Close();

    delete output;
    delete input;

    return;
}