/// Adds a batch of documents to the model and rebuilds the sampler state.
///
/// For every index i in [0, names.size()) the tokens in contents[i] are
/// registered with `dictionary`, and a new Text(names[i], features) is
/// appended to `texts`. The per-type tables (`numTypes`, `typeTotals`,
/// `typeTopicCounts`, `maxTypeCount`) are then refreshed from the dictionary,
/// each token of the newly added documents gets a uniformly random topic in
/// [0, numTopics-1] drawn from a generator seeded with `randomSeed`, and
/// finally buildInitialTypeTopicCounts() and initializeHistograms() are run.
///
/// @param names     document names; its size defines how many documents are added
/// @param contents  token lists, one per document; must hold at least
///                  names.size() entries (extra entries are ignored)
///
/// Any exception raised while loading is caught here and reported on
/// std::cerr; nothing is propagated to the caller.
void TopicModel::addDocuments(std::vector<std::string>& names, std::vector<std::vector<std::string>>& contents)
{
    try
    {
        const size_t docs = names.size();

        // contents[ind] is read for every name, so a shorter contents vector
        // would be an out-of-bounds access. Refuse the batch instead.
        if (contents.size() < docs)
        {
            std::cerr << "Can't load documents: fewer content entries than names" << std::endl;
            return;
        }

        // Documents from earlier calls occupy texts[0 .. firstNew); the ones
        // added below start here.
        const size_t firstNew = texts.size();

        for (size_t ind = 0; ind < docs; ++ind)
        {
            std::vector<int> features(contents[ind].size());
            dictionary.addFeatures(contents[ind], features);
            texts.push_back(new Text(names[ind], features));
        }

        numTypes = dictionary.getFeaturesNum();
        typeTotals = dictionary.getFeatureTotals();
        typeTopicCounts.resize(numTypes);
        maxTypeCount = 0;

        for (size_t type = 0; type < numTypes; ++type)
        {
            if (typeTotals[type] > maxTypeCount) { maxTypeCount = typeTotals[type]; }
            // A type cannot be spread over more topics than it has occurrences.
            typeTopicCounts[type].resize(std::min(numTopics, typeTotals[type]));
        }

        boost::random::mt19937 rng;
        rng.seed(randomSeed);
        boost::random::uniform_int_distribution<> uint_dist(0, numTopics-1);

        // Only the documents appended by this call get a fresh random
        // assignment; indexing from 0 would hit older documents instead.
        for (size_t ind = firstNew; ind < texts.size(); ++ind)
        {
            // Bind by reference: the previous code copied the vector, so the
            // random topics were discarded as soon as the loop body ended.
            // NOTE(review): this persists the assignment only if
            // getFeatureTopic() returns a mutable reference - confirm; if it
            // returns by value, Text needs a setter for the topics.
            auto&& topics = texts[ind]->getFeatureTopic();
            for (size_t position = 0; position < topics.size(); ++position)
            {
                topics[position] = uint_dist(rng);
            }
        }

        buildInitialTypeTopicCounts();
        initializeHistograms();
    }
    catch(...)
    {
        std::cerr << "Can't load documents" << std::endl;

    }
}
// Example #2
// 0
/// Builds muon fake-rate ("efficiency") plots and a pT/|eta| lookup table
/// from a muon ntuple.
///
/// Every entry of the tree loaded from `filename` with generator pT > 3 and
/// |generator eta| < 2.5 counts as a denominator entry; it also counts as a
/// numerator entry when fMuPt > 0. For numerator entries the relative pT
/// difference (fMuPt - fMuGenPt) / fMuGenPt is filled into a response
/// histogram chosen by pT/|eta| bin.
///
/// Outputs:
///  - Efficiency_Pt.gif, Efficiency_Eta.gif, Efficiency_Rho.gif, Efficiency_Npv.gif
///  - "FakeRate<suffix>.root" (opened in UPDATE mode) holding the efficiency
///    graphs/histograms and the response histograms
///  - "FakeMuonEfficiencyMap.h", a generated header with the binned lookup
///    function GetMuonEfficiencyPtEta(Pt, Eta)
///
/// @param filename  path of the input ntuple passed to cmsana::MuonTree::LoadTree
/// @param Label     suffix for the output ROOT file name; a non-empty label is
///                  prefixed with "_" (default "ZZ" gives "FakeRate_ZZ.root")
void doCreateMuonFakeRateMap(const string filename, const string Label = "ZZ") 
{  
  // Start and Show must use the same name: Show() on a benchmark that was
  // never started reports nothing.
  const char* const benchmarkName = "HZZTemplate";
  gBenchmark->Start(benchmarkName);
  string label = Label;
  if (Label != "") label = "_" + Label;

  //--------------------------------------------------------------------------------------------------------------
  // Settings 
  //==============================================================================================================

  //*****************************************************************************************
  //Make some histograms
  //*****************************************************************************************
  // NOTE(review): the axis titles below mention electrons/photons although
  // the histograms are filled with muon quantities; left as-is because the
  // titles are part of the produced output.
  TH1F *histDenominatorPt = new TH1F ("histDenominatorPt",";Electron p_{T} [GeV/c^{2}]; Number of Events", 50, 0 , 100);
  TH1F *histNumeratorPt = new TH1F ("histNumeratorPt",";Electron p_{T} [GeV/c^{2}]; Number of Events", 50, 0 , 100);
  TH1F *histDenominatorEta = new TH1F ("histDenominatorEta",";Electron Eta; Number of Events", 50, -2.5 , 2.5);
  TH1F *histNumeratorEta = new TH1F ("histNumeratorEta",";Electron Eta; Number of Events", 50, -2.5 , 2.5);
  // The Phi histograms are booked but never filled in this function.
  TH1F *histDenominatorPhi = new TH1F ("histDenominatorPhi",";Electron Phi; Number of Events", 50, 0 , 3.2);
  TH1F *histNumeratorPhi = new TH1F ("histNumeratorPhi",";Electron Phi; Number of Events", 50, 0 , 3.2);
  TH1F *histDenominatorRho = new TH1F ("histDenominatorRho",";Electron Rho; Number of Events", 50, 0 , 100);
  TH1F *histNumeratorRho = new TH1F ("histNumeratorRho",";Electron Rho; Number of Events", 50, 0 , 100);
  TH1F *histDenominatorNpv = new TH1F ("histDenominatorNpv",";Electron Npv; Number of Events", 50, 0 , 100);
  TH1F *histNumeratorNpv = new TH1F ("histNumeratorNpv",";Electron Npv; Number of Events", 50, 0 , 100);

  TH2F *histDenominatorPtEta = new TH2F ("histDenominatorPtEta",";Photon p_{T} [GeV/c] ; Photon #eta; Number of Events", 50, 0 , 200, 50, 0, 3.0);
  TH2F *histNumeratorPtEta = new TH2F ("histNumeratorPtEta",";Photon p_{T} [GeV/c] ; Photon #eta; Number of Events", 50, 0 , 200, 50, 0, 3.0);

  // Same quantities split by |fPdgId|: 5 -> "_b", 21 -> "_g", anything else -> "_lq".
  TH1F *histDenominatorPt_lq = new TH1F ("histDenominatorPt_lq",";Electron p_{T} [GeV/c^{2}]; Number of Events", 25, 0 , 100);
  TH1F *histNumeratorPt_lq = new TH1F ("histNumeratorPt_lq",";Electron p_{T} [GeV/c^{2}]; Number of Events", 25, 0 , 100);
  TH1F *histDenominatorPt_b = new TH1F ("histDenominatorPt_b",";Electron p_{T} [GeV/c^{2}]; Number of Events", 25, 0 , 100);
  TH1F *histNumeratorPt_b = new TH1F ("histNumeratorPt_b",";Electron p_{T} [GeV/c^{2}]; Number of Events", 25, 0 , 100);
  TH1F *histDenominatorPt_g = new TH1F ("histDenominatorPt_g",";Electron p_{T} [GeV/c^{2}]; Number of Events", 25, 0 , 100);
  TH1F *histNumeratorPt_g = new TH1F ("histNumeratorPt_g",";Electron p_{T} [GeV/c^{2}]; Number of Events", 25, 0 , 100);
  TH2F *histDenominatorPtEta_lq = new TH2F ("histDenominatorPtEta_lq",";Photon p_{T} [GeV/c] ; Photon #eta; Number of Events", 50, 0 , 200, 50, 0, 3.0);
  TH2F *histNumeratorPtEta_lq = new TH2F ("histNumeratorPtEta_lq",";Photon p_{T} [GeV/c] ; Photon #eta; Number of Events", 50, 0 , 200, 50, 0, 3.0);
  TH2F *histDenominatorPtEta_b = new TH2F ("histDenominatorPtEta_b",";Photon p_{T} [GeV/c] ; Photon #eta; Number of Events", 50, 0 , 200, 50, 0, 3.0);
  TH2F *histNumeratorPtEta_b = new TH2F ("histNumeratorPtEta_b",";Photon p_{T} [GeV/c] ; Photon #eta; Number of Events", 50, 0 , 200, 50, 0, 3.0);
  TH2F *histDenominatorPtEta_g = new TH2F ("histDenominatorPtEta_g",";Photon p_{T} [GeV/c] ; Photon #eta; Number of Events", 50, 0 , 200, 50, 0, 3.0);
  TH2F *histNumeratorPtEta_g = new TH2F ("histNumeratorPtEta_g",";Photon p_{T} [GeV/c] ; Photon #eta; Number of Events", 50, 0 , 200, 50, 0, 3.0);


  //********************************************************
  // Create Arrays to store the map
  //********************************************************
  // Binning of the pT response histograms.
  const UInt_t NPtBins_Response = 4; 
  const UInt_t NEtaBins_Response = 3;
  double ptBins_Response[NPtBins_Response+1] = { 5, 7, 10, 15, 25 };
  double etaBins_Response[NEtaBins_Response+1] = { 0.0, 1.2, 2.2, 2.4 };

  // Binning of the efficiency lookup table (eta edges are in |eta|).
  const UInt_t NPtBins = 7; 
  const UInt_t NEtaBins = 3;
  double ptBins[NPtBins+1] = { 5, 7, 10, 15, 20, 25, 30, 35 };
  double etaBins[NEtaBins+1] = { 0.0, 1.2, 2.2, 2.4 };

  vector<vector<double> > NDenominator_Muons_PtEta;
  vector<vector<double> > NNumerator_Muons_PtEta;
  vector<vector<double> > Efficiency_Muons_PtEta;
  vector<vector<TH1F*> > PtResolution_PtEta_Muons;

  // NOTE(review): the loops further down index these containers up to
  // N+2 per axis, so the helpers presumably allocate underflow/overflow
  // slots - confirm against initialize2DArray / initializeHistograms.
  initialize2DArray(NDenominator_Muons_PtEta, NPtBins, NEtaBins);
  initialize2DArray(NNumerator_Muons_PtEta, NPtBins, NEtaBins);
  initialize2DArray(Efficiency_Muons_PtEta, NPtBins, NEtaBins);
  initializeHistograms(PtResolution_PtEta_Muons, "LeptonPtResolution_Muons", NPtBins_Response, NEtaBins_Response, 100, -1.0, 0.25);  

  //--------------------------------------------------------------------------------------------------------------
  // Read efficiency map ntuple
  //==============================================================================================================  
  cmsana::MuonTree muTree;
  muTree.LoadTree(filename.c_str());
  // NOTE(review): the tree-type constant is taken from cmsana::ElectronTree
  // although this is a cmsana::MuonTree - confirm this is intended.
  muTree.InitTree(cmsana::ElectronTree::kEleTreeLight);

  // Without a tree there is nothing to read; bail out instead of
  // dereferencing a null pointer.
  if (!muTree.tree_) {
    std::cerr << "doCreateMuonFakeRateMap: no tree loaded from " << filename << "\n";
    gBenchmark->Show(benchmarkName);
    return;
  }

  cout << "Total : " << muTree.tree_->GetEntries() << "\n";
  for(UInt_t i=0; i<muTree.tree_->GetEntries(); i++) {       	
    muTree.tree_->GetEntry(i);
    if (i % 1000000 == 0) cout << "Mu " << i << endl;

    Int_t tmpPtBin = FindBin( muTree.fMuGenPt , ptBins, NPtBins);
    Int_t tmpEtaBin = FindBin( fabs(muTree.fMuGenEta) , etaBins, NEtaBins);
    Int_t tmpPtBin_Response = FindBin( muTree.fMuGenPt , ptBins_Response, NPtBins_Response);
    Int_t tmpEtaBin_Response = FindBin( fabs(muTree.fMuGenEta) , etaBins_Response, NEtaBins_Response);


    //selection cuts
    if (!(muTree.fMuGenPt > 3 && fabs(muTree.fMuGenEta) < 2.5)) continue;

    //**** PT - ETA ****
    NDenominator_Muons_PtEta[tmpPtBin][tmpEtaBin] += 1.0;
    histDenominatorPtEta->Fill(muTree.fMuGenPt,fabs(muTree.fMuGenEta));
    histDenominatorPt->Fill(muTree.fMuGenPt);
    histDenominatorEta->Fill(muTree.fMuGenEta);
    histDenominatorRho->Fill(muTree.fRho);
    histDenominatorNpv->Fill(muTree.fNVertices);

    if (abs(muTree.fPdgId) == 5) {
      histDenominatorPtEta_b->Fill(muTree.fMuGenPt,fabs(muTree.fMuGenEta));
      histDenominatorPt_b->Fill(muTree.fMuGenPt);
    } else if (abs(muTree.fPdgId) == 21) {
      histDenominatorPtEta_g->Fill(muTree.fMuGenPt,fabs(muTree.fMuGenEta));
      histDenominatorPt_g->Fill(muTree.fMuGenPt);
    } else {
      histDenominatorPtEta_lq->Fill(muTree.fMuGenPt,fabs(muTree.fMuGenEta));
      histDenominatorPt_lq->Fill(muTree.fMuGenPt);
    }

    // Numerator: entries with a positive fMuPt.
    if(muTree.fMuPt > 0) {
      NNumerator_Muons_PtEta[tmpPtBin][tmpEtaBin] += 1.0;      
      histNumeratorPtEta->Fill(muTree.fMuGenPt,fabs(muTree.fMuGenEta));
      histNumeratorPt->Fill(muTree.fMuGenPt);
      histNumeratorEta->Fill(muTree.fMuGenEta);
      histNumeratorRho->Fill(muTree.fRho);
      histNumeratorNpv->Fill(muTree.fNVertices);

      if (abs(muTree.fPdgId) == 5) {
        histNumeratorPtEta_b->Fill(muTree.fMuGenPt,fabs(muTree.fMuGenEta));
        histNumeratorPt_b->Fill(muTree.fMuGenPt);
      } else if (abs(muTree.fPdgId) == 21) {
        histNumeratorPtEta_g->Fill(muTree.fMuGenPt,fabs(muTree.fMuGenEta));        
        histNumeratorPt_g->Fill(muTree.fMuGenPt);
      } else {
        histNumeratorPtEta_lq->Fill(muTree.fMuGenPt,fabs(muTree.fMuGenEta));
        histNumeratorPt_lq->Fill(muTree.fMuGenPt);
      }

      //fill response function
      // fMuGenPt > 3 is guaranteed by the selection cut, so the division is safe.
      PtResolution_PtEta_Muons[tmpPtBin_Response][tmpEtaBin_Response]->Fill( (muTree.fMuPt - muTree.fMuGenPt)/muTree.fMuGenPt , 1.0);

    }

  }

  //--------------------------------------------------------------------------------------------------------------
  // Make Efficiency Plots
  //==============================================================================================================

  TGraphAsymmErrors *efficiency_pt = cmsana::createEfficiencyGraph(histNumeratorPt, histDenominatorPt, "Efficiency_Pt" , vector<double>() ,  -99, -99, 0, 1, false);
  TGraphAsymmErrors *efficiency_eta = cmsana::createEfficiencyGraph(histNumeratorEta, histDenominatorEta, "Efficiency_Eta" , vector<double>() ,  -99, -99, 0, 1, false);
  TGraphAsymmErrors *efficiency_rho = cmsana::createEfficiencyGraph(histNumeratorRho, histDenominatorRho, "Efficiency_Rho" , vector<double>() ,  -99, -99, 0, 1, false);
  TGraphAsymmErrors *efficiency_npv = cmsana::createEfficiencyGraph(histNumeratorNpv, histDenominatorNpv, "Efficiency_Npv" , vector<double>() ,  -99, -99, 0, 1, false);
  TH2F *efficiency_pteta = cmsana::createEfficiencyHist2D(histNumeratorPtEta, histDenominatorPtEta, "Efficiency_PtEta" , vector<double>() ,vector<double>());  


  TGraphAsymmErrors *efficiency_lq_pt = cmsana::createEfficiencyGraph(histNumeratorPt_lq, histDenominatorPt_lq, "Efficiency_lq_Pt" , vector<double>() ,  -99, -99, 0, 1, false);
  TGraphAsymmErrors *efficiency_b_pt = cmsana::createEfficiencyGraph(histNumeratorPt_b, histDenominatorPt_b, "Efficiency_b_Pt" , vector<double>() ,  -99, -99, 0, 1, false);
  TGraphAsymmErrors *efficiency_g_pt = cmsana::createEfficiencyGraph(histNumeratorPt_g, histDenominatorPt_g, "Efficiency_g_Pt" , vector<double>() ,  -99, -99, 0, 1, false);
  TH2F *efficiency_lq_pteta = cmsana::createEfficiencyHist2D(histNumeratorPtEta_lq, histDenominatorPtEta_lq, "Efficiency_lq_PtEta" , vector<double>() ,vector<double>());  
  TH2F *efficiency_b_pteta = cmsana::createEfficiencyHist2D(histNumeratorPtEta_b, histDenominatorPtEta_b, "Efficiency_b_PtEta" , vector<double>() ,vector<double>());  
  TH2F *efficiency_g_pteta = cmsana::createEfficiencyHist2D(histNumeratorPtEta_g, histDenominatorPtEta_g, "Efficiency_g_PtEta" , vector<double>() ,vector<double>());  

  //--------------------------------------------------------------------------------------------------------------
  // Draw
  //==============================================================================================================
  TCanvas *cv =0;

  cv = new TCanvas("cv","cv",800,600);
  efficiency_pt->Draw("AP");
  //efficiency_pt->SetTitle("");
  efficiency_pt->GetYaxis()->SetRangeUser(0.0,0.1);
  cv->SaveAs("Efficiency_Pt.gif");

  cv = new TCanvas("cv","cv",800,600);
  efficiency_eta->Draw("AP");
  //efficiency_eta->SetTitle("");
  efficiency_eta->GetYaxis()->SetRangeUser(0.0,0.1);
  cv->SaveAs("Efficiency_Eta.gif");

  cv = new TCanvas("cv","cv",800,600);
  efficiency_rho->Draw("AP");
  //efficiency_rho->SetTitle("");
  efficiency_rho->GetYaxis()->SetRangeUser(0.0,0.1);
  cv->SaveAs("Efficiency_Rho.gif");

  cv = new TCanvas("cv","cv",800,600);
  efficiency_npv->Draw("AP");
  //efficiency_npv->SetTitle("");
  efficiency_npv->GetYaxis()->SetRangeUser(0.0,0.1);
  cv->SaveAs("Efficiency_Npv.gif");


  //--------------------------------------------------------------------------------------------------------------
  // Output
  //==============================================================================================================
  TFile *file = TFile::Open(("FakeRate"+label+".root").c_str(), "UPDATE");
  // TFile::Open can return a null pointer or a zombie file when the file
  // cannot be opened; writing through either would crash or lose data.
  if (!file || file->IsZombie()) {
    std::cerr << "doCreateMuonFakeRateMap: cannot open output file FakeRate" << label << ".root\n";
    delete file;
    gBenchmark->Show(benchmarkName);
    return;
  }
  file->cd();
  file->WriteTObject(efficiency_pt, "Efficiency_Pt", "WriteDelete");
  file->WriteTObject(efficiency_eta, "Efficiency_Eta", "WriteDelete");
  file->WriteTObject(efficiency_rho, "Efficiency_Rho", "WriteDelete");
  file->WriteTObject(efficiency_npv, "Efficiency_NPV", "WriteDelete");
  file->WriteTObject(efficiency_pteta, "Efficiency_PtEta", "WriteDelete");

  file->WriteTObject(efficiency_lq_pt, "Efficiency_lq_Pt", "WriteDelete");
  file->WriteTObject(efficiency_b_pt, "Efficiency_b_Pt", "WriteDelete");
  file->WriteTObject(efficiency_g_pt, "Efficiency_g_Pt", "WriteDelete");
  file->WriteTObject(efficiency_b_pteta, "Efficiency_b_PtEta", "WriteDelete");
  file->WriteTObject(efficiency_lq_pteta, "Efficiency_lq_PtEta", "WriteDelete");
  file->WriteTObject(efficiency_g_pteta, "Efficiency_g_PtEta", "WriteDelete");


  // UInt_t instead of the non-standard `uint` typedef, matching the
  // declarations of the bin counts above.
  for (UInt_t i=0; i < NPtBins_Response+2; ++i) {
    for (UInt_t j=0; j < NEtaBins_Response+2; ++j) {
      file->WriteTObject(PtResolution_PtEta_Muons[i][j], PtResolution_PtEta_Muons[i][j]->GetName(), "WriteDelete");
    }
  }

  computeEfficiencyPtEta(NNumerator_Muons_PtEta, NDenominator_Muons_PtEta, Efficiency_Muons_PtEta);


  //********************************************************
  // Produce output lookup table
  //******************************************************** 
  ofstream outf_e("FakeMuonEfficiencyMap.h");

  // Generated bin finder: returns 0 below the first edge, i+1 for
  // [bins[i], bins[i+1]), and nbins+1 at or above the last edge.
  outf_e << "UInt_t FindMuonEfficiencyBin( double value, double bins[], UInt_t nbins) {" << endl;
  outf_e << "  UInt_t nbinboundaries = nbins+1;" << endl;
  outf_e << "  UInt_t bin = 0;" << endl;
  outf_e << "  for (uint i=0; i < nbinboundaries; ++i) {" << endl;
  outf_e << "    if (i < nbinboundaries-1) {" << endl;
  outf_e << "      if (value >= bins[i] && value < bins[i+1]) {" << endl;
  outf_e << "        bin = i+1;" << endl;
  outf_e << "        break;" << endl;
  outf_e << "      }" << endl;
  outf_e << "    } else if (i == nbinboundaries-1) {" << endl;
  outf_e << "      if (value >= bins[i]) {" << endl;
  outf_e << "        bin = nbinboundaries;" << endl;
  outf_e << "        break;" << endl;
  outf_e << "      }" << endl;
  outf_e << "    }    " << endl;
  outf_e << "  }" << endl;
  outf_e << "  return bin;" << endl;
  outf_e << "}" << endl;

  outf_e << endl;
  outf_e << endl;

  outf_e << "Double_t GetMuonEfficiencyPtEta(Double_t Pt, Double_t Eta) {" << endl;

  outf_e << endl;
  outf_e << "  Double_t ptBins[" << NPtBins+1 << "] = {";
  for (UInt_t i=0; i < NPtBins+1; ++i) {
    outf_e << ptBins[i];
    if (i < NPtBins) {
      outf_e << ",";
    }
  }
  outf_e << "};\n";

  outf_e << "  Double_t etaBins[" << NEtaBins+1 << "] = {";
  for (UInt_t i=0; i < NEtaBins+1; ++i) {
    outf_e << etaBins[i];
    if (i < NEtaBins) {
      outf_e << ",";
    }
  }
  outf_e << "};\n";


  outf_e << endl;
  outf_e << endl;

  outf_e << "  Double_t Efficiency[" << NPtBins+2 << "][" << NEtaBins+2 << "] = {";
  outf_e << endl;

  for (UInt_t i=0; i < NPtBins+2; ++i) {
    outf_e << "    {";
    for (UInt_t j=0; j < NEtaBins+2; ++j) {
      outf_e << Efficiency_Muons_PtEta[i][j];
      if (j< NEtaBins+1) {
        outf_e << ",";
      }
    }
    if (i< NPtBins+1) {
      outf_e << "    },";
    } else {
      outf_e << "}";
    }
    outf_e << endl;
  }

  outf_e << "  };" << endl;

  outf_e << endl;
  outf_e << endl;

  outf_e << "  Int_t tmpPtBin = FindMuonEfficiencyBin( Pt , ptBins, " << NPtBins << ");" << endl;
  // The table above was filled in |eta|, so the generated lookup must bin
  // |Eta| too; a signed negative Eta would always land in the never-filled
  // underflow column. Written without fabs so the generated header needs no
  // extra include.
  outf_e << "  Int_t tmpEtaBin = FindMuonEfficiencyBin( (Eta < 0 ? -Eta : Eta) , etaBins, " << NEtaBins << ");" << endl;
  outf_e << "  return Efficiency[tmpPtBin][tmpEtaBin];" << endl;
  outf_e << "}" << endl;


  outf_e.close();



  file->Close();
  delete file;       



  gBenchmark->Show(benchmarkName);       
}