Ejemplo n.º 1
0
//____________________________________
/// Run a profile-likelihood hypothesis test of the background-only hypothesis.
///
/// Looks up the pdf "model", the data set "data" and the parameter "mu" in the
/// workspace, tests the null hypothesis mu = 0 with a RooStats
/// ProfileLikelihoodCalculator and prints the resulting null p-value and
/// significance to standard output.
///
/// @param wks  workspace holding "model", "data" and "mu". If the pointer is
///             null, or "data"/"mu" cannot be found, a message is printed and
///             the function returns without running the test.
void DoHypothesisTest(RooWorkspace* wks){

  if (!wks) {
    cout << "DoHypothesisTest: no workspace given - nothing to do" << endl;
    return;
  }

  // Fetch the ingredients first so that a missing object is reported instead
  // of being dereferenced as a null pointer.
  RooAbsData* data = wks->data("data");
  RooRealVar* mu = wks->var("mu");
  if (!data || !mu) {
    cout << "DoHypothesisTest: data set \"data\" or variable \"mu\" not found in the workspace" << endl;
    return;
  }

  // Use a RooStats ProfileLikelihoodCalculator to do the hypothesis test.
  ModelConfig model;
  model.SetWorkspace(*wks);
  model.SetPdf("model");

  // Here we explicitly set the value of the parameters for the null.
  // We want no signal contribution, i.e. mu = 0.
  // The snapshot is a copy owned by this function, so changing it does not
  // touch the "mu" stored in the workspace.
  RooArgSet poi(*mu);
  RooArgSet * nullParams = (RooArgSet*) poi.snapshot();
  nullParams->setRealValue("mu",0);

  {
    // The calculator lives in its own scope: it keeps references to the
    // elements of nullParams, so it has to be destroyed before that set is.
    ProfileLikelihoodCalculator plc;
    plc.SetData(*data);
    plc.SetModel(model);

    // NOTE: ModelConfig::SetSnapshot is deliberately not used here, because it
    // would import nullParams in the workspace and merge it with the existing
    // "mu". The null values are handed to the calculator directly instead.
    plc.SetNullParameters( *nullParams);

    // We get a HypoTestResult out of the calculator, and we can query it.
    // The result is owned by the caller and is released once printed.
    HypoTestResult* htr = plc.GetHypoTest();
    if (htr) {
      cout << "-------------------------------------------------" << endl;
      cout << "The p-value for the null is " << htr->NullPValue() << endl;
      cout << "Corresponding to a signifcance of " << htr->Significance() << endl;
      cout << "-------------------------------------------------\n\n" << endl;
      delete htr;
    }
    else {
      cout << "DoHypothesisTest: the calculator did not return a result" << endl;
    }
  }

  delete nullParams;
}
Ejemplo n.º 2
0
/// Scan the resonance mass and plot the discovery p-value and significance.
///
/// Reads the ModelConfig "mc", the data set "data" and the variable "mass"
/// from the workspace. For nbins+1 equally spaced mass values between massMin
/// and massMax it runs a one-sided-discovery AsymptoticCalculator (null =
/// background only, alternate = signal + background with the first parameter
/// of interest set to 50) and records the observed p-value, the expected
/// p-value and the significance. The three curves are drawn on a canvas that
/// is saved as significance.pdf and significance.png.
///
/// @param w  workspace holding "mc", "data" and "mass". If any of them is
///           missing a message is printed and nothing is computed.
void significance(RooWorkspace& w ) {

  ModelConfig* mc = dynamic_cast<ModelConfig*>(w.obj("mc"));
  // The calculator only needs a RooAbsData, so binned data sets work as well.
  RooAbsData* data = w.data("data");
  RooRealVar* massVar = w.var("mass");
  if (!mc || !data || !massVar) {
    std::cout << "significance: the workspace must contain the ModelConfig \"mc\", the data set \"data\" and the variable \"mass\"" << std::endl;
    return;
  }

  // define the S+B snapshot (this is used for computing the expected significance)
  ModelConfig* sbModel = mc->Clone();
  sbModel->SetName("S+B Model");
  const RooArgSet* poiSet = sbModel->GetParametersOfInterest();
  RooRealVar* poi = poiSet ? dynamic_cast<RooRealVar*>(poiSet->first()) : 0;
  if (!poi) {
    std::cout << "significance: the ModelConfig \"mc\" does not define a parameter of interest" << std::endl;
    delete sbModel;
    return;
  }
  poi->setVal(50);
  sbModel->SetSnapshot(*poi);

  // the background-only model is the same model with the POI fixed to zero
  ModelConfig * bModel = (ModelConfig*) sbModel->Clone();
  bModel->SetName("B model");
  poi->setVal(0);
  bModel->SetSnapshot(*poi);

  std::vector<double> masses;
  std::vector<double> p0values;
  std::vector<double> p0valuesExpected;
  std::vector<double> sigvalues;

  double massMin = 200;
  double massMax = 2500;
  int nbins = 100;
  const double massStep = (massMax - massMin) / nbins;

  // loop on the mass values; an integer counter fixes the number of scan
  // points (nbins+1) independently of floating-point rounding of the step
  for ( int i = 0; i <= nbins; ++i ) {

    const double mass = massMin + i * massStep;
    massVar->setVal( mass );

    // create the AsymptoticCalculator from data,alt model, null model
    // (on the stack, so it is released at the end of every iteration)
    AsymptoticCalculator ac(*data, *sbModel, *bModel);
    ac.SetOneSidedDiscovery(true);  // for one-side discovery test
    AsymptoticCalculator::SetPrintLevel(-1);

    // run the calculator; the returned result is owned by the caller
    HypoTestResult* asymCalcResult = ac.GetHypoTest();
    if (!asymCalcResult) {
      std::cout << "** Mass = " << mass << " : the calculator did not return a result - point skipped" << std::endl;
      continue;
    }
    asymCalcResult->Print();

    double pvalue = asymCalcResult->NullPValue();
    double sigvalue = asymCalcResult->Significance();
    double expectedP0 = AsymptoticCalculator::GetExpectedPValues(asymCalcResult->NullPValue(),asymCalcResult->AlternatePValue(), 0, false);
    delete asymCalcResult;

    masses.push_back(mass);
    p0values.push_back(pvalue);
    p0valuesExpected.push_back(expectedP0);
    sigvalues.push_back(sigvalue);
    std::cout << "** Mass = " << mass << " p0-value = " << expectedP0 << " p-value = " << pvalue << " significance = " << sigvalue << std::endl;

  }

  // the models were only needed by the calculators
  delete bModel;
  delete sbModel;

  if (masses.empty()) {
    std::cout << "significance: no scan point could be computed - nothing to plot" << std::endl;
    return;
  }

  const int nPoints = masses.size();
  TGraph* graph1  = new TGraph(nPoints,&masses[0],&p0values[0]);
  TGraph* graph2  = new TGraph(nPoints,&masses[0],&p0valuesExpected[0]);
  TGraph* graph3  = new TGraph(nPoints,&masses[0],&sigvalues[0]);

  // Smallest strictly positive p-value of the two curves sharing the top pad.
  // TGraph::GetMinimum() only reports a minimum that was set by hand, so the
  // range has to be derived from the plotted values themselves; non-positive
  // values are ignored because the pad uses a logarithmic y axis.
  bool haveMin = false;
  double yMin = 0;
  for (int i = 0; i < nPoints; ++i) {
    const double candidates[2] = { p0values[i], p0valuesExpected[i] };
    for (int j = 0; j < 2; ++j) {
      if (candidates[j] > 0 && (!haveMin || candidates[j] < yMin)) {
        yMin = candidates[j];
        haveMin = true;
      }
    }
  }

  TCanvas* c2 = new TCanvas("c2","Significance", 900, 700);
  c2->Divide(1,2);
  c2->cd(1);
  graph1->SetMarkerStyle(10);
  //graph1->Draw("APC");
  graph1->Draw("AC");
  graph2->SetLineStyle(2);
  graph2->Draw("C");
  graph1->GetXaxis()->SetTitle("Mass [GeV]");
  graph1->GetYaxis()->SetTitle("p0 value");
  graph1->SetTitle("P-value vs Mass");
  // extend the axis of the first graph so that the expected curve, which is
  // drawn in its frame, is fully visible (with some room below the lowest point)
  if (haveMin) graph1->SetMinimum(0.5 * yMin);
  graph1->SetLineColor(kBlue);
  graph2->SetLineColor(kRed);
  gPad->SetLogy(true);

  c2->cd(2);
  graph3->SetMarkerStyle(10);
  graph3->Draw("AC");
  graph3->SetLineStyle(1);
  graph3->SetLineColor(kRed);
  graph3->GetXaxis()->SetTitle("Mass [GeV]");
  graph3->GetYaxis()->SetTitle("Significance");
  graph3->SetTitle("Significance vs Mass");
  gPad->SetLogy(false);

  // the canvas and the graphs are intentionally kept alive so that the plot
  // stays visible in an interactive session
  c2->SaveAs("significance.pdf");
  c2->SaveAs("significance.png");
}
Ejemplo n.º 3
0
void StandardHypoTestDemo(const char* infile = "",
                          const char* workspaceName = "combined",
                          const char* modelSBName = "ModelConfig",
                          const char* modelBName = "",
                          const char* dataName = "obsData",
                          int calcType = 0, // 0 freq 1 hybrid, 2 asymptotic
                          int testStatType = 3,   // 0 LEP, 1 TeV, 2 LHC, 3 LHC - one sided
                          bool newHypoTest = true,
                          int ntoys = 5000,
                          const char* hypoTestGraphFile = "hypoTestGraph.root",
                          bool useNC = false,
                          const char * nuisPriorName = 0)
{

/*

  Other Parameter to pass in tutorial
  apart from standard for filename, ws, modelconfig and data

  type = 0 Freq calculator
  type = 1 Hybrid calculator
  type = 2 Asymptotic calculator

  testStatType = 0 LEP
  = 1 Tevatron
  = 2 Profile Likelihood
  = 3 Profile Likelihood one sided (i.e. = 0 if mu < mu_hat)

  ntoys:         number of toys to use

  useNumberCounting:  set to true when using number counting events

  nuisPriorName:   name of prior for the nnuisance. This is often expressed as constraint term in the global model
  It is needed only when using the HybridCalculator (type=1)
  If not given by default the prior pdf from ModelConfig is used.

  extra options are available as global paramwters of the macro. They major ones are:

  generateBinned       generate binned data sets for toys (default is false) - be careful not to activate with
  a too large (>=3) number of observables
  nToyRatio            ratio of S+B/B toys (default is 2)
  printLevel

*/

   // disable - can cause some problems
   //ToyMCSampler::SetAlwaysUseMultiGen(true);

   SimpleLikelihoodRatioTestStat::SetAlwaysReuseNLL(true);
   ProfileLikelihoodTestStat::SetAlwaysReuseNLL(true);
   RatioOfProfiledLikelihoodsTestStat::SetAlwaysReuseNLL(true);

   //RooRandom::randomGenerator()->SetSeed(0);

   // to change minimizers
   // ROOT::Math::MinimizerOptions::SetDefaultStrategy(0);
   // ROOT::Math::MinimizerOptions::SetDefaultMinimizer("Minuit2");
   // ROOT::Math::MinimizerOptions::SetDefaultTolerance(1);

  /////////////////////////////////////////////////////////////
  // First part is just to access a user-defined file
  // or create the standard example file if it doesn't exist
  ////////////////////////////////////////////////////////////
   const char* filename = "";
   if (!strcmp(infile,"")) {
      filename = "results/example_combined_GaussExample_model.root";
      bool fileExist = !gSystem->AccessPathName(filename); // note opposite return code
      // if file does not exists generate with histfactory
      if (!fileExist) {
#ifdef _WIN32
         cout << "HistFactory file cannot be generated on Windows - exit" << endl;
         return;
#endif
         // Normally this would be run on the command line
         cout <<"will run standard hist2workspace example"<<endl;
         gROOT->ProcessLine(".! prepareHistFactory .");
         gROOT->ProcessLine(".! hist2workspace config/example.xml");
         cout <<"\n\n---------------------"<<endl;
         cout <<"Done creating example input"<<endl;
         cout <<"---------------------\n\n"<<endl;
      }

   }
   else
      filename = infile;

   // Try to open the file
   TFile *file = TFile::Open(filename);

   // if input file was specified byt not found, quit
   if(!file ){
      cout <<"StandardRooStatsDemoMacro: Input file " << filename << " is not found" << endl;
      return;
   }


  /////////////////////////////////////////////////////////////
  // Tutorial starts here
  ////////////////////////////////////////////////////////////

  // get the workspace out of the file
  RooWorkspace* w = (RooWorkspace*) file->Get(workspaceName);
  if(!w){
    cout <<"workspace not found" << endl;
    return;
  }
  w->Print();

  // get the modelConfig out of the file
  ModelConfig* sbModel = (ModelConfig*) w->obj(modelSBName);


  // get the modelConfig out of the file
  RooAbsData* data = w->data(dataName);

  // make sure ingredients are found
  if(!data || !sbModel){
    w->Print();
    cout << "data or ModelConfig was not found" <<endl;
    return;
  }
  // make b model
  ModelConfig* bModel = (ModelConfig*) w->obj(modelBName);


   // case of no systematics
   // remove nuisance parameters from model
   if (noSystematics) {
      const RooArgSet * nuisPar = sbModel->GetNuisanceParameters();
      if (nuisPar && nuisPar->getSize() > 0) {
         std::cout << "StandardHypoTestInvDemo" << "  -  Switch off all systematics by setting them constant to their initial values" << std::endl;
         RooStats::SetAllConstant(*nuisPar);
      }
      if (bModel) {
         const RooArgSet * bnuisPar = bModel->GetNuisanceParameters();
         if (bnuisPar)
            RooStats::SetAllConstant(*bnuisPar);
      }
   }


  if (!bModel ) {
      Info("StandardHypoTestInvDemo","The background model %s does not exist",modelBName);
      Info("StandardHypoTestInvDemo","Copy it from ModelConfig %s and set POI to zero",modelSBName);
      bModel = (ModelConfig*) sbModel->Clone();
      bModel->SetName(TString(modelSBName)+TString("B_only"));
      RooRealVar * var = dynamic_cast<RooRealVar*>(bModel->GetParametersOfInterest()->first());
      if (!var) return;
      double oldval = var->getVal();
      var->setVal(0);
      //bModel->SetSnapshot( RooArgSet(*var, *w->var("lumi"))  );
      bModel->SetSnapshot( RooArgSet(*var)  );
      var->setVal(oldval);
  }

   if (!sbModel->GetSnapshot() || poiValue > 0) {
      Info("StandardHypoTestDemo","Model %s has no snapshot  - make one using model poi",modelSBName);
      RooRealVar * var = dynamic_cast<RooRealVar*>(sbModel->GetParametersOfInterest()->first());
      if (!var) return;
      double oldval = var->getVal();
      if (poiValue > 0)  var->setVal(poiValue);
      //sbModel->SetSnapshot( RooArgSet(*var, *w->var("lumi") ) );
      sbModel->SetSnapshot( RooArgSet(*var) );
      if (poiValue > 0) var->setVal(oldval);
      //sbModel->SetSnapshot( *sbModel->GetParametersOfInterest() );
   }





   // part 1, hypothesis testing
   SimpleLikelihoodRatioTestStat * slrts = new SimpleLikelihoodRatioTestStat(*bModel->GetPdf(), *sbModel->GetPdf());
   // null parameters must includes snapshot of poi plus the nuisance values
   RooArgSet nullParams(*bModel->GetSnapshot()); //Obtains parameters of the null Hypothesis
   if (bModel->GetNuisanceParameters()) nullParams.add(*bModel->GetNuisanceParameters()); //Add nuisance parameters to the null hypothesis

   slrts->SetNullParameters(nullParams);
   RooArgSet altParams(*sbModel->GetSnapshot()); //Obtains parameters of the alternate Hypothesis
   if (sbModel->GetNuisanceParameters()) altParams.add(*sbModel->GetNuisanceParameters());//Add nuisance parameters to the alternate    hypothesis
   slrts->SetAltParameters(altParams);


   ProfileLikelihoodTestStat * profll = new ProfileLikelihoodTestStat(*bModel->GetPdf());


   RatioOfProfiledLikelihoodsTestStat *
      ropl = new RatioOfProfiledLikelihoodsTestStat(*bModel->GetPdf(), *sbModel->GetPdf(), sbModel->GetSnapshot());
   ropl->SetSubtractMLE(false);

   if (testStatType == 3) profll->SetOneSidedDiscovery(1);
   profll->SetPrintLevel(printLevel);

   // profll.SetReuseNLL(mOptimize);
   // slrts.SetReuseNLL(mOptimize);
   // ropl.SetReuseNLL(mOptimize);

   AsymptoticCalculator::SetPrintLevel(printLevel);

   HypoTestCalculatorGeneric *  hypoCalc = 0;
   // note here Null is B and Alt is S+B
   if (calcType == 0) hypoCalc = new  FrequentistCalculator(*data, *sbModel, *bModel);
   else if (calcType == 1) hypoCalc= new  HybridCalculator(*data, *sbModel, *bModel);
   else if (calcType == 2) hypoCalc= new  AsymptoticCalculator(*data, *sbModel, *bModel);

   if (calcType == 0)
       ((FrequentistCalculator*)hypoCalc)->SetToys(ntoys, ntoys/nToysRatio);
   if (calcType == 1)
       ((HybridCalculator*)hypoCalc)->SetToys(ntoys, ntoys/nToysRatio);
   if (calcType == 2 ) {
      if (testStatType == 3) ((AsymptoticCalculator*) hypoCalc)->SetOneSidedDiscovery(true);
      if (testStatType != 2 && testStatType != 3)
         Warning("StandardHypoTestDemo","Only the PL test statistic can be used with AsymptoticCalculator - use by default a two-sided PL");


   }


   // check for nuisance prior pdf in case of nuisance parameters
   if (calcType == 1 && (bModel->GetNuisanceParameters() || sbModel->GetNuisanceParameters() )) {
         RooAbsPdf * nuisPdf = 0;
         if (nuisPriorName) nuisPdf = w->pdf(nuisPriorName);
         // use prior defined first in bModel (then in SbModel)
         if (!nuisPdf)  {
            Info("StandardHypoTestDemo","No nuisance pdf given for the HybridCalculator - try to deduce  pdf from the   model");
            if (bModel->GetPdf() && bModel->GetObservables() )
               nuisPdf = RooStats::MakeNuisancePdf(*bModel,"nuisancePdf_bmodel");
            else
               nuisPdf = RooStats::MakeNuisancePdf(*sbModel,"nuisancePdf_sbmodel");
         }
         if (!nuisPdf ) {
            if (bModel->GetPriorPdf())  {
               nuisPdf = bModel->GetPriorPdf();
               Info("StandardHypoTestDemo","No nuisance pdf given - try to use %s that is defined as a prior pdf in the B model",nuisPdf->GetName());
            }
            else {
               Error("StandardHypoTestDemo","Cannnot run Hybrid calculator because no prior on the nuisance parameter is specified or can be derived");
               return;
            }
         }
         assert(nuisPdf);
         Info("StandardHypoTestDemo","Using as nuisance Pdf ... " );
         nuisPdf->Print();

         const RooArgSet * nuisParams = (bModel->GetNuisanceParameters() ) ? bModel->GetNuisanceParameters() : sbModel->GetNuisanceParameters();
         RooArgSet * np = nuisPdf->getObservables(*nuisParams);
         if (np->getSize() == 0) {
            Warning("StandardHypoTestDemo","Prior nuisance does not depend on nuisance parameters. They will be smeared in their full range");
         }
         delete np;

         ((HybridCalculator*)hypoCalc)->ForcePriorNuisanceAlt(*nuisPdf);
         ((HybridCalculator*)hypoCalc)->ForcePriorNuisanceNull(*nuisPdf);
   }

   // hypoCalc->ForcePriorNuisanceAlt(*sbModel->GetPriorPdf());
   // hypoCalc->ForcePriorNuisanceNull(*bModel->GetPriorPdf());

   ToyMCSampler * sampler = (ToyMCSampler *)hypoCalc->GetTestStatSampler();

   if (sampler && (calcType == 0 || calcType == 1) ) {

      // look if pdf is number counting or extended
      if (sbModel->GetPdf()->canBeExtended() ) {
         if (useNC)   Warning("StandardHypoTestDemo","Pdf is extended: but number counting flag is set: ignore it ");
      }
      else {
         // for not extended pdf
         if (!useNC)  {
            int nEvents = data->numEntries();
            Info("StandardHypoTestDemo","Pdf is not extended: number of events to generate taken  from observed data set is %d",nEvents);
            sampler->SetNEventsPerToy(nEvents);
         }
         else {
            Info("StandardHypoTestDemo","using a number counting pdf");
            sampler->SetNEventsPerToy(1);
         }
      }

      if (data->isWeighted() && !generateBinned) {
         Info("StandardHypoTestDemo","Data set is weighted, nentries = %d and sum of weights = %8.1f but toy generation is unbinned - it would be faster to set generateBinned to true\n",data->numEntries(), data->sumEntries());
      }
      if (generateBinned)  sampler->SetGenerateBinned(generateBinned);


      // set the test statistic
      if (testStatType == 0) sampler->SetTestStatistic(slrts);
      if (testStatType == 1) sampler->SetTestStatistic(ropl);
      if (testStatType == 2 || testStatType == 3) sampler->SetTestStatistic(profll);

   }

   HypoTestResult *  htr = hypoCalc->GetHypoTest();
   htr->SetPValueIsRightTail(true);
   htr->SetBackgroundAsAlt(false);
   htr->Print(); // how to get meaningfull CLs at this point?

   delete sampler;
   delete slrts;
   delete ropl;
   delete profll;

   if (calcType != 2) {
      HypoTestPlot * plot = new HypoTestPlot(*htr,100);
      plot->SetLogYaxis(true);
      plot->Draw();
      plot->SamplingDistPlot::DumpToFile(hypoTestGraphFile,"RECREATE");
   }
   else {
      std::cout << "Asymptotic results " << std::endl;

   }

   // look at expected significances
   // found median of S+B distribution
   if (calcType != 2) {

      SamplingDistribution * altDist = htr->GetAltDistribution();
      HypoTestResult htExp("Expected Result");
      htExp.Append(htr);
      // find quantiles in alt (S+B) distribution
      double p[5];
      double q[5];
      for (int i = 0; i < 5; ++i) {
         double sig = -2  + i;
         p[i] = ROOT::Math::normal_cdf(sig,1);
      }
      std::vector<double> values = altDist->GetSamplingDistribution();
      TMath::Quantiles( values.size(), 5, &values[0], q, p, false);

      for (int i = 0; i < 5; ++i) {
         htExp.SetTestStatisticData( q[i] );
         double sig = -2  + i;
         std::cout << " Expected p -value and significance at " << sig << " sigma = "
                   << htExp.NullPValue() << " significance " << htExp.Significance() << " sigma " << std::endl;

      }
   }
   else {
      // case of asymptotic calculator
      for (int i = 0; i < 5; ++i) {
         double sig = -2  + i;
         // sigma is inverted here
         double pval = AsymptoticCalculator::GetExpectedPValues( htr->NullPValue(), htr->AlternatePValue(), -sig, false);
         std::cout << " Expected p -value and significance at " << sig << " sigma = "
                   << pval << " significance " << ROOT::Math::normal_quantile_c(pval,1) << " sigma " << std::endl;

      }
   }
    
    ////////////////////////////////////////////////////////////////////////////////////////////////
    //      FROM HERE ON IT HAS BEEN MODIFIED TO SAVE THE RESULTS IN TREES IN A .ROOT FILE
    
    
    //Declare the variable in which the hypothesis test results will be stored
    Double_t p_value, significance_t, cl_b, cl_sb, cl_s ;

    
    if(newHypoTest){
        
        TNtuple *resultsHypoTest = new TNtuple("resultsHypoTest", "resultsHypoTest", "p_value:significance_t:cl_b:cl_sb:cl_s");
        
        //Store the current results
        resultsHypoTest->Fill(htr->NullPValue(),htr->Significance(),htr->CLb(),htr->CLsplusb(),htr->CLs()) ;
        
        // Save the NTuple  to a .root file
        TFile* f_hypoTestResults = new TFile("resultsHypoTestDisc.root","RECREATE") ;
        resultsHypoTest->Write() ;
        f_hypoTestResults->Close() ;
    }
    else{
        // Open the .root that contains the NTuple and add the newly calculated results
        TFile* f_hypoTestResults = new TFile("resultsHypoTestDisc.root","UPDATE") ;

        //Get the NTuple from the file
        TNtuple *resultsHypoTest = (TNtuple*)f_hypoTestResults->Get("resultsHypoTest");
        
        resultsHypoTest->Fill(htr->NullPValue(),htr->Significance(),htr->CLb(),htr->CLsplusb(),htr->CLs()) ;
        
        //IF YOU WANT A DIFFERENT BRANCH FOR EACH TEST
        resultsHypoTest->Write();
        f_hypoTestResults->Close();
        //*/
        
        /*// IF YOU ONLY WNAT ONE BRANCH WITH ALL VALUES INSIDE IT. keep the latest ntuple header only
        resultsHypoTest->Write("",TObject::kOverwrite);
        f_hypoTestResults->Close();
        //*/
    }
    
}