double StandardFrequentistDiscovery(
   const char* infile = "",
   const char* workspaceName = "channel1",
   const char* modelConfigNameSB = "ModelConfig",
   const char* dataName = "obsData",
   int toys = 1000,
   double poiValueForBackground = 0.0,
   double poiValueForSignal = 1.0
) {

   // The workspace contains the model for s+b. The b model is "autogenerated"
   // by copying s+b and setting the one parameter of interest to zero.
   // To keep the script simple, multiple parameters of interest or different
   // functional forms of the b model are not supported.

   // for now, assume there is only one parameter of interest, and these are
   // its values:

   /////////////////////////////////////////////////////////////
   // First part is just to access a user-defined file
   // or create the standard example file if it doesn't exist
   ////////////////////////////////////////////////////////////
   const char* filename = "";
   if (!strcmp(infile,"")) {
      filename = "results/example_channel1_GammaExample_model.root";
      bool fileExist = !gSystem->AccessPathName(filename); // note opposite return code
      // if file does not exists generate with histfactory
      if (!fileExist) {
#ifdef _WIN32
         cout << "HistFactory file cannot be generated on Windows - exit" << endl;
         return -1;
#endif
         // Normally this would be run on the command line
         cout <<"will run standard hist2workspace example"<<endl;
         gROOT->ProcessLine(".! prepareHistFactory .");
         gROOT->ProcessLine(".! hist2workspace config/example.xml");
         cout <<"\n\n---------------------"<<endl;
         cout <<"Done creating example input"<<endl;
         cout <<"---------------------\n\n"<<endl;
      }
      
   }
   else
      filename = infile;
   
   // Try to open the file
   TFile *file = TFile::Open(filename);
   
   // if input file was specified byt not found, quit
   if(!file ){
      cout <<"StandardRooStatsDemoMacro: Input file " << filename << " is not found" << endl;
      return -1;
   } 


   /////////////////////////////////////////////////////////////
   // Tutorial starts here
   ////////////////////////////////////////////////////////////

   TStopwatch *mn_t = new TStopwatch;
   mn_t->Start();

   // get the workspace out of the file
   RooWorkspace* w = (RooWorkspace*) file->Get(workspaceName);
   if (!w) {
      cout << "workspace not found" << endl;
      return -1.0;
   }

   // get the modelConfig out of the file
   ModelConfig* mc = (ModelConfig*) w->obj(modelConfigNameSB);

   // get the data out of the file
   RooAbsData* data = w->data(dataName);

   // make sure ingredients are found
   if (!data || !mc) {
      w->Print();
      cout << "data or ModelConfig was not found" << endl;
      return -1.0;
   }


   RooRealVar* firstPOI = (RooRealVar*) mc->GetParametersOfInterest()->first();
   firstPOI->setVal(poiValueForSignal);
   mc->SetSnapshot(*mc->GetParametersOfInterest());
   // create null model
   ModelConfig *mcNull = mc->Clone("ModelConfigNull");
   firstPOI->setVal(poiValueForBackground);
   mcNull->SetSnapshot(*(RooArgSet*)mcNull->GetParametersOfInterest()->snapshot());



   // ----------------------------------------------------
   // Configure a ProfileLikelihoodTestStat and a SimpleLikelihoodRatioTestStat
   // to use simultaneously with ToyMCSampler
   ProfileLikelihoodTestStat* plts =  new ProfileLikelihoodTestStat(*mc->GetPdf());
   plts->SetOneSidedDiscovery(true);
   plts->SetVarName( "q_{0}/2" );
   
   // ----------------------------------------------------
   // configure the ToyMCImportanceSampler with two test statistics
   ToyMCSampler toymcs(*plts, 50);



   // Since this tool needs to throw toy MC the PDF needs to be
   // extended or the tool needs to know how many entries in a dataset
   // per pseudo experiment.
   // In the 'number counting form' where the entries in the dataset
   // are counts, and not values of discriminating variables, the
   // datasets typically only have one entry and the PDF is not
   // extended.
   if (!mc->GetPdf()->canBeExtended()) {
      if (data->numEntries() == 1) {
         toymcs.SetNEventsPerToy(1);
      } else cout << "Not sure what to do about this model" << endl;
   }

   // We can use PROOF to speed things along in parallel
   // ProofConfig pc(*w, 2, "user@yourfavoriteproofcluster", false);
   ProofConfig pc(*w, 2, "", false);
   //toymcs.SetProofConfig(&pc);    // enable proof


   // instantiate the calculator
   FrequentistCalculator freqCalc(*data, *mc, *mcNull, &toymcs);
   freqCalc.SetToys( toys,toys ); // null toys, alt toys

   // Run the calculator and print result
   HypoTestResult* freqCalcResult = freqCalc.GetHypoTest();
   freqCalcResult->GetNullDistribution()->SetTitle( "b only" );
   freqCalcResult->GetAltDistribution()->SetTitle( "s+b" );
   freqCalcResult->Print();
   double pvalue = freqCalcResult->NullPValue();

   // stop timing
   mn_t->Stop();
   cout << "total CPU time: " << mn_t->CpuTime() << endl;
   cout << "total real time: " << mn_t->RealTime() << endl;

   // plot
   TCanvas* c1 = new TCanvas();
   HypoTestPlot *plot = new HypoTestPlot(*freqCalcResult, 100, -0.49, 9.51 );
   plot->SetLogYaxis(true);
   
   // add chi2 to plot
   int nPOI = 1;
   TF1* f = new TF1("f", TString::Format("1*ROOT::Math::chisquared_pdf(2*x,%d,0)",nPOI), 0,20);
   f->SetLineColor( kBlack );
   f->SetLineStyle( 7 );
   plot->AddTF1( f, TString::Format("#chi^{2}(2x,%d)",nPOI) );
   
   plot->Draw();
   c1->SaveAs("standard_discovery_output.pdf");
   

   return pvalue;
}
// Example no. 2
// 0
void StandardHypoTestDemo(const char* infile = "",
                          const char* workspaceName = "combined",
                          const char* modelSBName = "ModelConfig",
                          const char* modelBName = "",
                          const char* dataName = "obsData", 
                          int calcType = 0, // 0 freq 1 hybrid, 2 asymptotic
                          int testStatType = 3,   // 0 LEP, 1 TeV, 2 LHC, 3 LHC - one sided
                          int ntoys = 5000, 
                          bool useNC = false, 
                          const char * nuisPriorName = 0)
{

/*

  Other Parameter to pass in tutorial
  apart from standard for filename, ws, modelconfig and data

  type = 0 Freq calculator 
  type = 1 Hybrid calculator
  type = 2 Asymptotic calculator  

  testStatType = 0 LEP
  = 1 Tevatron 
  = 2 Profile Likelihood
  = 3 Profile Likelihood one sided (i.e. = 0 if mu < mu_hat)

  ntoys:         number of toys to use 

  useNumberCounting:  set to true when using number counting events 

  nuisPriorName:   name of prior for the nnuisance. This is often expressed as constraint term in the global model
  It is needed only when using the HybridCalculator (type=1)
  If not given by default the prior pdf from ModelConfig is used. 

  extra options are available as global paramwters of the macro. They major ones are: 
 
  generateBinned       generate binned data sets for toys (default is false) - be careful not to activate with 
  a too large (>=3) number of observables 
  nToyRatio            ratio of S+B/B toys (default is 2)
  printLevel
  
*/

   // disable - can cause some problems
   //ToyMCSampler::SetAlwaysUseMultiGen(true);

   SimpleLikelihoodRatioTestStat::SetAlwaysReuseNLL(true);
   ProfileLikelihoodTestStat::SetAlwaysReuseNLL(true);
   RatioOfProfiledLikelihoodsTestStat::SetAlwaysReuseNLL(true);

   //RooRandom::randomGenerator()->SetSeed(0);

   // to change minimizers 
   // ROOT::Math::MinimizerOptions::SetDefaultStrategy(0);
   // ROOT::Math::MinimizerOptions::SetDefaultMinimizer("Minuit2");
   // ROOT::Math::MinimizerOptions::SetDefaultTolerance(1);

  /////////////////////////////////////////////////////////////
  // First part is just to access a user-defined file 
  // or create the standard example file if it doesn't exist
  ////////////////////////////////////////////////////////////
  const char* filename = "";
  if (!strcmp(infile,""))
    filename = "results/example_combined_GaussExample_model.root";
  else
    filename = infile;
  // Check if example input file exists
  TFile *file = TFile::Open(filename);

  // if input file was specified byt not found, quit
  if(!file && strcmp(infile,"")){
    cout <<"file not found" << endl;
    return;
  } 

  // if default file not found, try to create it
  if(!file ){
    // Normally this would be run on the command line
    cout <<"will run standard hist2workspace example"<<endl;
    gROOT->ProcessLine(".! prepareHistFactory .");
    gROOT->ProcessLine(".! hist2workspace config/example.xml");
    cout <<"\n\n---------------------"<<endl;
    cout <<"Done creating example input"<<endl;
    cout <<"---------------------\n\n"<<endl;
  }

  // now try to access the file again
  file = TFile::Open(filename);
  if(!file){
    // if it is still not there, then we can't continue
    cout << "Not able to run hist2workspace to create example input" <<endl;
    return;
  }

  
  /////////////////////////////////////////////////////////////
  // Tutorial starts here
  ////////////////////////////////////////////////////////////

  // get the workspace out of the file
  RooWorkspace* w = (RooWorkspace*) file->Get(workspaceName);
  if(!w){
    cout <<"workspace not found" << endl;
    return;
  }
  w->Print();

  // get the modelConfig out of the file
  ModelConfig* sbModel = (ModelConfig*) w->obj(modelSBName);


  // get the modelConfig out of the file
  RooAbsData* data = w->data(dataName);

  // make sure ingredients are found
  if(!data || !sbModel){
    w->Print();
    cout << "data or ModelConfig was not found" <<endl;
    return;
  }
  // make b model
  ModelConfig* bModel = (ModelConfig*) w->obj(modelBName);


   // case of no systematics
   // remove nuisance parameters from model
   if (noSystematics) { 
      const RooArgSet * nuisPar = sbModel->GetNuisanceParameters();
      if (nuisPar && nuisPar->getSize() > 0) { 
         std::cout << "StandardHypoTestInvDemo" << "  -  Switch off all systematics by setting them constant to their initial values" << std::endl;
         RooStats::SetAllConstant(*nuisPar);
      }
      if (bModel) { 
         const RooArgSet * bnuisPar = bModel->GetNuisanceParameters();
         if (bnuisPar) 
            RooStats::SetAllConstant(*bnuisPar);
      }
   }


  if (!bModel ) {
      Info("StandardHypoTestInvDemo","The background model %s does not exist",modelBName);
      Info("StandardHypoTestInvDemo","Copy it from ModelConfig %s and set POI to zero",modelSBName);
      bModel = (ModelConfig*) sbModel->Clone();
      bModel->SetName(TString(modelSBName)+TString("B_only"));      
      RooRealVar * var = dynamic_cast<RooRealVar*>(bModel->GetParametersOfInterest()->first());
      if (!var) return;
      double oldval = var->getVal();
      var->setVal(0);
      //bModel->SetSnapshot( RooArgSet(*var, *w->var("lumi"))  );
      bModel->SetSnapshot( RooArgSet(*var)  );
      var->setVal(oldval);
  }
  
   if (!sbModel->GetSnapshot() || poiValue > 0) { 
      Info("StandardHypoTestDemo","Model %s has no snapshot  - make one using model poi",modelSBName);
      RooRealVar * var = dynamic_cast<RooRealVar*>(sbModel->GetParametersOfInterest()->first());
      if (!var) return;
      double oldval = var->getVal();
      if (poiValue > 0)  var->setVal(poiValue);
      //sbModel->SetSnapshot( RooArgSet(*var, *w->var("lumi") ) );
      sbModel->SetSnapshot( RooArgSet(*var) );
      if (poiValue > 0) var->setVal(oldval);
      //sbModel->SetSnapshot( *sbModel->GetParametersOfInterest() );
   }

   



   // part 1, hypothesis testing 
   SimpleLikelihoodRatioTestStat * slrts = new SimpleLikelihoodRatioTestStat(*bModel->GetPdf(), *sbModel->GetPdf());
   // null parameters must includes snapshot of poi plus the nuisance values 
   RooArgSet nullParams(*bModel->GetSnapshot());
   if (bModel->GetNuisanceParameters()) nullParams.add(*bModel->GetNuisanceParameters());
   
   slrts->SetNullParameters(nullParams);
   RooArgSet altParams(*sbModel->GetSnapshot());
   if (sbModel->GetNuisanceParameters()) altParams.add(*sbModel->GetNuisanceParameters());
   slrts->SetAltParameters(altParams);


   ProfileLikelihoodTestStat * profll = new ProfileLikelihoodTestStat(*bModel->GetPdf());


   RatioOfProfiledLikelihoodsTestStat * 
      ropl = new RatioOfProfiledLikelihoodsTestStat(*bModel->GetPdf(), *sbModel->GetPdf(), sbModel->GetSnapshot());
   ropl->SetSubtractMLE(false);

   if (testStatType == 3) profll->SetOneSidedDiscovery(1);
   profll->SetPrintLevel(printLevel);

   // profll.SetReuseNLL(mOptimize);
   // slrts.SetReuseNLL(mOptimize);
   // ropl.SetReuseNLL(mOptimize);

   AsymptoticCalculator::SetPrintLevel(printLevel);

   HypoTestCalculatorGeneric *  hypoCalc = 0;
   // note here Null is B and Alt is S+B
   if (calcType == 0) hypoCalc = new  FrequentistCalculator(*data, *sbModel, *bModel);
   else if (calcType == 1) hypoCalc= new  HybridCalculator(*data, *sbModel, *bModel);
   else if (calcType == 2) hypoCalc= new  AsymptoticCalculator(*data, *sbModel, *bModel);

   if (calcType == 0) 
       ((FrequentistCalculator*)hypoCalc)->SetToys(ntoys, ntoys/nToysRatio);
   if (calcType == 1) 
       ((HybridCalculator*)hypoCalc)->SetToys(ntoys, ntoys/nToysRatio);
   if (calcType == 2 ) { 
      if (testStatType == 3) ((AsymptoticCalculator*) hypoCalc)->SetOneSidedDiscovery(true);  
      if (testStatType != 2 && testStatType != 3)  
         Warning("StandardHypoTestDemo","Only the PL test statistic can be used with AsymptoticCalculator - use by default a two-sided PL");
      

   }


   // check for nuisance prior pdf in case of nuisance parameters 
   if (calcType == 1 && (bModel->GetNuisanceParameters() || sbModel->GetNuisanceParameters() )) {
         RooAbsPdf * nuisPdf = 0; 
         if (nuisPriorName) nuisPdf = w->pdf(nuisPriorName);
         // use prior defined first in bModel (then in SbModel)
         if (!nuisPdf)  { 
            Info("StandardHypoTestDemo","No nuisance pdf given for the HybridCalculator - try to deduce  pdf from the   model");
            if (bModel->GetPdf() && bModel->GetObservables() ) 
               nuisPdf = RooStats::MakeNuisancePdf(*bModel,"nuisancePdf_bmodel");
            else 
               nuisPdf = RooStats::MakeNuisancePdf(*sbModel,"nuisancePdf_sbmodel");
         }   
         if (!nuisPdf ) {
            if (bModel->GetPriorPdf())  { 
               nuisPdf = bModel->GetPriorPdf();
               Info("StandardHypoTestDemo","No nuisance pdf given - try to use %s that is defined as a prior pdf in the B model",nuisPdf->GetName());            
            }
            else { 
               Error("StandardHypoTestDemo","Cannnot run Hybrid calculator because no prior on the nuisance parameter is specified or can be derived");
               return;
            }
         }
         assert(nuisPdf);
         Info("StandardHypoTestDemo","Using as nuisance Pdf ... " );
         nuisPdf->Print();
      
         const RooArgSet * nuisParams = (bModel->GetNuisanceParameters() ) ? bModel->GetNuisanceParameters() : sbModel->GetNuisanceParameters();
         RooArgSet * np = nuisPdf->getObservables(*nuisParams);
         if (np->getSize() == 0) { 
            Warning("StandardHypoTestDemo","Prior nuisance does not depend on nuisance parameters. They will be smeared in their full range");
         }
         delete np;
      
         ((HybridCalculator*)hypoCalc)->ForcePriorNuisanceAlt(*nuisPdf);
         ((HybridCalculator*)hypoCalc)->ForcePriorNuisanceNull(*nuisPdf);
   }

   // hypoCalc->ForcePriorNuisanceAlt(*sbModel->GetPriorPdf());
   // hypoCalc->ForcePriorNuisanceNull(*bModel->GetPriorPdf());

   ToyMCSampler * sampler = (ToyMCSampler *)hypoCalc->GetTestStatSampler();

   if (sampler && (calcType == 0 || calcType == 1) ) { 

      // look if pdf is number counting or extended
      if (sbModel->GetPdf()->canBeExtended() ) { 
         if (useNC)   Warning("StandardHypoTestDemo","Pdf is extended: but number counting flag is set: ignore it ");
      }
      else {
         // for not extended pdf
         if (!useNC)  { 
            int nEvents = data->numEntries();
            Info("StandardHypoTestDemo","Pdf is not extended: number of events to generate taken  from observed data set is %d",nEvents);
            sampler->SetNEventsPerToy(nEvents);
         }
         else {
            Info("StandardHypoTestDemo","using a number counting pdf");
            sampler->SetNEventsPerToy(1);
         }
      }
      
      if (data->isWeighted() && !generateBinned) { 
         Info("StandardHypoTestDemo","Data set is weighted, nentries = %d and sum of weights = %8.1f but toy generation is unbinned - it would be faster to set generateBinned to true\n",data->numEntries(), data->sumEntries());
      }
      if (generateBinned)  sampler->SetGenerateBinned(generateBinned);


      // set the test statistic
      if (testStatType == 0) sampler->SetTestStatistic(slrts); 
      if (testStatType == 1) sampler->SetTestStatistic(ropl); 
      if (testStatType == 2 || testStatType == 3) sampler->SetTestStatistic(profll); 

   }
   
   HypoTestResult *  htr = hypoCalc->GetHypoTest();
   htr->SetPValueIsRightTail(true);
   htr->SetBackgroundAsAlt(false);
   htr->Print(); // how to get meaningfull CLs at this point?

   delete sampler;
   delete slrts; 
   delete ropl; 
   delete profll;

   if (calcType != 2) {
      HypoTestPlot * plot = new HypoTestPlot(*htr,100);
      plot->SetLogYaxis(true);
      plot->Draw();
   }
   else { 
      std::cout << "Asymptotic results " << std::endl;
      
   }

   // look at expected significances 
   // found median of S+B distribution
   if (calcType != 2) { 

      SamplingDistribution * altDist = htr->GetAltDistribution();   
      HypoTestResult htExp("Expected Result");
      htExp.Append(htr);
      // find quantiles in alt (S+B) distribution 
      double p[5];
      double q[5];
      for (int i = 0; i < 5; ++i) { 
         double sig = -2  + i;
         p[i] = ROOT::Math::normal_cdf(sig,1);
      }
      std::vector<double> values = altDist->GetSamplingDistribution();
      TMath::Quantiles( values.size(), 5, &values[0], q, p, false);  

      for (int i = 0; i < 5; ++i) { 
         htExp.SetTestStatisticData( q[i] );
         double sig = -2  + i;      
         std::cout << " Expected p -value and significance at " << sig << " sigma = " 
                   << htExp.NullPValue() << " significance " << htExp.Significance() << " sigma " << std::endl; 
         
      }
   }
   else { 
      // case of asymptotic calculator 
      for (int i = 0; i < 5; ++i) { 
         double sig = -2  + i;      
         // sigma is inverted here 
         double pval = AsymptoticCalculator::GetExpectedPValues( htr->NullPValue(), htr->AlternatePValue(), -sig, false);
         std::cout << " Expected p -value and significance at " << sig << " sigma = " 
                   << pval << " significance " << ROOT::Math::normal_quantile_c(pval,1) << " sigma " << std::endl; 
         
      }
   }

}