Ejemplo n.º 1
0
//____________________________________
// Runs a profile-likelihood hypothesis test of the "no signal" (mu = 0)
// hypothesis using the objects stored in the given workspace.
//
// The workspace is expected to provide a pdf named "model", a dataset named
// "data" and a variable named "mu". If the workspace pointer is null, or the
// dataset or the variable cannot be found, an error is printed and the
// function returns without running the test.
//
// The null p-value and the corresponding significance are printed to stdout.
void DoHypothesisTest(RooWorkspace* wks){

  if (!wks) {
    cout << "DoHypothesisTest: no workspace given" << endl;
    return;
  }

  // Look up everything we need first, so that a missing object is reported
  // instead of being dereferenced.
  RooAbsData* data = wks->data("data");
  RooRealVar* mu = wks->var("mu");
  if (!data || !mu) {
    cout << "DoHypothesisTest: \"data\" or \"mu\" not found in the workspace" << endl;
    return;
  }

  // Here we explicitly set the value of the parameters for the null.
  // We want no signal contribution, i.e. mu = 0. The snapshot is an
  // independent copy owned by this function, so the "mu" living in the
  // workspace is left untouched.
  RooArgSet poi(*mu);
  RooArgSet * nullParams = (RooArgSet*) poi.snapshot();
  nullParams->setRealValue("mu",0);

  HypoTestResult* htr = 0;
  {
    // The calculator lives in its own scope so that it is destroyed before
    // the null-parameter snapshot it was configured with.
    ModelConfig model;
    model.SetWorkspace(*wks);
    model.SetPdf("model");

    // Use a RooStats ProfileLikelihoodCalculator to do the hypothesis test.
    ProfileLikelihoodCalculator plc;
    plc.SetData(*data);
    plc.SetModel(model);

    // NOTE: ModelConfig::SetSnapshot would import nullParams into the
    // workspace and merge it with the existing "mu", so the null values are
    // handed to the calculator directly instead.
    plc.SetNullParameters(*nullParams);

    // We get a HypoTestResult out of the calculator, and we can query it.
    htr = plc.GetHypoTest();
  }

  if (htr) {
    cout << "-------------------------------------------------" << endl;
    cout << "The p-value for the null is " << htr->NullPValue() << endl;
    cout << "Corresponding to a signifcance of " << htr->Significance() << endl;
    cout << "-------------------------------------------------\n\n" << endl;
  } else {
    cout << "DoHypothesisTest: the calculator did not return a result" << endl;
  }

  // Both objects are owned by the caller of snapshot() / GetHypoTest().
  delete htr;
  delete nullParams;
}
Ejemplo n.º 2
0
// Runs (or reads back) a HypoTestInverter scan, prints and plots the result
// and stores the observed/expected upper limits in 2D histograms.
//
// Parameters:
//   fileName        input ROOT file; when 0 a standard HistFactory example
//                   file is used
//   wsName          name of the workspace in the file; when no workspace of
//                   that name exists, an HypoTestInverterResult with that
//                   name is read instead
//   modelSBName     name of the S+B ModelConfig
//   modelBName      name of the B-only ModelConfig
//   dataName        name of the dataset
//   calculatorType  0 = Frequentist calculator, otherwise Hybrid
//   testStatType    0 = LEP
//                   1 = Tevatron
//                   2 = Profile Likelihood
//                   3 = Profile Likelihood one sided (i.e. = 0 if mu < mu_hat)
//   useCls          scan for CLs (otherwise for CLs+b)
//   npoints         number of points to scan, for autoscan set npoints = -1
//   poimin,poimax   min/max value to scan in case of fixed scans
//                   (if min >= max, try to find automatically)
//   ntoys           number of toys to use
//   mgl, mlsp       coordinates at which the limits are filled into the
//                   output histograms
//   outFileName     file receiving the four limit histograms
//
// Extra options are read from globals of the macro that are defined outside
// this function: plotHypoTestResult (plot the test statistic distributions of
// each scan point) and writeResult (write the inverter result to a file).
void RA2bHypoTestInvDemo(const char * fileName =0,
			 const char * wsName = "combined",
			 const char * modelSBName = "ModelConfig",
			 const char * modelBName = "",
			 const char * dataName = "obsData",                 
			 int calculatorType = 0,
			 int testStatType = 3, 
			 bool useCls = true ,  
			 int npoints = 5,   
			 double poimin = 0,  
			 double poimax = 5, 
			 int ntoys=1000,
			 int mgl = -1,
			 int mlsp = -1,
			 const char * outFileName = "test")    
{
   if (fileName==0) { 
      fileName = "results/example_combined_GaussExample_model.root";
      std::cout << "Use standard file generated with HistFactory :" << fileName << std::endl;
   }
   TFile * file = new TFile(fileName); 

   RooWorkspace * w = dynamic_cast<RooWorkspace*>( file->Get(wsName) );
   HypoTestInverterResult * r = 0; 
   std::cout << w << "\t" << fileName << std::endl;
   if (w != NULL) {
      r = RunInverter(w, modelSBName, modelBName, dataName, calculatorType, testStatType, npoints, poimin, poimax,  ntoys, useCls );    
      if (!r) { 
         std::cerr << "Error running the HypoTestInverter - Exit " << std::endl;
         return;          
      }
   }
   else 
   { 
      // case workspace is not present look for the inverter result
      std::cout << "Reading an HypoTestInverterResult with name " << wsName << " from file " << fileName << std::endl;
      r = dynamic_cast<HypoTestInverterResult*>( file->Get(wsName) ); //
      if (!r) { 
         std::cerr << "File " << fileName << " does not contain a workspace or an HypoTestInverterResult - Exit " 
                   << std::endl;
         file->ls();
         return; 
      }
   }		

   // Summary of the first scan point. The result may contain no scan point
   // at all, in which case there is nothing to print.
   printf("\n\n") ;
   HypoTestResult* htr = r->GetResult(0) ;
   if (htr) {
      printf("  Data value for test stat : %7.3f\n", htr->GetTestStatisticData() ) ;
      printf("  CLsplusb : %9.4f\n", r->CLsplusb(0) ) ;
      printf("  CLb      : %9.4f\n", r->CLb(0) ) ;
      printf("  CLs      : %9.4f\n", r->CLs(0) ) ;
   }
   else {
      std::cerr << "The HypoTestInverterResult does not contain any scan point" << std::endl;
   }
   printf("\n\n") ;
   cout << flush ;

   double upperLimit = r->UpperLimit();
   double ulError = r->UpperLimitEstimatedError();

   std::cout << "The computed upper limit is: " << upperLimit << " +/- " << ulError << std::endl;
 
   const int nEntries = r->ArraySize();

   const char *  typeName = (calculatorType == 0) ? "Frequentist" : "Hybrid";
   const char * resultName = (w) ? w->GetName() : r->GetName();
   TString plotTitle = TString::Format("%s CL Scan for workspace %s",typeName,resultName);
   HypoTestInverterPlot *plot = new HypoTestInverterPlot("HTI_Result_Plot",plotTitle,r);
   TCanvas* c1 = new TCanvas() ;
   plot->Draw("CLb 2CL");  // plot all and Clb
   c1->Update() ;
   c1->SaveAs("cls-canv1.png") ;
   c1->SaveAs("cls-canv1.pdf") ;

   if (plotHypoTestResult) { 
      TCanvas * c2 = new TCanvas();
      // Two pads per row; round the number of rows UP so that an odd number
      // of scan points still gets a pad for its last entry.
      const int nRows = (nEntries + 1) / 2;
      c2->Divide( 2, nRows);
      for (int i=0; i<nEntries; i++) {
         c2->cd(i+1);
         SamplingDistPlot * pl = plot->MakeTestStatPlot(i);
         pl->SetLogYaxis(true);
         pl->Draw();
      }
      c2->Update() ;
      c2->SaveAs("cls-canv2.png") ;
      c2->SaveAs("cls-canv2.pdf") ;
   }

   // Expected limits are computed once and reused for printing and filling.
   const double expMedian = r->GetExpectedUpperLimit(0);
   const double expMinus  = r->GetExpectedUpperLimit(-1);
   const double expPlus   = r->GetExpectedUpperLimit(1);

   std::cout << " expected limit (median) " << expMedian << std::endl;
   std::cout << " expected limit (-1 sig) " << expMinus << std::endl;
   std::cout << " expected limit (+1 sig) " << expPlus << std::endl;

   // save 2d histograms bin to file

   TH2F *result = new TH2F("result","result",22,100,1200,23,50,1200); 
   TH2F *exp_res = new TH2F("exp_res","exp_res",22,100,1200,23,50,1200); 
   TH2F *exp_res_minus = new TH2F("exp_res_minus","exp_res_minus",22,100,1200,23,50,1200); 
   TH2F *exp_res_plus = new TH2F("exp_res_plus","exp_res_plus",22,100,1200,23,50,1200); 

   result->Fill(mgl,mlsp,upperLimit);
   exp_res->Fill(mgl,mlsp,expMedian);
   exp_res_minus->Fill(mgl,mlsp,expMinus);
   exp_res_plus->Fill(mgl,mlsp,expPlus);

   TFile *f = new TFile(outFileName,"RECREATE");
   f->cd();

   result->Write();
   exp_res->Write();
   exp_res_minus->Write();
   exp_res_plus->Write();

   f->Close();

   if (w != NULL && writeResult) {

      // write to a file the results
      const char *  calcType = (calculatorType == 0) ? "Freq" : "Hybr";
      const char *  limitType = (useCls) ? "CLs" : "Cls+b";
      const char * scanType = (npoints < 0) ? "auto" : "grid";
      TString resultFileName = TString::Format("%s_%s_%s_ts%d_",calcType,limitType,scanType,testStatType);      

      // Only the base name of the input file is appended: prefixing a path
      // such as "results/x.root" would otherwise point into a directory that
      // does not exist. When there is no '/', Last() returns -1 and nothing
      // is removed.
      TString inputBaseName = fileName;
      inputBaseName.Replace(0, inputBaseName.Last('/')+1, "");
      resultFileName += inputBaseName;
      
      TFile * fileOut = new TFile(resultFileName,"RECREATE");
      r->Write();
      fileOut->Close();                                                                     
   }   

}
double StandardFrequentistDiscovery(
   const char* infile = "",
   const char* workspaceName = "channel1",
   const char* modelConfigNameSB = "ModelConfig",
   const char* dataName = "obsData",
   int toys = 1000,
   double poiValueForBackground = 0.0,
   double poiValueForSignal = 1.0
) {

   // The workspace contains the model for s+b. The b model is "autogenerated"
   // by copying s+b and setting the one parameter of interest to zero.
   // To keep the script simple, multiple parameters of interest or different
   // functional forms of the b model are not supported.

   // for now, assume there is only one parameter of interest, and these are
   // its values:

   /////////////////////////////////////////////////////////////
   // First part is just to access a user-defined file
   // or create the standard example file if it doesn't exist
   ////////////////////////////////////////////////////////////
   const char* filename = "";
   if (!strcmp(infile,"")) {
      filename = "results/example_channel1_GammaExample_model.root";
      bool fileExist = !gSystem->AccessPathName(filename); // note opposite return code
      // if file does not exists generate with histfactory
      if (!fileExist) {
#ifdef _WIN32
         cout << "HistFactory file cannot be generated on Windows - exit" << endl;
         return -1;
#endif
         // Normally this would be run on the command line
         cout <<"will run standard hist2workspace example"<<endl;
         gROOT->ProcessLine(".! prepareHistFactory .");
         gROOT->ProcessLine(".! hist2workspace config/example.xml");
         cout <<"\n\n---------------------"<<endl;
         cout <<"Done creating example input"<<endl;
         cout <<"---------------------\n\n"<<endl;
      }
      
   }
   else
      filename = infile;
   
   // Try to open the file
   TFile *file = TFile::Open(filename);
   
   // if input file was specified byt not found, quit
   if(!file ){
      cout <<"StandardRooStatsDemoMacro: Input file " << filename << " is not found" << endl;
      return -1;
   } 


   /////////////////////////////////////////////////////////////
   // Tutorial starts here
   ////////////////////////////////////////////////////////////

   TStopwatch *mn_t = new TStopwatch;
   mn_t->Start();

   // get the workspace out of the file
   RooWorkspace* w = (RooWorkspace*) file->Get(workspaceName);
   if (!w) {
      cout << "workspace not found" << endl;
      return -1.0;
   }

   // get the modelConfig out of the file
   ModelConfig* mc = (ModelConfig*) w->obj(modelConfigNameSB);

   // get the data out of the file
   RooAbsData* data = w->data(dataName);

   // make sure ingredients are found
   if (!data || !mc) {
      w->Print();
      cout << "data or ModelConfig was not found" << endl;
      return -1.0;
   }


   RooRealVar* firstPOI = (RooRealVar*) mc->GetParametersOfInterest()->first();
   firstPOI->setVal(poiValueForSignal);
   mc->SetSnapshot(*mc->GetParametersOfInterest());
   // create null model
   ModelConfig *mcNull = mc->Clone("ModelConfigNull");
   firstPOI->setVal(poiValueForBackground);
   mcNull->SetSnapshot(*(RooArgSet*)mcNull->GetParametersOfInterest()->snapshot());



   // ----------------------------------------------------
   // Configure a ProfileLikelihoodTestStat and a SimpleLikelihoodRatioTestStat
   // to use simultaneously with ToyMCSampler
   ProfileLikelihoodTestStat* plts =  new ProfileLikelihoodTestStat(*mc->GetPdf());
   plts->SetOneSidedDiscovery(true);
   plts->SetVarName( "q_{0}/2" );
   
   // ----------------------------------------------------
   // configure the ToyMCImportanceSampler with two test statistics
   ToyMCSampler toymcs(*plts, 50);



   // Since this tool needs to throw toy MC the PDF needs to be
   // extended or the tool needs to know how many entries in a dataset
   // per pseudo experiment.
   // In the 'number counting form' where the entries in the dataset
   // are counts, and not values of discriminating variables, the
   // datasets typically only have one entry and the PDF is not
   // extended.
   if (!mc->GetPdf()->canBeExtended()) {
      if (data->numEntries() == 1) {
         toymcs.SetNEventsPerToy(1);
      } else cout << "Not sure what to do about this model" << endl;
   }

   // We can use PROOF to speed things along in parallel
   // ProofConfig pc(*w, 2, "user@yourfavoriteproofcluster", false);
   ProofConfig pc(*w, 2, "", false);
   //toymcs.SetProofConfig(&pc);    // enable proof


   // instantiate the calculator
   FrequentistCalculator freqCalc(*data, *mc, *mcNull, &toymcs);
   freqCalc.SetToys( toys,toys ); // null toys, alt toys

   // Run the calculator and print result
   HypoTestResult* freqCalcResult = freqCalc.GetHypoTest();
   freqCalcResult->GetNullDistribution()->SetTitle( "b only" );
   freqCalcResult->GetAltDistribution()->SetTitle( "s+b" );
   freqCalcResult->Print();
   double pvalue = freqCalcResult->NullPValue();

   // stop timing
   mn_t->Stop();
   cout << "total CPU time: " << mn_t->CpuTime() << endl;
   cout << "total real time: " << mn_t->RealTime() << endl;

   // plot
   TCanvas* c1 = new TCanvas();
   HypoTestPlot *plot = new HypoTestPlot(*freqCalcResult, 100, -0.49, 9.51 );
   plot->SetLogYaxis(true);
   
   // add chi2 to plot
   int nPOI = 1;
   TF1* f = new TF1("f", TString::Format("1*ROOT::Math::chisquared_pdf(2*x,%d,0)",nPOI), 0,20);
   f->SetLineColor( kBlack );
   f->SetLineStyle( 7 );
   plot->AddTF1( f, TString::Format("#chi^{2}(2x,%d)",nPOI) );
   
   plot->Draw();
   c1->SaveAs("standard_discovery_output.pdf");
   

   return pvalue;
}
Ejemplo n.º 4
0
void StandardHypoTestDemo(const char* infile = "",
                          const char* workspaceName = "combined",
                          const char* modelSBName = "ModelConfig",
                          const char* modelBName = "",
                          const char* dataName = "obsData", 
                          int calcType = 0, // 0 freq 1 hybrid, 2 asymptotic
                          int testStatType = 3,   // 0 LEP, 1 TeV, 2 LHC, 3 LHC - one sided
                          int ntoys = 5000, 
                          bool useNC = false, 
                          const char * nuisPriorName = 0)
{

/*

  Other Parameter to pass in tutorial
  apart from standard for filename, ws, modelconfig and data

  type = 0 Freq calculator 
  type = 1 Hybrid calculator
  type = 2 Asymptotic calculator  

  testStatType = 0 LEP
  = 1 Tevatron 
  = 2 Profile Likelihood
  = 3 Profile Likelihood one sided (i.e. = 0 if mu < mu_hat)

  ntoys:         number of toys to use 

  useNumberCounting:  set to true when using number counting events 

  nuisPriorName:   name of prior for the nnuisance. This is often expressed as constraint term in the global model
  It is needed only when using the HybridCalculator (type=1)
  If not given by default the prior pdf from ModelConfig is used. 

  extra options are available as global paramwters of the macro. They major ones are: 
 
  generateBinned       generate binned data sets for toys (default is false) - be careful not to activate with 
  a too large (>=3) number of observables 
  nToyRatio            ratio of S+B/B toys (default is 2)
  printLevel
  
*/

   // disable - can cause some problems
   //ToyMCSampler::SetAlwaysUseMultiGen(true);

   SimpleLikelihoodRatioTestStat::SetAlwaysReuseNLL(true);
   ProfileLikelihoodTestStat::SetAlwaysReuseNLL(true);
   RatioOfProfiledLikelihoodsTestStat::SetAlwaysReuseNLL(true);

   //RooRandom::randomGenerator()->SetSeed(0);

   // to change minimizers 
   // ROOT::Math::MinimizerOptions::SetDefaultStrategy(0);
   // ROOT::Math::MinimizerOptions::SetDefaultMinimizer("Minuit2");
   // ROOT::Math::MinimizerOptions::SetDefaultTolerance(1);

  /////////////////////////////////////////////////////////////
  // First part is just to access a user-defined file 
  // or create the standard example file if it doesn't exist
  ////////////////////////////////////////////////////////////
  const char* filename = "";
  if (!strcmp(infile,""))
    filename = "results/example_combined_GaussExample_model.root";
  else
    filename = infile;
  // Check if example input file exists
  TFile *file = TFile::Open(filename);

  // if input file was specified byt not found, quit
  if(!file && strcmp(infile,"")){
    cout <<"file not found" << endl;
    return;
  } 

  // if default file not found, try to create it
  if(!file ){
    // Normally this would be run on the command line
    cout <<"will run standard hist2workspace example"<<endl;
    gROOT->ProcessLine(".! prepareHistFactory .");
    gROOT->ProcessLine(".! hist2workspace config/example.xml");
    cout <<"\n\n---------------------"<<endl;
    cout <<"Done creating example input"<<endl;
    cout <<"---------------------\n\n"<<endl;
  }

  // now try to access the file again
  file = TFile::Open(filename);
  if(!file){
    // if it is still not there, then we can't continue
    cout << "Not able to run hist2workspace to create example input" <<endl;
    return;
  }

  
  /////////////////////////////////////////////////////////////
  // Tutorial starts here
  ////////////////////////////////////////////////////////////

  // get the workspace out of the file
  RooWorkspace* w = (RooWorkspace*) file->Get(workspaceName);
  if(!w){
    cout <<"workspace not found" << endl;
    return;
  }
  w->Print();

  // get the modelConfig out of the file
  ModelConfig* sbModel = (ModelConfig*) w->obj(modelSBName);


  // get the modelConfig out of the file
  RooAbsData* data = w->data(dataName);

  // make sure ingredients are found
  if(!data || !sbModel){
    w->Print();
    cout << "data or ModelConfig was not found" <<endl;
    return;
  }
  // make b model
  ModelConfig* bModel = (ModelConfig*) w->obj(modelBName);


   // case of no systematics
   // remove nuisance parameters from model
   if (noSystematics) { 
      const RooArgSet * nuisPar = sbModel->GetNuisanceParameters();
      if (nuisPar && nuisPar->getSize() > 0) { 
         std::cout << "StandardHypoTestInvDemo" << "  -  Switch off all systematics by setting them constant to their initial values" << std::endl;
         RooStats::SetAllConstant(*nuisPar);
      }
      if (bModel) { 
         const RooArgSet * bnuisPar = bModel->GetNuisanceParameters();
         if (bnuisPar) 
            RooStats::SetAllConstant(*bnuisPar);
      }
   }


  if (!bModel ) {
      Info("StandardHypoTestInvDemo","The background model %s does not exist",modelBName);
      Info("StandardHypoTestInvDemo","Copy it from ModelConfig %s and set POI to zero",modelSBName);
      bModel = (ModelConfig*) sbModel->Clone();
      bModel->SetName(TString(modelSBName)+TString("B_only"));      
      RooRealVar * var = dynamic_cast<RooRealVar*>(bModel->GetParametersOfInterest()->first());
      if (!var) return;
      double oldval = var->getVal();
      var->setVal(0);
      //bModel->SetSnapshot( RooArgSet(*var, *w->var("lumi"))  );
      bModel->SetSnapshot( RooArgSet(*var)  );
      var->setVal(oldval);
  }
  
   if (!sbModel->GetSnapshot() || poiValue > 0) { 
      Info("StandardHypoTestDemo","Model %s has no snapshot  - make one using model poi",modelSBName);
      RooRealVar * var = dynamic_cast<RooRealVar*>(sbModel->GetParametersOfInterest()->first());
      if (!var) return;
      double oldval = var->getVal();
      if (poiValue > 0)  var->setVal(poiValue);
      //sbModel->SetSnapshot( RooArgSet(*var, *w->var("lumi") ) );
      sbModel->SetSnapshot( RooArgSet(*var) );
      if (poiValue > 0) var->setVal(oldval);
      //sbModel->SetSnapshot( *sbModel->GetParametersOfInterest() );
   }

   



   // part 1, hypothesis testing 
   SimpleLikelihoodRatioTestStat * slrts = new SimpleLikelihoodRatioTestStat(*bModel->GetPdf(), *sbModel->GetPdf());
   // null parameters must includes snapshot of poi plus the nuisance values 
   RooArgSet nullParams(*bModel->GetSnapshot());
   if (bModel->GetNuisanceParameters()) nullParams.add(*bModel->GetNuisanceParameters());
   
   slrts->SetNullParameters(nullParams);
   RooArgSet altParams(*sbModel->GetSnapshot());
   if (sbModel->GetNuisanceParameters()) altParams.add(*sbModel->GetNuisanceParameters());
   slrts->SetAltParameters(altParams);


   ProfileLikelihoodTestStat * profll = new ProfileLikelihoodTestStat(*bModel->GetPdf());


   RatioOfProfiledLikelihoodsTestStat * 
      ropl = new RatioOfProfiledLikelihoodsTestStat(*bModel->GetPdf(), *sbModel->GetPdf(), sbModel->GetSnapshot());
   ropl->SetSubtractMLE(false);

   if (testStatType == 3) profll->SetOneSidedDiscovery(1);
   profll->SetPrintLevel(printLevel);

   // profll.SetReuseNLL(mOptimize);
   // slrts.SetReuseNLL(mOptimize);
   // ropl.SetReuseNLL(mOptimize);

   AsymptoticCalculator::SetPrintLevel(printLevel);

   HypoTestCalculatorGeneric *  hypoCalc = 0;
   // note here Null is B and Alt is S+B
   if (calcType == 0) hypoCalc = new  FrequentistCalculator(*data, *sbModel, *bModel);
   else if (calcType == 1) hypoCalc= new  HybridCalculator(*data, *sbModel, *bModel);
   else if (calcType == 2) hypoCalc= new  AsymptoticCalculator(*data, *sbModel, *bModel);

   if (calcType == 0) 
       ((FrequentistCalculator*)hypoCalc)->SetToys(ntoys, ntoys/nToysRatio);
   if (calcType == 1) 
       ((HybridCalculator*)hypoCalc)->SetToys(ntoys, ntoys/nToysRatio);
   if (calcType == 2 ) { 
      if (testStatType == 3) ((AsymptoticCalculator*) hypoCalc)->SetOneSidedDiscovery(true);  
      if (testStatType != 2 && testStatType != 3)  
         Warning("StandardHypoTestDemo","Only the PL test statistic can be used with AsymptoticCalculator - use by default a two-sided PL");
      

   }


   // check for nuisance prior pdf in case of nuisance parameters 
   if (calcType == 1 && (bModel->GetNuisanceParameters() || sbModel->GetNuisanceParameters() )) {
         RooAbsPdf * nuisPdf = 0; 
         if (nuisPriorName) nuisPdf = w->pdf(nuisPriorName);
         // use prior defined first in bModel (then in SbModel)
         if (!nuisPdf)  { 
            Info("StandardHypoTestDemo","No nuisance pdf given for the HybridCalculator - try to deduce  pdf from the   model");
            if (bModel->GetPdf() && bModel->GetObservables() ) 
               nuisPdf = RooStats::MakeNuisancePdf(*bModel,"nuisancePdf_bmodel");
            else 
               nuisPdf = RooStats::MakeNuisancePdf(*sbModel,"nuisancePdf_sbmodel");
         }   
         if (!nuisPdf ) {
            if (bModel->GetPriorPdf())  { 
               nuisPdf = bModel->GetPriorPdf();
               Info("StandardHypoTestDemo","No nuisance pdf given - try to use %s that is defined as a prior pdf in the B model",nuisPdf->GetName());            
            }
            else { 
               Error("StandardHypoTestDemo","Cannnot run Hybrid calculator because no prior on the nuisance parameter is specified or can be derived");
               return;
            }
         }
         assert(nuisPdf);
         Info("StandardHypoTestDemo","Using as nuisance Pdf ... " );
         nuisPdf->Print();
      
         const RooArgSet * nuisParams = (bModel->GetNuisanceParameters() ) ? bModel->GetNuisanceParameters() : sbModel->GetNuisanceParameters();
         RooArgSet * np = nuisPdf->getObservables(*nuisParams);
         if (np->getSize() == 0) { 
            Warning("StandardHypoTestDemo","Prior nuisance does not depend on nuisance parameters. They will be smeared in their full range");
         }
         delete np;
      
         ((HybridCalculator*)hypoCalc)->ForcePriorNuisanceAlt(*nuisPdf);
         ((HybridCalculator*)hypoCalc)->ForcePriorNuisanceNull(*nuisPdf);
   }

   // hypoCalc->ForcePriorNuisanceAlt(*sbModel->GetPriorPdf());
   // hypoCalc->ForcePriorNuisanceNull(*bModel->GetPriorPdf());

   ToyMCSampler * sampler = (ToyMCSampler *)hypoCalc->GetTestStatSampler();

   if (sampler && (calcType == 0 || calcType == 1) ) { 

      // look if pdf is number counting or extended
      if (sbModel->GetPdf()->canBeExtended() ) { 
         if (useNC)   Warning("StandardHypoTestDemo","Pdf is extended: but number counting flag is set: ignore it ");
      }
      else {
         // for not extended pdf
         if (!useNC)  { 
            int nEvents = data->numEntries();
            Info("StandardHypoTestDemo","Pdf is not extended: number of events to generate taken  from observed data set is %d",nEvents);
            sampler->SetNEventsPerToy(nEvents);
         }
         else {
            Info("StandardHypoTestDemo","using a number counting pdf");
            sampler->SetNEventsPerToy(1);
         }
      }
      
      if (data->isWeighted() && !generateBinned) { 
         Info("StandardHypoTestDemo","Data set is weighted, nentries = %d and sum of weights = %8.1f but toy generation is unbinned - it would be faster to set generateBinned to true\n",data->numEntries(), data->sumEntries());
      }
      if (generateBinned)  sampler->SetGenerateBinned(generateBinned);


      // set the test statistic
      if (testStatType == 0) sampler->SetTestStatistic(slrts); 
      if (testStatType == 1) sampler->SetTestStatistic(ropl); 
      if (testStatType == 2 || testStatType == 3) sampler->SetTestStatistic(profll); 

   }
   
   HypoTestResult *  htr = hypoCalc->GetHypoTest();
   htr->SetPValueIsRightTail(true);
   htr->SetBackgroundAsAlt(false);
   htr->Print(); // how to get meaningfull CLs at this point?

   delete sampler;
   delete slrts; 
   delete ropl; 
   delete profll;

   if (calcType != 2) {
      HypoTestPlot * plot = new HypoTestPlot(*htr,100);
      plot->SetLogYaxis(true);
      plot->Draw();
   }
   else { 
      std::cout << "Asymptotic results " << std::endl;
      
   }

   // look at expected significances 
   // found median of S+B distribution
   if (calcType != 2) { 

      SamplingDistribution * altDist = htr->GetAltDistribution();   
      HypoTestResult htExp("Expected Result");
      htExp.Append(htr);
      // find quantiles in alt (S+B) distribution 
      double p[5];
      double q[5];
      for (int i = 0; i < 5; ++i) { 
         double sig = -2  + i;
         p[i] = ROOT::Math::normal_cdf(sig,1);
      }
      std::vector<double> values = altDist->GetSamplingDistribution();
      TMath::Quantiles( values.size(), 5, &values[0], q, p, false);  

      for (int i = 0; i < 5; ++i) { 
         htExp.SetTestStatisticData( q[i] );
         double sig = -2  + i;      
         std::cout << " Expected p -value and significance at " << sig << " sigma = " 
                   << htExp.NullPValue() << " significance " << htExp.Significance() << " sigma " << std::endl; 
         
      }
   }
   else { 
      // case of asymptotic calculator 
      for (int i = 0; i < 5; ++i) { 
         double sig = -2  + i;      
         // sigma is inverted here 
         double pval = AsymptoticCalculator::GetExpectedPValues( htr->NullPValue(), htr->AlternatePValue(), -sig, false);
         std::cout << " Expected p -value and significance at " << sig << " sigma = " 
                   << pval << " significance " << ROOT::Math::normal_quantile_c(pval,1) << " sigma " << std::endl; 
         
      }
   }

}
Ejemplo n.º 5
0
// Scans the "mass" variable of the workspace from 200 to 2500 in 100 equal
// steps and, for each mass value, runs a one-sided discovery test with the
// AsymptoticCalculator.
//
// The workspace is expected to provide a ModelConfig named "mc", a dataset
// named "data" and a variable named "mass"; an error is printed and the
// function returns when one of them (or the parameter of interest of the
// model) is missing.
//
// The observed and expected p0 values and the significance are printed for
// every mass point and drawn versus mass into significance.pdf / .png.
void significance(RooWorkspace& w ) {

  ModelConfig* mc = (ModelConfig*)w.obj("mc");
  RooDataSet* data = (RooDataSet*)w.data("data");
  RooRealVar* massVar = w.var("mass");
  if (!mc || !data || !massVar) {
    std::cout << "significance: \"mc\", \"data\" or \"mass\" not found in the workspace" << std::endl;
    return;
  }

  // define the S+B snapshot (this is used for computing the expected significance)
  ModelConfig* sbModel = mc->Clone();
  sbModel->SetName("S+B Model");
  const RooArgSet* poiSet = sbModel->GetParametersOfInterest();
  RooRealVar* poi = poiSet ? dynamic_cast<RooRealVar*>(poiSet->first()) : 0;
  if (!poi) {
    std::cout << "significance: the model has no parameter of interest" << std::endl;
    delete sbModel;
    return;
  }
  poi->setVal(50);
  sbModel->SetSnapshot(*poi);

  // the B model is the same model with the parameter of interest at zero
  ModelConfig * bModel = (ModelConfig*) sbModel->Clone();
  bModel->SetName("B model");
  poi->setVal(0);
  bModel->SetSnapshot(*poi);

  vector<double> masses;
  vector<double> p0values;
  vector<double> p0valuesExpected;
  vector<double> sigvalues;

  const double massMin = 200;
  const double massMax = 2500;
  const int nbins = 100;
  const double massStep = (massMax-massMin)/nbins;

  // smallest p0 value (observed or expected) seen in the scan, used for the
  // lower edge of the p-value plot; p-values cannot exceed 1
  double minP0 = 1.0;

  // silence the calculator for every mass point, including the first one
  AsymptoticCalculator::SetPrintLevel(-1);

  // loop on the mass values; an integer counter is used so that the number
  // of points (nbins + 1, both ends included) does not depend on
  // floating-point rounding of an accumulated step
  for ( int ibin = 0; ibin <= nbins; ++ibin ) {

    const double mass = massMin + ibin * massStep;
    massVar->setVal( mass );

    // create the AsymptoticCalculator from data,alt model, null model
    AsymptoticCalculator * ac = new AsymptoticCalculator(*data, *sbModel, *bModel);
    ac->SetOneSidedDiscovery(true);  // for one-side discovery test

    // run the calculator
    HypoTestResult* asymCalcResult = ac->GetHypoTest();
    if (!asymCalcResult) {
      std::cout << "** Mass = " << mass << " : the calculator did not return a result" << std::endl;
      delete ac;
      continue;
    }
    asymCalcResult->Print();
     
    double pvalue = asymCalcResult->NullPValue();
    double sigvalue = asymCalcResult->Significance();
    double expectedP0 = AsymptoticCalculator::GetExpectedPValues(asymCalcResult->NullPValue(),asymCalcResult->AlternatePValue(), 0, false);

    // only plain numbers are kept, so the per-point objects can be released
    // right away instead of piling up over the whole scan
    delete asymCalcResult;
    delete ac;

    masses.push_back(mass);
    p0values.push_back(pvalue);
    p0valuesExpected.push_back(expectedP0);
    sigvalues.push_back(sigvalue);
    if (pvalue < minP0) minP0 = pvalue;
    if (expectedP0 < minP0) minP0 = expectedP0;
    std::cout << "** Mass = " << mass << " p0-value = " << expectedP0 << " p-value = " << pvalue << " significance = " << sigvalue << std::endl;

  }

  // the model copies were only needed by the calculators
  delete bModel;
  delete sbModel;

  if (masses.empty()) {
    std::cout << "significance: no mass point could be computed" << std::endl;
    return;
  }

  TGraph* graph1  = new TGraph(masses.size(),&masses[0],&p0values[0]);
  TGraph* graph2  = new TGraph(masses.size(),&masses[0],&p0valuesExpected[0]);
  TGraph* graph3  = new TGraph(masses.size(),&masses[0],&sigvalues[0]);

  TCanvas* c2 = new TCanvas("c2","Significance", 900, 700);
  c2->Divide(1,2);

  // upper pad: observed (blue) and expected (red, dashed) p0 versus mass
  c2->cd(1);
  graph1->SetMarkerStyle(10);
  //graph1->Draw("APC");
  graph1->Draw("AC");
  graph2->SetLineStyle(2);
  graph2->Draw("C");
  graph1->GetXaxis()->SetTitle("Mass [GeV]");
  graph1->GetYaxis()->SetTitle("p0 value");
  graph1->SetTitle("P-value vs Mass");
  // The axis is defined by graph1, so its minimum has to cover both curves.
  // TGraph::GetMinimum() only returns a minimum that was set explicitly, not
  // the smallest plotted value, hence the minimum tracked during the scan is
  // used. A non-positive value cannot be shown on a logarithmic axis.
  if (minP0 > 0) graph1->SetMinimum(minP0);
  graph1->SetLineColor(kBlue);
  graph2->SetLineColor(kRed);
  gPad->SetLogy(true);

  // lower pad: significance versus mass
  c2->cd(2);
  graph3->SetMarkerStyle(10);
  graph3->Draw("AC");
  graph3->SetLineStyle(1);
  graph3->SetLineColor(kRed);
  graph3->GetXaxis()->SetTitle("Mass [GeV]");
  graph3->GetYaxis()->SetTitle("Significance");
  graph3->SetTitle("Significance vs Mass");
  gPad->SetLogy(false);

  c2->SaveAs("significance.pdf");
  c2->SaveAs("significance.png");
}
void HypoTestInvDemo(const char * fileName ="GausModel_b.root",
                     const char * wsName = "w",
                     const char * modelSBName = "model_sb",
                     const char * modelBName = "model_b",
                     const char * dataName = "data_obs",                  
                     int type = 0,  // calculator type 
                     int testStatType = 0, // test stat type
                     int npoints = 10,   
                     int ntoys=1000,
                     bool useCls = true )
{ 
   /*
    type = 0 Freq calculator 
    type = 1 Hybrid 

    testStatType = 0 LEP
                 = 1 Tevatron 
                 = 2 PL


   */

   if (fileName==0) { 
      std::cout << "give input filename " << std::endl;
      return; 
   }
   TFile * file = new TFile(fileName); 

   RooWorkspace * w = dynamic_cast<RooWorkspace*>( file->Get(wsName) );
   if (!w) {      
      return; 
   }
   w->Print();


   RooAbsData * data = w->data(dataName); 
   if (!data) { 
      Error("HypoTestDemo","Not existing data %s",dataName);
   }

   
   // get models from WS
  // get the modelConfig out of the file
  ModelConfig* bModel = (ModelConfig*) w->obj(modelBName);
  ModelConfig* sbModel = (ModelConfig*) w->obj(modelSBName);


   SimpleLikelihoodRatioTestStat slrts(*bModel->GetPdf(),*sbModel->GetPdf());
   slrts.SetNullParameters(*bModel->GetSnapshot());
   slrts.SetAltParameters(*sbModel->GetSnapshot());

   RatioOfProfiledLikelihoodsTestStat 
   ropl(*bModel->GetPdf(), *sbModel->GetPdf(), sbModel->GetSnapshot());
   ropl.SetSubtractMLE(false);
   
   ProfileLikelihoodTestStat profll(*sbModel->GetPdf());
   profll.SetOneSided(0);

   TestStatistic * testStat = &slrts;
   if (testStatType == 1) testStat = &ropl;
   if (testStatType == 2) testStat = &profll;
  
   
   HypoTestCalculatorGeneric *  hc = 0;
   if (type == 0) hc = new FrequentistCalculator(*data, *sbModel, *bModel);
   else new HybridCalculator(*data, *sbModel, *bModel);

   ToyMCSampler *toymcs = (ToyMCSampler*)hc->GetTestStatSampler();
   //toymcs->SetNEventsPerToy(1);
   toymcs->SetTestStatistic(testStat);


    if (type == 1) { 
      HybridCalculator *hhc = (HybridCalculator*) hc;
      hhc->SetToys(ntoys,ntoys); 
      // hhc->ForcePriorNuisanceAlt(*pdfNuis);
      // hhc->ForcePriorNuisanceNull(*pdfNuis);
   } 
   else 
      ((FrequentistCalculator*) hc)->SetToys(ntoys,ntoys); 

  // Get the result
   RooMsgService::instance().getStream(1).removeTopic(RooFit::NumIntegration);


   TStopwatch tw; tw.Start(); 
   const RooArgSet * poiSet = sbModel->GetParametersOfInterest();
   RooRealVar *poi = (RooRealVar*)poiSet->first();

   // fit the data first
   sbModel->GetPdf()->fitTo(*data);
   double poihat  = poi->getVal();
   //poi->setVal(30);
   //poi->setError(10);


   HypoTestInverter calc(*hc);
   // GENA: for two-sided interval
   //calc.SetConfidenceLevel(0.95);
   // GENA: for 95% upper limit
   calc.SetConfidenceLevel(0.90);

   calc.UseCLs(useCls);
   calc.SetVerbose(true);

   // can spped up using proof
   ProofConfig pc(*w, 2, "workers=2", kFALSE);
   //ProofConfig pc(*w, 30, "localhost", kFALSE);
   //ToyMCSampler * toymcs = dynamic_cast<ToyMCSampler *> (calc.GetHypoTestCalculator()->GetTestStatSampler() );
   // GENA: disable proof for now
   //toymcs->SetProofConfig(&pc);    // enable proof

   
   if (npoints > 0) {
     // GENA
     double poimin = TMath::Max(poihat -   4 * poi->getError(), 0.0);
     //poimin = poihat;
     double poimax = poihat +  4 * poi->getError();
     poimin = 0; 
     poimax = 20;
     //double poimin = poi->getMin();
     //double poimax = poi->getMax();
     std::cout << "Doing a fixed scan  in interval : " << poimin << " , " << poimax << std::endl;
     calc.SetFixedScan(npoints,poimin,poimax);
   }

   HypoTestInverterResult * r = calc.GetInterval();

   // write to a file the results
   TString resultFileName = (useCls) ? "CLs_" : "Cls+b_";
   resultFileName += fileName;

   // GENA
   //TFile * file = new TFile(resultFileName,"RECREATE");
   file = new TFile(resultFileName,"RECREATE");
   r->Write();
   file->Close();                                                                     

  double ulError = r->UpperLimitEstimatedError();
  double upperLimit = r->UpperLimit();
  std::cout << "The computed upper limit is: " << upperLimit << std::endl;
  std::cout << "an estimated error on this upper limit is: " << ulError << std::endl;

  // check using interpolation
  // double interpLimit = r->FindInterpolatedLimit(1.-r->ConfidenceLevel() );
  // cout << "The computer interpolated limits is " << interpLimit << endl;

  const int nEntries = r->ArraySize();

  std::vector<Double_t> xArray(nEntries);
  std::vector<Double_t> yArray(nEntries);
  std::vector<Double_t> yErrArray(nEntries);
  for (int i=0; i<nEntries; i++) {
    xArray[i] = r->GetXValue(i);
    yArray[i] = r->GetYValue(i);
    yErrArray[i] = r->GetYError(i);
    std::cout << xArray[i] << " , " << yArray[i] << " err = " << yErrArray[i] << std::endl;
  }
 

   // see expected result (bands)
   TGraph * g0 = new TGraph(nEntries);
   TGraphAsymmErrors * g1 = new TGraphAsymmErrors(nEntries);
   TGraphAsymmErrors * g2l = new TGraphAsymmErrors(nEntries);
   TGraphAsymmErrors * g2u = new TGraphAsymmErrors(nEntries);
   double p[7]; 
   double q[7];
   p[0] = ROOT::Math::normal_cdf(-2);
   p[1] = ROOT::Math::normal_cdf(-1.5);
   p[2] = ROOT::Math::normal_cdf(-1);
   p[3] = 0.5;
   p[4] = ROOT::Math::normal_cdf(1);
   p[5] = ROOT::Math::normal_cdf(1.5);
   p[6] = ROOT::Math::normal_cdf(2);
   for (int i=0; i<nEntries; i++) {
      SamplingDistribution * s = r->GetExpectedDistribution(i);
      // GENA
      //const std::vector<double> & values = s->GetSamplingDistribution();
      const std::vector<Double_t> & cValues = s->GetSamplingDistribution();
      std::vector<Double_t> values;
      for (std::vector<Double_t>::const_iterator val = cValues.begin();
	   val != cValues.end();
	   ++val) values.push_back(*val);
      TMath::Quantiles(values.size(), 7, &values[0],q,p,false);
      double p0 = q[3];
      double p2l =  q[1];
      double p2u =  q[5];
      g0->SetPoint(i, r->GetXValue(i), p0 ) ;
      g1->SetPoint(i, r->GetXValue(i),  p0);
      g2l->SetPoint(i, r->GetXValue(i), p2l);
      g2u->SetPoint(i, r->GetXValue(i), p2u);
      //g2->SetPoint(i, r->GetXValue(i), s->InverseCDF(0.50));
      g1->SetPointEYlow(i, q[3] - q[2]); // -1 sigma errorr   
      g1->SetPointEYhigh(i, q[4] - q[3]);//+1 sigma error

      g2l->SetPointEYlow(i, q[1]-q[0]);   // -2 -- -1 sigma error
      g2l->SetPointEYhigh(i, q[2]-q[1]);

      g2u->SetPointEYlow(i, q[5]-q[4]);
      g2u->SetPointEYhigh(i, q[6]-q[5]);


      if (plotHypoTestResult) { 
         HypoTestResult * hr = new HypoTestResult();
         hr->SetNullDistribution( r->GetBackgroundDistribution() );
         hr->SetAltDistribution( r->GetSignalAndBackgroundDistribution(i) );
         new TCanvas();
         HypoTestPlot * pl = new HypoTestPlot(*hr);
         pl->Draw();
      }
  }

   HypoTestInverterPlot *plot = new HypoTestInverterPlot("result","",r);
   TGraphErrors * g = plot->MakePlot();

   g->Draw("APL");   
   g2l->SetFillColor(kYellow);
   g2l->Draw("3");
   g2u->SetFillColor(kYellow);
   g2u->Draw("3");
   g1->SetFillColor(kGreen);
   g1->Draw("3");
   g0->SetLineColor(kBlue);
   g0->SetLineStyle(2);
   g0->SetLineWidth(2);
   g0->Draw("L");

   //g1->Draw("P");
   //g2->Draw("P");
   g->SetLineWidth(2);
   g->Draw("PL");   

   // GENA: two-sided interval
   //double alpha = 1.-r->ConfidenceLevel();
   // GENA: upper limit
   double alpha = (1.-r->ConfidenceLevel())/2.0;
   double x1 = g->GetXaxis()->GetXmin();
   double x2 = g->GetXaxis()->GetXmax();
   TLine * line = new TLine(x1, alpha, x2,alpha);
   line->SetLineColor(kRed);
   line->Draw();

   // see the expected limit and -1 +1 sigma bands
   // SamplingDistribution * limits = r->GetUpperLimitDistribution();

   // std::cout << " expected limit (median) " << limits->InverseCDF(0.50) << std::endl;
   // std::cout << " expected limit (-1 sig) " << limits->InverseCDF((ROOT::Math::normal_cdf(-1))) << std::endl;
   // std::cout << " expected limit (+1 sig) " << limits->InverseCDF((ROOT::Math::normal_cdf(+1))) << std::endl;
   
   tw.Print();

}