void StandardTestStatDistributionDemo(const char* infile = "", const char* workspaceName = "combined", const char* modelConfigName = "ModelConfig", const char* dataName = "obsData"){ // the number of toy MC used to generate the distribution int nToyMC = 1000; // The parameter below is needed for asymptotic distribution to be chi-square, // but set to false if your model is not numerically stable if mu<0 bool allowNegativeMu=true; ///////////////////////////////////////////////////////////// // First part is just to access a user-defined file // or create the standard example file if it doesn't exist //////////////////////////////////////////////////////////// const char* filename = ""; if (!strcmp(infile,"")) { filename = "results/example_combined_GaussExample_model.root"; bool fileExist = !gSystem->AccessPathName(filename); // note opposite return code // if file does not exists generate with histfactory if (!fileExist) { #ifdef _WIN32 cout << "HistFactory file cannot be generated on Windows - exit" << endl; return; #endif // Normally this would be run on the command line cout <<"will run standard hist2workspace example"<<endl; gROOT->ProcessLine(".! prepareHistFactory ."); gROOT->ProcessLine(".! hist2workspace config/example.xml"); cout <<"\n\n---------------------"<<endl; cout <<"Done creating example input"<<endl; cout <<"---------------------\n\n"<<endl; } } else filename = infile; // Try to open the file TFile *file = TFile::Open(filename); // if input file was specified byt not found, quit if(!file ){ cout <<"StandardRooStatsDemoMacro: Input file " << filename << " is not found" << endl; return; } ///////////////////////////////////////////////////////////// // Now get the data and workspace //////////////////////////////////////////////////////////// // get the workspace out of the file RooWorkspace* w = (RooWorkspace*) file->Get(workspaceName); if(!w){ cout <<"workspace not found" << endl; return; } // get the modelConfig out of the file ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName); // get the modelConfig out of the file RooAbsData* data = w->data(dataName); // make sure ingredients are found if(!data || !mc){ w->Print(); cout << "data or ModelConfig was not found" <<endl; return; } mc->Print(); ///////////////////////////////////////////////////////////// // Now find the upper limit based on the asymptotic results //////////////////////////////////////////////////////////// RooRealVar* firstPOI = (RooRealVar*) mc->GetParametersOfInterest()->first(); ProfileLikelihoodCalculator plc(*data,*mc); LikelihoodInterval* interval = plc.GetInterval(); double plcUpperLimit = interval->UpperLimit(*firstPOI); delete interval; cout << "\n\n--------------------------------------"<<endl; cout <<"Will generate sampling distribution at " << firstPOI->GetName() << " = " << plcUpperLimit <<endl; int nPOI = mc->GetParametersOfInterest()->getSize(); if(nPOI>1){ cout <<"not sure what to do with other parameters of interest, but here are their values"<<endl; mc->GetParametersOfInterest()->Print("v"); } ///////////////////////////////////////////// // create thte test stat sampler ProfileLikelihoodTestStat ts(*mc->GetPdf()); // to avoid effects from boundary and simplify asymptotic comparison, set min=-max if(allowNegativeMu) firstPOI->setMin(-1*firstPOI->getMax()); // temporary RooArgSet RooArgSet poi; poi.add(*mc->GetParametersOfInterest()); // create and configure the ToyMCSampler ToyMCSampler sampler(ts,nToyMC); sampler.SetPdf(*mc->GetPdf()); sampler.SetObservables(*mc->GetObservables()); sampler.SetGlobalObservables(*mc->GetGlobalObservables()); if(!mc->GetPdf()->canBeExtended() && (data->numEntries()==1)){ cout << "tell it to use 1 event"<<endl; sampler.SetNEventsPerToy(1); } firstPOI->setVal(plcUpperLimit); // set POI value for generation sampler.SetParametersForTestStat(*mc->GetParametersOfInterest()); // set POI value for evaluation if (useProof) { ProofConfig pc(*w, nworkers, "",false); sampler.SetProofConfig(&pc); // enable proof } firstPOI->setVal(plcUpperLimit); RooArgSet allParameters; allParameters.add(*mc->GetParametersOfInterest()); allParameters.add(*mc->GetNuisanceParameters()); allParameters.Print("v"); SamplingDistribution* sampDist = sampler.GetSamplingDistribution(allParameters); SamplingDistPlot plot; plot.AddSamplingDistribution(sampDist); plot.GetTH1F(sampDist)->GetYaxis()->SetTitle(Form("f(-log #lambda(#mu=%.2f) | #mu=%.2f)",plcUpperLimit,plcUpperLimit)); plot.SetAxisTitle(Form("-log #lambda(#mu=%.2f)",plcUpperLimit)); TCanvas* c1 = new TCanvas("c1"); c1->SetLogy(); plot.Draw(); double min = plot.GetTH1F(sampDist)->GetXaxis()->GetXmin(); double max = plot.GetTH1F(sampDist)->GetXaxis()->GetXmax(); TF1* f = new TF1("f",Form("2*ROOT::Math::chisquared_pdf(2*x,%d,0)",nPOI),min,max); f->Draw("same"); c1->SaveAs("standard_test_stat_distribution.pdf"); }
void OneSidedFrequentistUpperLimitWithBands_intermediate(const char* infile = "", const char* workspaceName = "combined", const char* modelConfigName = "ModelConfig", const char* dataName = "obsData"){ double confidenceLevel=0.95; // degrade/improve number of pseudo-experiments used to define the confidence belt. // value of 1 corresponds to default number of toys in the tail, which is 50/(1-confidenceLevel) double additionalToysFac = 1.; int nPointsToScan = 30; // number of steps in the parameter of interest int nToyMC = 100; // number of toys used to define the expected limit and band TStopwatch t; t.Start(); ///////////////////////////////////////////////////////////// // First part is just to access a user-defined file // or create the standard example file if it doesn't exist //////////////////////////////////////////////////////////// const char* filename = ""; if (!strcmp(infile,"")) filename = "results/example_combined_GaussExample_model.root"; else filename = infile; // Check if example input file exists TFile *file = TFile::Open(filename); // if input file was specified byt not found, quit if(!file && strcmp(infile,"")){ cout <<"file not found" << endl; return; } // if default file not found, try to create it if(!file ){ // Normally this would be run on the command line cout <<"will run standard hist2workspace example"<<endl; gROOT->ProcessLine(".! prepareHistFactory ."); gROOT->ProcessLine(".! hist2workspace config/example.xml"); cout <<"\n\n---------------------"<<endl; cout <<"Done creating example input"<<endl; cout <<"---------------------\n\n"<<endl; } // now try to access the file again file = TFile::Open(filename); if(!file){ // if it is still not there, then we can't continue cout << "Not able to run hist2workspace to create example input" <<endl; return; } ///////////////////////////////////////////////////////////// // Now get the data and workspace //////////////////////////////////////////////////////////// // get the workspace out of the file RooWorkspace* w = (RooWorkspace*) file->Get(workspaceName); if(!w){ cout <<"workspace not found" << endl; return; } // get the modelConfig out of the file ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName); // get the modelConfig out of the file RooAbsData* data = w->data(dataName); // make sure ingredients are found if(!data || !mc){ w->Print(); cout << "data or ModelConfig was not found" <<endl; return; } cout << "Found data and ModelConfig:" <<endl; mc->Print(); ///////////////////////////////////////////////////////////// // Now get the POI for convenience // you may want to adjust the range of your POI //////////////////////////////////////////////////////////// RooRealVar* firstPOI = (RooRealVar*) mc->GetParametersOfInterest()->first(); // firstPOI->setMin(0); // firstPOI->setMax(10); ///////////////////////////////////////////// // create and use the FeldmanCousins tool // to find and plot the 95% confidence interval // on the parameter of interest as specified // in the model config // REMEMBER, we will change the test statistic // so this is NOT a Feldman-Cousins interval FeldmanCousins fc(*data,*mc); fc.SetConfidenceLevel(confidenceLevel); fc.AdditionalNToysFactor(additionalToysFac); // improve sampling that defines confidence belt // fc.UseAdaptiveSampling(true); // speed it up a bit, but don't use for expectd limits fc.SetNBins(nPointsToScan); // set how many points per parameter of interest to scan fc.CreateConfBelt(true); // save the information in the belt for plotting ///////////////////////////////////////////// // Feldman-Cousins is a unified limit by definition // but the tool takes care of a few things for us like which values // of the nuisance parameters should be used to generate toys. // so let's just change the test statistic and realize this is // no longer "Feldman-Cousins" but is a fully frequentist Neyman-Construction. // ProfileLikelihoodTestStatModified onesided(*mc->GetPdf()); // fc.GetTestStatSampler()->SetTestStatistic(&onesided); // ((ToyMCSampler*) fc.GetTestStatSampler())->SetGenerateBinned(true); ToyMCSampler* toymcsampler = (ToyMCSampler*) fc.GetTestStatSampler(); ProfileLikelihoodTestStat* testStat = dynamic_cast<ProfileLikelihoodTestStat*>(toymcsampler->GetTestStatistic()); testStat->SetOneSided(true); // test speedups: testStat->SetReuseNLL(true); // toymcsampler->setUseMultiGen(true); // not fully validated // Since this tool needs to throw toy MC the PDF needs to be // extended or the tool needs to know how many entries in a dataset // per pseudo experiment. // In the 'number counting form' where the entries in the dataset // are counts, and not values of discriminating variables, the // datasets typically only have one entry and the PDF is not // extended. if(!mc->GetPdf()->canBeExtended()){ if(data->numEntries()==1) fc.FluctuateNumDataEntries(false); else cout <<"Not sure what to do about this model" <<endl; } // We can use PROOF to speed things along in parallel ProofConfig pc(*w, 4, "",false); if(mc->GetGlobalObservables()){ cout << "will use global observables for unconditional ensemble"<<endl; mc->GetGlobalObservables()->Print(); toymcsampler->SetGlobalObservables(*mc->GetGlobalObservables()); } toymcsampler->SetProofConfig(&pc); // enable proof // Now get the interval PointSetInterval* interval = fc.GetInterval(); ConfidenceBelt* belt = fc.GetConfidenceBelt(); // print out the iterval on the first Parameter of Interest cout << "\n95% interval on " <<firstPOI->GetName()<<" is : ["<< interval->LowerLimit(*firstPOI) << ", "<< interval->UpperLimit(*firstPOI) <<"] "<<endl; // get observed UL and value of test statistic evaluated there RooArgSet tmpPOI(*firstPOI); double observedUL = interval->UpperLimit(*firstPOI); firstPOI->setVal(observedUL); double obsTSatObsUL = fc.GetTestStatSampler()->EvaluateTestStatistic(*data,tmpPOI); // Ask the calculator which points were scanned RooDataSet* parameterScan = (RooDataSet*) fc.GetPointsToScan(); RooArgSet* tmpPoint; // make a histogram of parameter vs. threshold TH1F* histOfThresholds = new TH1F("histOfThresholds","", parameterScan->numEntries(), firstPOI->getMin(), firstPOI->getMax()); histOfThresholds->GetXaxis()->SetTitle(firstPOI->GetName()); histOfThresholds->GetYaxis()->SetTitle("Threshold"); // loop through the points that were tested and ask confidence belt // what the upper/lower thresholds were. // For FeldmanCousins, the lower cut off is always 0 for(Int_t i=0; i<parameterScan->numEntries(); ++i){ tmpPoint = (RooArgSet*) parameterScan->get(i)->clone("temp"); double arMax = belt->GetAcceptanceRegionMax(*tmpPoint); double poiVal = tmpPoint->getRealValue(firstPOI->GetName()) ; histOfThresholds->Fill(poiVal,arMax); } TCanvas* c1 = new TCanvas(); c1->Divide(2); c1->cd(1); histOfThresholds->SetMinimum(0); histOfThresholds->Draw(); c1->cd(2); ///////////////////////////////////////////////////////////// // Now we generate the expected bands and power-constriant //////////////////////////////////////////////////////////// // First: find parameter point for mu=0, with conditional MLEs for nuisance parameters RooAbsReal* nll = mc->GetPdf()->createNLL(*data); RooAbsReal* profile = nll->createProfile(*mc->GetParametersOfInterest()); firstPOI->setVal(0.); profile->getVal(); // this will do fit and set nuisance parameters to profiled values RooArgSet* poiAndNuisance = new RooArgSet(); if(mc->GetNuisanceParameters()) poiAndNuisance->add(*mc->GetNuisanceParameters()); poiAndNuisance->add(*mc->GetParametersOfInterest()); w->saveSnapshot("paramsToGenerateData",*poiAndNuisance); RooArgSet* paramsToGenerateData = (RooArgSet*) poiAndNuisance->snapshot(); cout << "\nWill use these parameter points to generate pseudo data for bkg only" << endl; paramsToGenerateData->Print("v"); double CLb=0; double CLbinclusive=0; // Now we generate background only and find distribution of upper limits TH1F* histOfUL = new TH1F("histOfUL","",100,0,firstPOI->getMax()); histOfUL->GetXaxis()->SetTitle("Upper Limit (background only)"); histOfUL->GetYaxis()->SetTitle("Entries"); for(int imc=0; imc<nToyMC; ++imc){ // set parameters back to values for generating pseudo data w->loadSnapshot("paramsToGenerateData"); // in 5.30 there is a nicer way to generate toy data & randomize global obs RooAbsData* toyData = toymcsampler->GenerateToyData(*paramsToGenerateData); // get test stat at observed UL in observed data firstPOI->setVal(observedUL); double toyTSatObsUL = fc.GetTestStatSampler()->EvaluateTestStatistic(*toyData,tmpPOI); // toyData->get()->Print("v"); // cout <<"obsTSatObsUL " <<obsTSatObsUL << "toyTS " << toyTSatObsUL << endl; if(obsTSatObsUL < toyTSatObsUL) // (should be checked) CLb+= (1.)/nToyMC; if(obsTSatObsUL <= toyTSatObsUL) // (should be checked) CLbinclusive+= (1.)/nToyMC; // loop over points in belt to find upper limit for this toy data double thisUL = 0; for(Int_t i=0; i<parameterScan->numEntries(); ++i){ tmpPoint = (RooArgSet*) parameterScan->get(i)->clone("temp"); double arMax = belt->GetAcceptanceRegionMax(*tmpPoint); firstPOI->setVal( tmpPoint->getRealValue(firstPOI->GetName()) ); double thisTS = fc.GetTestStatSampler()->EvaluateTestStatistic(*toyData,tmpPOI); if(thisTS<=arMax){ thisUL = firstPOI->getVal(); } else{ break; } } histOfUL->Fill(thisUL); delete toyData; } histOfUL->Draw(); c1->SaveAs("one-sided_upper_limit_output.pdf"); // if you want to see a plot of the sampling distribution for a particular scan point: // Now find bands and power constraint Double_t* bins = histOfUL->GetIntegral(); TH1F* cumulative = (TH1F*) histOfUL->Clone("cumulative"); cumulative->SetContent(bins); double band2sigDown=0, band1sigDown=0, bandMedian=0, band1sigUp=0,band2sigUp=0; for(int i=1; i<=cumulative->GetNbinsX(); ++i){ if(bins[i]<RooStats::SignificanceToPValue(2)) band2sigDown=cumulative->GetBinCenter(i); if(bins[i]<RooStats::SignificanceToPValue(1)) band1sigDown=cumulative->GetBinCenter(i); if(bins[i]<0.5) bandMedian=cumulative->GetBinCenter(i); if(bins[i]<RooStats::SignificanceToPValue(-1)) band1sigUp=cumulative->GetBinCenter(i); if(bins[i]<RooStats::SignificanceToPValue(-2)) band2sigUp=cumulative->GetBinCenter(i); } t.Stop(); t.Print(); cout << "-2 sigma band " << band2sigDown << endl; cout << "-1 sigma band " << band1sigDown << endl; cout << "median of band " << bandMedian << " [Power Constriant)]" << endl; cout << "+1 sigma band " << band1sigUp << endl; cout << "+2 sigma band " << band2sigUp << endl; // print out the iterval on the first Parameter of Interest cout << "\nobserved 95% upper-limit "<< interval->UpperLimit(*firstPOI) <<endl; cout << "CLb strict [P(toy>obs|0)] for observed 95% upper-limit "<< CLb <<endl; cout << "CLb inclusive [P(toy>=obs|0)] for observed 95% upper-limit "<< CLbinclusive <<endl; delete profile; delete nll; }
float ComputeTestStat(TString wsfile, double mu_susy_sig_val) { gROOT->Reset(); TFile* wstf = new TFile( wsfile ) ; RooWorkspace* ws = dynamic_cast<RooWorkspace*>( wstf->Get("ws") ); ws->Print() ; ModelConfig* modelConfig = (ModelConfig*) ws->obj( "SbModel" ) ; modelConfig->Print() ; RooDataSet* rds = (RooDataSet*) ws->obj( "ra2b_observed_rds" ) ; rds->Print() ; rds->printMultiline(cout, 1, kTRUE, "") ; RooAbsPdf* likelihood = modelConfig->GetPdf() ; RooRealVar* rrv_mu_susy_sig = ws->var("mu_susy_all0lep") ; if ( rrv_mu_susy_sig == 0x0 ) { printf("\n\n\n *** can't find mu_susy_all0lep in workspace. Quitting.\n\n\n") ; return ; } else { printf(" current value is : %8.3f\n", rrv_mu_susy_sig->getVal() ) ; cout << flush ; rrv_mu_susy_sig->setConstant(kFALSE) ; } /* // check the impact of varying the qcd normalization: RooRealVar *rrv_qcd_0lepLDP_ratioH1 = ws->var("qcd_0lepLDP_ratio_H1"); RooRealVar *rrv_qcd_0lepLDP_ratioH2 = ws->var("qcd_0lepLDP_ratio_H2"); RooRealVar *rrv_qcd_0lepLDP_ratioH3 = ws->var("qcd_0lepLDP_ratio_H3"); rrv_qcd_0lepLDP_ratioH1->setVal(0.3); rrv_qcd_0lepLDP_ratioH2->setVal(0.3); rrv_qcd_0lepLDP_ratioH3->setVal(0.3); rrv_qcd_0lepLDP_ratioH1->setConstant(kTRUE); rrv_qcd_0lepLDP_ratioH2->setConstant(kTRUE); rrv_qcd_0lepLDP_ratioH3->setConstant(kTRUE); */ printf("\n\n\n ===== Doing a fit with SUSY component floating ====================\n\n") ; RooFitResult* fitResult = likelihood->fitTo( *rds, Save(true), PrintLevel(0) ) ; double logLikelihoodSusyFloat = fitResult->minNll() ; double logLikelihoodSusyFixed(0.) ; double testStatVal(-1.) ; if ( mu_susy_sig_val >= 0. ) { printf("\n\n\n ===== Doing a fit with SUSY fixed ====================\n\n") ; printf(" fixing mu_susy_sig to %8.2f.\n", mu_susy_sig_val ) ; rrv_mu_susy_sig->setVal( mu_susy_sig_val ) ; rrv_mu_susy_sig->setConstant(kTRUE) ; fitResult = likelihood->fitTo( *rds, Save(true), PrintLevel(0) ) ; logLikelihoodSusyFixed = fitResult->minNll() ; testStatVal = 2.*(logLikelihoodSusyFixed - logLikelihoodSusyFloat) ; printf("\n\n\n ======= test statistic : -2 * ln (L_fixed / ln L_max) = %8.3f\n\n\n", testStatVal ) ; } return testStatVal ; }