double upper_limit_FC(Model* model,double confidence){ cout<<"///////////////////////////////////////////////////////////////////////////////////////////"<<endl; cout<<"Calculating upper limit with the FC method"<<endl; cout<<"///////////////////////////////////////////////////////////////////////////////////////////"<<endl; RooWorkspace* wspace = new RooWorkspace("wspace"); ModelConfig* modelConfig = new ModelConfig("FC"); modelConfig->SetWorkspace(*wspace); modelConfig->SetPdf(*model->get_complete_likelihood()); modelConfig->SetPriorPdf(*model->get_POI_prior()); modelConfig->SetParametersOfInterest(*model->get_POI_set()); //modelConfig->SetParametersOfInterest(*wspace->set("poi")); //modelConfig->SetNuisanceParameters(*wspace->set("nuis")); // modelConfig->SetNuisanceParameters(); modelConfig->SetNuisanceParameters(*model->get_nuisance_set()); RooDataSet* data = model->get_data(); RooArgSet* poi= model->get_POI_set(); //configure the calculator //model->Print(); cout<<" POI size "<<model->get_POI_set()->getSize()<<endl; // use FeldmaCousins (takes ~20 min) FeldmanCousins fc(*data, *modelConfig); fc.SetConfidenceLevel(0.95); //0.9 central limit=0.95 upper limit //fc.SetTestSize(.1); // set size of test //number counting: dataset always has 1 entry with N events observed fc.FluctuateNumDataEntries(false); fc.UseAdaptiveSampling(true); fc.SetNBins(200); PointSetInterval* fcInt = NULL; //ConfInterval* interval = 0; RooRealVar* firstPOI = (RooRealVar*) modelConfig->GetParametersOfInterest()->first(); // if(doFeldmanCousins){ // takes 7 minutes fcInt = (PointSetInterval*) fc.GetInterval(); // fix cast //xs} //interval = (PointSetInterval*) fc.GetInterval(); cout<<" ["<<fcInt->LowerLimit( *firstPOI ) << ", " << fcInt->UpperLimit( *firstPOI ) << "]" << endl; cout<<" ["<<fcInt->LowerLimit( *firstPOI ) << ", " << fcInt->UpperLimit( *firstPOI ) << "]" << endl; double ul=fcInt->UpperLimit( *firstPOI ); //double ul=interval->UpperLimit( *firstPOI ); return ul; }
// // calculation of the limit: assumes that wspace is set up and observations // contained in data // MyLimit computeLimit (RooWorkspace* wspace, RooDataSet* data, StatMethod method, bool draw) { // let's time this challenging example TStopwatch t; // // get nominal signal // RooRealVar exp_sig(*wspace->var("s")); double exp_sig_val = exp_sig.getVal(); std::cout << "exp_sig = " << exp_sig_val << std::endl; ///////////////////////////////////////////////////// // Now the statistical tests // model config std::cout << wspace->pdf("model") << " " << wspace->pdf("prior") << " " << wspace->set("poi") << " " << wspace->set("nuis") << std::endl; ModelConfig modelConfig("RA4abcd"); modelConfig.SetWorkspace(*wspace); modelConfig.SetPdf(*wspace->pdf("model")); modelConfig.SetPriorPdf(*wspace->pdf("prior")); modelConfig.SetParametersOfInterest(*wspace->set("poi")); modelConfig.SetNuisanceParameters(*wspace->set("nuis")); ////////////////////////////////////////////////// // If you want to see the covariance matrix uncomment // wspace->pdf("model")->fitTo(*data); // use ProfileLikelihood if ( method == ProfileLikelihoodMethod ) { ProfileLikelihoodCalculator plc(*data, modelConfig); plc.SetConfidenceLevel(0.95); RooFit::MsgLevel msglevel = RooMsgService::instance().globalKillBelow(); RooMsgService::instance().setGlobalKillBelow(RooFit::FATAL); LikelihoodInterval* plInt = plc.GetInterval(); RooMsgService::instance().setGlobalKillBelow(RooFit::FATAL); plInt->LowerLimit( *wspace->var("s") ); // get ugly print out of the way. Fix. // RooMsgService::instance().setGlobalKillBelow(RooFit::DEBUG); if ( draw ) { TCanvas* c = new TCanvas("ProfileLikelihood"); LikelihoodIntervalPlot* lrplot = new LikelihoodIntervalPlot(plInt); lrplot->Draw(); } // RooMsgService::instance().setGlobalKillBelow(msglevel); double lowLim = plInt->LowerLimit(*wspace->var("s")); double uppLim = plInt->UpperLimit(*wspace->var("s")); // double exp_sig_val = wspace->var("s")->getVal(); // double exp_sig_val = exp_sig.getVal(); cout << "Profile Likelihood interval on s = [" << lowLim << ", " << uppLim << "]" << " " << exp_sig_val << endl; // MyLimit result(plInt->IsInInterval(exp_sig), MyLimit result(exp_sig_val>lowLim&&exp_sig_val<uppLim,lowLim,uppLim); // std::cout << "isIn " << result << std::endl; delete plInt; // delete modelConfig; return result; } // use FeldmaCousins (takes ~20 min) if ( method == FeldmanCousinsMethod ) { FeldmanCousins fc(*data, modelConfig); fc.SetConfidenceLevel(0.95); //number counting: dataset always has 1 entry with N events observed fc.FluctuateNumDataEntries(false); fc.UseAdaptiveSampling(true); fc.SetNBins(100); PointSetInterval* fcInt = NULL; fcInt = (PointSetInterval*) fc.GetInterval(); // fix cast double lowLim = fcInt->LowerLimit(*wspace->var("s")); double uppLim = fcInt->UpperLimit(*wspace->var("s")); // double exp_sig_val = wspace->var("s")->getVal(); cout << "Feldman Cousins interval on s = [" << lowLim << " " << uppLim << endl; // std::cout << "isIn " << result << std::endl; MyLimit result(exp_sig_val>lowLim&&exp_sig_val<uppLim, fcInt->LowerLimit(*wspace->var("s")),fcInt->UpperLimit(*wspace->var("s"))); delete fcInt; return result; } // use BayesianCalculator (only 1-d parameter of interest, slow for this problem) if ( method == BayesianMethod ) { BayesianCalculator bc(*data, modelConfig); bc.SetConfidenceLevel(0.95); bc.SetLeftSideTailFraction(0.5); SimpleInterval* bInt = NULL; if( wspace->set("poi")->getSize() == 1) { bInt = bc.GetInterval(); if ( draw ) { TCanvas* c = new TCanvas("Bayesian"); // the plot takes a long time and print lots of error // using a scan it is better bc.SetScanOfPosterior(50); RooPlot* bplot = bc.GetPosteriorPlot(); bplot->Draw(); } cout << "Bayesian interval on s = [" << bInt->LowerLimit( ) << ", " << bInt->UpperLimit( ) << "]" << endl; // std::cout << "isIn " << result << std::endl; MyLimit result(bInt->IsInInterval(exp_sig), bInt->LowerLimit(),bInt->UpperLimit()); delete bInt; return result; } else { cout << "Bayesian Calc. only supports on parameter of interest" << endl; return MyLimit(); } } // use MCMCCalculator (takes about 1 min) // Want an efficient proposal function, so derive it from covariance // matrix of fit if ( method == MCMCMethod ) { RooFitResult* fit = wspace->pdf("model")->fitTo(*data,Save()); ProposalHelper ph; ph.SetVariables((RooArgSet&)fit->floatParsFinal()); ph.SetCovMatrix(fit->covarianceMatrix()); ph.SetUpdateProposalParameters(kTRUE); // auto-create mean vars and add mappings ph.SetCacheSize(100); ProposalFunction* pf = ph.GetProposalFunction(); MCMCCalculator mc(*data, modelConfig); mc.SetConfidenceLevel(0.95); mc.SetProposalFunction(*pf); mc.SetNumBurnInSteps(100); // first N steps to be ignored as burn-in mc.SetNumIters(100000); mc.SetLeftSideTailFraction(0.5); // make a central interval MCMCInterval* mcInt = NULL; mcInt = mc.GetInterval(); MCMCIntervalPlot mcPlot(*mcInt); mcPlot.Draw(); cout << "MCMC interval on s = [" << mcInt->LowerLimit(*wspace->var("s") ) << ", " << mcInt->UpperLimit(*wspace->var("s") ) << "]" << endl; // std::cout << "isIn " << result << std::endl; MyLimit result(mcInt->IsInInterval(exp_sig), mcInt->LowerLimit(*wspace->var("s")),mcInt->UpperLimit(*wspace->var("s"))); delete mcInt; return result; } t.Print(); // delete modelConfig; return MyLimit(); }
void StandardFeldmanCousinsDemo(const char* infile = "", const char* workspaceName = "combined", const char* modelConfigName = "ModelConfig", const char* dataName = "obsData"){ // ------------------------------------------------------- // First part is just to access a user-defined file // or create the standard example file if it doesn't exist const char* filename = ""; if (!strcmp(infile,"")) { filename = "results/example_combined_GaussExample_model.root"; bool fileExist = !gSystem->AccessPathName(filename); // note opposite return code // if file does not exists generate with histfactory if (!fileExist) { #ifdef _WIN32 cout << "HistFactory file cannot be generated on Windows - exit" << endl; return; #endif // Normally this would be run on the command line cout <<"will run standard hist2workspace example"<<endl; gROOT->ProcessLine(".! prepareHistFactory ."); gROOT->ProcessLine(".! hist2workspace config/example.xml"); cout <<"\n\n---------------------"<<endl; cout <<"Done creating example input"<<endl; cout <<"---------------------\n\n"<<endl; } } else filename = infile; // Try to open the file TFile *file = TFile::Open(filename); // if input file was specified byt not found, quit if(!file ){ cout <<"StandardRooStatsDemoMacro: Input file " << filename << " is not found" << endl; return; } // ------------------------------------------------------- // Tutorial starts here // ------------------------------------------------------- // get the workspace out of the file RooWorkspace* w = (RooWorkspace*) file->Get(workspaceName); if(!w){ cout <<"workspace not found" << endl; return; } // get the modelConfig out of the file ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName); // get the modelConfig out of the file RooAbsData* data = w->data(dataName); // make sure ingredients are found if(!data || !mc){ w->Print(); cout << "data or ModelConfig was not found" <<endl; return; } // ------------------------------------------------------- // create and use the FeldmanCousins tool // to find and plot the 95% confidence interval // on the parameter of interest as specified // in the model config FeldmanCousins fc(*data,*mc); fc.SetConfidenceLevel(0.95); // 95% interval //fc.AdditionalNToysFactor(0.1); // to speed up the result fc.UseAdaptiveSampling(true); // speed it up a bit fc.SetNBins(10); // set how many points per parameter of interest to scan fc.CreateConfBelt(true); // save the information in the belt for plotting // Since this tool needs to throw toy MC the PDF needs to be // extended or the tool needs to know how many entries in a dataset // per pseudo experiment. // In the 'number counting form' where the entries in the dataset // are counts, and not values of discriminating variables, the // datasets typically only have one entry and the PDF is not // extended. if(!mc->GetPdf()->canBeExtended()){ if(data->numEntries()==1) fc.FluctuateNumDataEntries(false); else cout <<"Not sure what to do about this model" <<endl; } // We can use PROOF to speed things along in parallel // ProofConfig pc(*w, 1, "workers=4", kFALSE); // ToyMCSampler* toymcsampler = (ToyMCSampler*) fc.GetTestStatSampler(); // toymcsampler->SetProofConfig(&pc); // enable proof // Now get the interval PointSetInterval* interval = fc.GetInterval(); ConfidenceBelt* belt = fc.GetConfidenceBelt(); // print out the iterval on the first Parameter of Interest RooRealVar* firstPOI = (RooRealVar*) mc->GetParametersOfInterest()->first(); cout << "\n95% interval on " <<firstPOI->GetName()<<" is : ["<< interval->LowerLimit(*firstPOI) << ", "<< interval->UpperLimit(*firstPOI) <<"] "<<endl; // --------------------------------------------- // No nice plots yet, so plot the belt by hand // Ask the calculator which points were scanned RooDataSet* parameterScan = (RooDataSet*) fc.GetPointsToScan(); RooArgSet* tmpPoint; // make a histogram of parameter vs. threshold TH1F* histOfThresholds = new TH1F("histOfThresholds","", parameterScan->numEntries(), firstPOI->getMin(), firstPOI->getMax()); // loop through the points that were tested and ask confidence belt // what the upper/lower thresholds were. // For FeldmanCousins, the lower cut off is always 0 for(Int_t i=0; i<parameterScan->numEntries(); ++i){ tmpPoint = (RooArgSet*) parameterScan->get(i)->clone("temp"); double arMax = belt->GetAcceptanceRegionMax(*tmpPoint); double arMin = belt->GetAcceptanceRegionMax(*tmpPoint); double poiVal = tmpPoint->getRealValue(firstPOI->GetName()) ; histOfThresholds->Fill(poiVal,arMax); } histOfThresholds->SetMinimum(0); histOfThresholds->Draw(); }
void OneSidedFrequentistUpperLimitWithBands(const char* infile = "", const char* workspaceName = "combined", const char* modelConfigName = "ModelConfig", const char* dataName = "obsData") { double confidenceLevel=0.95; int nPointsToScan = 20; int nToyMC = 200; // ------------------------------------------------------- // First part is just to access a user-defined file // or create the standard example file if it doesn't exist const char* filename = ""; if (!strcmp(infile,"")) { filename = "results/example_combined_GaussExample_model.root"; bool fileExist = !gSystem->AccessPathName(filename); // note opposite return code // if file does not exists generate with histfactory if (!fileExist) { #ifdef _WIN32 cout << "HistFactory file cannot be generated on Windows - exit" << endl; return; #endif // Normally this would be run on the command line cout <<"will run standard hist2workspace example"<<endl; gROOT->ProcessLine(".! prepareHistFactory ."); gROOT->ProcessLine(".! hist2workspace config/example.xml"); cout <<"\n\n---------------------"<<endl; cout <<"Done creating example input"<<endl; cout <<"---------------------\n\n"<<endl; } } else filename = infile; // Try to open the file TFile *file = TFile::Open(filename); // if input file was specified byt not found, quit if(!file ){ cout <<"StandardRooStatsDemoMacro: Input file " << filename << " is not found" << endl; return; } // ------------------------------------------------------- // Now get the data and workspace // get the workspace out of the file RooWorkspace* w = (RooWorkspace*) file->Get(workspaceName); if(!w){ cout <<"workspace not found" << endl; return; } // get the modelConfig out of the file ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName); // get the modelConfig out of the file RooAbsData* data = w->data(dataName); // make sure ingredients are found if(!data || !mc){ w->Print(); cout << "data or ModelConfig was not found" <<endl; return; } // ------------------------------------------------------- // Now get the POI for convenience // you may want to adjust the range of your POI RooRealVar* firstPOI = (RooRealVar*) mc->GetParametersOfInterest()->first(); /* firstPOI->setMin(0);*/ /* firstPOI->setMax(10);*/ // -------------------------------------------- // Create and use the FeldmanCousins tool // to find and plot the 95% confidence interval // on the parameter of interest as specified // in the model config // REMEMBER, we will change the test statistic // so this is NOT a Feldman-Cousins interval FeldmanCousins fc(*data,*mc); fc.SetConfidenceLevel(confidenceLevel); /* fc.AdditionalNToysFactor(0.25); // degrade/improve sampling that defines confidence belt*/ /* fc.UseAdaptiveSampling(true); // speed it up a bit, don't use for expected limits*/ fc.SetNBins(nPointsToScan); // set how many points per parameter of interest to scan fc.CreateConfBelt(true); // save the information in the belt for plotting // ------------------------------------------------------- // Feldman-Cousins is a unified limit by definition // but the tool takes care of a few things for us like which values // of the nuisance parameters should be used to generate toys. // so let's just change the test statistic and realize this is // no longer "Feldman-Cousins" but is a fully frequentist Neyman-Construction. /* ProfileLikelihoodTestStatModified onesided(*mc->GetPdf());*/ /* fc.GetTestStatSampler()->SetTestStatistic(&onesided);*/ /* ((ToyMCSampler*) fc.GetTestStatSampler())->SetGenerateBinned(true); */ ToyMCSampler* toymcsampler = (ToyMCSampler*) fc.GetTestStatSampler(); ProfileLikelihoodTestStat* testStat = dynamic_cast<ProfileLikelihoodTestStat*>(toymcsampler->GetTestStatistic()); testStat->SetOneSided(true); // Since this tool needs to throw toy MC the PDF needs to be // extended or the tool needs to know how many entries in a dataset // per pseudo experiment. // In the 'number counting form' where the entries in the dataset // are counts, and not values of discriminating variables, the // datasets typically only have one entry and the PDF is not // extended. if(!mc->GetPdf()->canBeExtended()){ if(data->numEntries()==1) fc.FluctuateNumDataEntries(false); else cout <<"Not sure what to do about this model" <<endl; } // We can use PROOF to speed things along in parallel // However, the test statistic has to be installed on the workers // so either turn off PROOF or include the modified test statistic // in your `$ROOTSYS/roofit/roostats/inc` directory, // add the additional line to the LinkDef.h file, // and recompile root. if (useProof) { ProofConfig pc(*w, nworkers, "", false); toymcsampler->SetProofConfig(&pc); // enable proof } if(mc->GetGlobalObservables()){ cout << "will use global observables for unconditional ensemble"<<endl; mc->GetGlobalObservables()->Print(); toymcsampler->SetGlobalObservables(*mc->GetGlobalObservables()); } // Now get the interval PointSetInterval* interval = fc.GetInterval(); ConfidenceBelt* belt = fc.GetConfidenceBelt(); // print out the interval on the first Parameter of Interest cout << "\n95% interval on " <<firstPOI->GetName()<<" is : ["<< interval->LowerLimit(*firstPOI) << ", "<< interval->UpperLimit(*firstPOI) <<"] "<<endl; // get observed UL and value of test statistic evaluated there RooArgSet tmpPOI(*firstPOI); double observedUL = interval->UpperLimit(*firstPOI); firstPOI->setVal(observedUL); double obsTSatObsUL = fc.GetTestStatSampler()->EvaluateTestStatistic(*data,tmpPOI); // Ask the calculator which points were scanned RooDataSet* parameterScan = (RooDataSet*) fc.GetPointsToScan(); RooArgSet* tmpPoint; // make a histogram of parameter vs. threshold TH1F* histOfThresholds = new TH1F("histOfThresholds","", parameterScan->numEntries(), firstPOI->getMin(), firstPOI->getMax()); histOfThresholds->GetXaxis()->SetTitle(firstPOI->GetName()); histOfThresholds->GetYaxis()->SetTitle("Threshold"); // loop through the points that were tested and ask confidence belt // what the upper/lower thresholds were. // For FeldmanCousins, the lower cut off is always 0 for(Int_t i=0; i<parameterScan->numEntries(); ++i){ tmpPoint = (RooArgSet*) parameterScan->get(i)->clone("temp"); //cout <<"get threshold"<<endl; double arMax = belt->GetAcceptanceRegionMax(*tmpPoint); double poiVal = tmpPoint->getRealValue(firstPOI->GetName()) ; histOfThresholds->Fill(poiVal,arMax); } TCanvas* c1 = new TCanvas(); c1->Divide(2); c1->cd(1); histOfThresholds->SetMinimum(0); histOfThresholds->Draw(); c1->cd(2); // ------------------------------------------------------- // Now we generate the expected bands and power-constraint // First: find parameter point for mu=0, with conditional MLEs for nuisance parameters RooAbsReal* nll = mc->GetPdf()->createNLL(*data); RooAbsReal* profile = nll->createProfile(*mc->GetParametersOfInterest()); firstPOI->setVal(0.); profile->getVal(); // this will do fit and set nuisance parameters to profiled values RooArgSet* poiAndNuisance = new RooArgSet(); if(mc->GetNuisanceParameters()) poiAndNuisance->add(*mc->GetNuisanceParameters()); poiAndNuisance->add(*mc->GetParametersOfInterest()); w->saveSnapshot("paramsToGenerateData",*poiAndNuisance); RooArgSet* paramsToGenerateData = (RooArgSet*) poiAndNuisance->snapshot(); cout << "\nWill use these parameter points to generate pseudo data for bkg only" << endl; paramsToGenerateData->Print("v"); RooArgSet unconditionalObs; unconditionalObs.add(*mc->GetObservables()); unconditionalObs.add(*mc->GetGlobalObservables()); // comment this out for the original conditional ensemble double CLb=0; double CLbinclusive=0; // Now we generate background only and find distribution of upper limits TH1F* histOfUL = new TH1F("histOfUL","",100,0,firstPOI->getMax()); histOfUL->GetXaxis()->SetTitle("Upper Limit (background only)"); histOfUL->GetYaxis()->SetTitle("Entries"); for(int imc=0; imc<nToyMC; ++imc){ // set parameters back to values for generating pseudo data // cout << "\n get current nuis, set vals, print again" << endl; w->loadSnapshot("paramsToGenerateData"); // poiAndNuisance->Print("v"); RooDataSet* toyData = 0; // now generate a toy dataset if(!mc->GetPdf()->canBeExtended()){ if(data->numEntries()==1) toyData = mc->GetPdf()->generate(*mc->GetObservables(),1); else cout <<"Not sure what to do about this model" <<endl; } else{ // cout << "generating extended dataset"<<endl; toyData = mc->GetPdf()->generate(*mc->GetObservables(),Extended()); } // generate global observables // need to be careful for simpdf // RooDataSet* globalData = mc->GetPdf()->generate(*mc->GetGlobalObservables(),1); RooSimultaneous* simPdf = dynamic_cast<RooSimultaneous*>(mc->GetPdf()); if(!simPdf){ RooDataSet *one = mc->GetPdf()->generate(*mc->GetGlobalObservables(), 1); const RooArgSet *values = one->get(); RooArgSet *allVars = mc->GetPdf()->getVariables(); *allVars = *values; delete allVars; delete values; delete one; } else { //try fix for sim pdf TIterator* iter = simPdf->indexCat().typeIterator() ; RooCatType* tt = NULL; while((tt=(RooCatType*) iter->Next())) { // Get pdf associated with state from simpdf RooAbsPdf* pdftmp = simPdf->getPdf(tt->GetName()) ; // Generate only global variables defined by the pdf associated with this state RooArgSet* globtmp = pdftmp->getObservables(*mc->GetGlobalObservables()) ; RooDataSet* tmp = pdftmp->generate(*globtmp,1) ; // Transfer values to output placeholder *globtmp = *tmp->get(0) ; // Cleanup delete globtmp ; delete tmp ; } } // globalData->Print("v"); // unconditionalObs = *globalData->get(); // mc->GetGlobalObservables()->Print("v"); // delete globalData; // cout << "toy data = " << endl; // toyData->get()->Print("v"); // get test stat at observed UL in observed data firstPOI->setVal(observedUL); double toyTSatObsUL = fc.GetTestStatSampler()->EvaluateTestStatistic(*toyData,tmpPOI); // toyData->get()->Print("v"); // cout <<"obsTSatObsUL " <<obsTSatObsUL << "toyTS " << toyTSatObsUL << endl; if(obsTSatObsUL < toyTSatObsUL) // not sure about <= part yet CLb+= (1.)/nToyMC; if(obsTSatObsUL <= toyTSatObsUL) // not sure about <= part yet CLbinclusive+= (1.)/nToyMC; // loop over points in belt to find upper limit for this toy data double thisUL = 0; for(Int_t i=0; i<parameterScan->numEntries(); ++i){ tmpPoint = (RooArgSet*) parameterScan->get(i)->clone("temp"); double arMax = belt->GetAcceptanceRegionMax(*tmpPoint); firstPOI->setVal( tmpPoint->getRealValue(firstPOI->GetName()) ); // double thisTS = profile->getVal(); double thisTS = fc.GetTestStatSampler()->EvaluateTestStatistic(*toyData,tmpPOI); // cout << "poi = " << firstPOI->getVal() // << " max is " << arMax << " this profile = " << thisTS << endl; // cout << "thisTS = " << thisTS<<endl; if(thisTS<=arMax){ thisUL = firstPOI->getVal(); } else{ break; } } /* // loop over points in belt to find upper limit for this toy data double thisUL = 0; for(Int_t i=0; i<histOfThresholds->GetNbinsX(); ++i){ tmpPoint = (RooArgSet*) parameterScan->get(i)->clone("temp"); cout <<"---------------- "<<i<<endl; tmpPoint->Print("v"); cout << "from hist " << histOfThresholds->GetBinCenter(i+1) <<endl; double arMax = histOfThresholds->GetBinContent(i+1); // cout << " threhold from Hist = aMax " << arMax<<endl; // double arMax2 = belt->GetAcceptanceRegionMax(*tmpPoint); // cout << "from scan arMax2 = "<< arMax2 << endl; // not the same due to TH1F not TH1D // cout << "scan - hist" << arMax2-arMax << endl; firstPOI->setVal( histOfThresholds->GetBinCenter(i+1)); // double thisTS = profile->getVal(); double thisTS = fc.GetTestStatSampler()->EvaluateTestStatistic(*toyData,tmpPOI); // cout << "poi = " << firstPOI->getVal() // << " max is " << arMax << " this profile = " << thisTS << endl; // cout << "thisTS = " << thisTS<<endl; // NOTE: need to add a small epsilon term for single precision vs. double precision if(thisTS<=arMax + 1e-7){ thisUL = firstPOI->getVal(); } else{ break; } } */ histOfUL->Fill(thisUL); // for few events, data is often the same, and UL is often the same // cout << "thisUL = " << thisUL<<endl; delete toyData; } histOfUL->Draw(); c1->SaveAs("one-sided_upper_limit_output.pdf"); // if you want to see a plot of the sampling distribution for a particular scan point: /* SamplingDistPlot sampPlot; int indexInScan = 0; tmpPoint = (RooArgSet*) parameterScan->get(indexInScan)->clone("temp"); firstPOI->setVal( tmpPoint->getRealValue(firstPOI->GetName()) ); toymcsampler->SetParametersForTestStat(tmpPOI); SamplingDistribution* samp = toymcsampler->GetSamplingDistribution(*tmpPoint); sampPlot.AddSamplingDistribution(samp); sampPlot.Draw(); */ // Now find bands and power constraint Double_t* bins = histOfUL->GetIntegral(); TH1F* cumulative = (TH1F*) histOfUL->Clone("cumulative"); cumulative->SetContent(bins); double band2sigDown, band1sigDown, bandMedian, band1sigUp,band2sigUp; for(int i=1; i<=cumulative->GetNbinsX(); ++i){ if(bins[i]<RooStats::SignificanceToPValue(2)) band2sigDown=cumulative->GetBinCenter(i); if(bins[i]<RooStats::SignificanceToPValue(1)) band1sigDown=cumulative->GetBinCenter(i); if(bins[i]<0.5) bandMedian=cumulative->GetBinCenter(i); if(bins[i]<RooStats::SignificanceToPValue(-1)) band1sigUp=cumulative->GetBinCenter(i); if(bins[i]<RooStats::SignificanceToPValue(-2)) band2sigUp=cumulative->GetBinCenter(i); } cout << "-2 sigma band " << band2sigDown << endl; cout << "-1 sigma band " << band1sigDown << " [Power Constraint)]" << endl; cout << "median of band " << bandMedian << endl; cout << "+1 sigma band " << band1sigUp << endl; cout << "+2 sigma band " << band2sigUp << endl; // print out the interval on the first Parameter of Interest cout << "\nobserved 95% upper-limit "<< interval->UpperLimit(*firstPOI) <<endl; cout << "CLb strict [P(toy>obs|0)] for observed 95% upper-limit "<< CLb <<endl; cout << "CLb inclusive [P(toy>=obs|0)] for observed 95% upper-limit "<< CLbinclusive <<endl; delete profile; delete nll; }
void IntervalExamples() { // Time this macro TStopwatch t; t.Start(); // set RooFit random seed for reproducible results RooRandom::randomGenerator()->SetSeed(3001); // make a simple model via the workspace factory RooWorkspace* wspace = new RooWorkspace(); wspace->factory("Gaussian::normal(x[-10,10],mu[-1,1],sigma[1])"); wspace->defineSet("poi","mu"); wspace->defineSet("obs","x"); // specify components of model for statistical tools ModelConfig* modelConfig = new ModelConfig("Example G(x|mu,1)"); modelConfig->SetWorkspace(*wspace); modelConfig->SetPdf( *wspace->pdf("normal") ); modelConfig->SetParametersOfInterest( *wspace->set("poi") ); modelConfig->SetObservables( *wspace->set("obs") ); // create a toy dataset RooDataSet* data = wspace->pdf("normal")->generate(*wspace->set("obs"),100); data->Print(); // for convenience later on RooRealVar* x = wspace->var("x"); RooRealVar* mu = wspace->var("mu"); // set confidence level double confidenceLevel = 0.95; // example use profile likelihood calculator ProfileLikelihoodCalculator plc(*data, *modelConfig); plc.SetConfidenceLevel( confidenceLevel); LikelihoodInterval* plInt = plc.GetInterval(); // example use of Feldman-Cousins FeldmanCousins fc(*data, *modelConfig); fc.SetConfidenceLevel( confidenceLevel); fc.SetNBins(100); // number of points to test per parameter fc.UseAdaptiveSampling(true); // make it go faster // Here, we consider only ensembles with 100 events // The PDF could be extended and this could be removed fc.FluctuateNumDataEntries(false); // Proof // ProofConfig pc(*wspace, 4, "workers=4", kFALSE); // proof-lite //ProofConfig pc(w, 8, "localhost"); // proof cluster at "localhost" // ToyMCSampler* toymcsampler = (ToyMCSampler*) fc.GetTestStatSampler(); // toymcsampler->SetProofConfig(&pc); // enable proof PointSetInterval* interval = (PointSetInterval*) fc.GetInterval(); // example use of BayesianCalculator // now we also need to specify a prior in the ModelConfig wspace->factory("Uniform::prior(mu)"); modelConfig->SetPriorPdf(*wspace->pdf("prior")); // example usage of BayesianCalculator BayesianCalculator bc(*data, *modelConfig); bc.SetConfidenceLevel( confidenceLevel); SimpleInterval* bcInt = bc.GetInterval(); // example use of MCMCInterval MCMCCalculator mc(*data, *modelConfig); mc.SetConfidenceLevel( confidenceLevel); // special options mc.SetNumBins(200); // bins used internally for representing posterior mc.SetNumBurnInSteps(500); // first N steps to be ignored as burn-in mc.SetNumIters(100000); // how long to run chain mc.SetLeftSideTailFraction(0.5); // for central interval MCMCInterval* mcInt = mc.GetInterval(); // for this example we know the expected intervals double expectedLL = data->mean(*x) + ROOT::Math::normal_quantile( (1-confidenceLevel)/2,1) / sqrt(data->numEntries()); double expectedUL = data->mean(*x) + ROOT::Math::normal_quantile_c((1-confidenceLevel)/2,1) / sqrt(data->numEntries()) ; // Use the intervals std::cout << "expected interval is [" << expectedLL << ", " << expectedUL << "]" << endl; cout << "plc interval is [" << plInt->LowerLimit(*mu) << ", " << plInt->UpperLimit(*mu) << "]" << endl; std::cout << "fc interval is ["<< interval->LowerLimit(*mu) << " , " << interval->UpperLimit(*mu) << "]" << endl; cout << "bc interval is [" << bcInt->LowerLimit() << ", " << bcInt->UpperLimit() << "]" << endl; cout << "mc interval is [" << mcInt->LowerLimit(*mu) << ", " << mcInt->UpperLimit(*mu) << "]" << endl; mu->setVal(0); cout << "is mu=0 in the interval? " << plInt->IsInInterval(RooArgSet(*mu)) << endl; // make a reasonable style gStyle->SetCanvasColor(0); gStyle->SetCanvasBorderMode(0); gStyle->SetPadBorderMode(0); gStyle->SetPadColor(0); gStyle->SetCanvasColor(0); gStyle->SetTitleFillColor(0); gStyle->SetFillColor(0); gStyle->SetFrameFillColor(0); gStyle->SetStatColor(0); // some plots TCanvas* canvas = new TCanvas("canvas"); canvas->Divide(2,2); // plot the data canvas->cd(1); RooPlot* frame = x->frame(); data->plotOn(frame); data->statOn(frame); frame->Draw(); // plot the profile likelihood canvas->cd(2); LikelihoodIntervalPlot plot(plInt); plot.Draw(); // plot the MCMC interval canvas->cd(3); MCMCIntervalPlot* mcPlot = new MCMCIntervalPlot(*mcInt); mcPlot->SetLineColor(kGreen); mcPlot->SetLineWidth(2); mcPlot->Draw(); canvas->cd(4); RooPlot * bcPlot = bc.GetPosteriorPlot(); bcPlot->Draw(); canvas->Update(); t.Stop(); t.Print(); }
void OneSidedFrequentistUpperLimitWithBands_intermediate(const char* infile = "", const char* workspaceName = "combined", const char* modelConfigName = "ModelConfig", const char* dataName = "obsData"){ double confidenceLevel=0.95; // degrade/improve number of pseudo-experiments used to define the confidence belt. // value of 1 corresponds to default number of toys in the tail, which is 50/(1-confidenceLevel) double additionalToysFac = 1.; int nPointsToScan = 30; // number of steps in the parameter of interest int nToyMC = 100; // number of toys used to define the expected limit and band TStopwatch t; t.Start(); ///////////////////////////////////////////////////////////// // First part is just to access a user-defined file // or create the standard example file if it doesn't exist //////////////////////////////////////////////////////////// const char* filename = ""; if (!strcmp(infile,"")) filename = "results/example_combined_GaussExample_model.root"; else filename = infile; // Check if example input file exists TFile *file = TFile::Open(filename); // if input file was specified byt not found, quit if(!file && strcmp(infile,"")){ cout <<"file not found" << endl; return; } // if default file not found, try to create it if(!file ){ // Normally this would be run on the command line cout <<"will run standard hist2workspace example"<<endl; gROOT->ProcessLine(".! prepareHistFactory ."); gROOT->ProcessLine(".! hist2workspace config/example.xml"); cout <<"\n\n---------------------"<<endl; cout <<"Done creating example input"<<endl; cout <<"---------------------\n\n"<<endl; } // now try to access the file again file = TFile::Open(filename); if(!file){ // if it is still not there, then we can't continue cout << "Not able to run hist2workspace to create example input" <<endl; return; } ///////////////////////////////////////////////////////////// // Now get the data and workspace //////////////////////////////////////////////////////////// // get the workspace out of the file RooWorkspace* w = (RooWorkspace*) file->Get(workspaceName); if(!w){ cout <<"workspace not found" << endl; return; } // get the modelConfig out of the file ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName); // get the modelConfig out of the file RooAbsData* data = w->data(dataName); // make sure ingredients are found if(!data || !mc){ w->Print(); cout << "data or ModelConfig was not found" <<endl; return; } cout << "Found data and ModelConfig:" <<endl; mc->Print(); ///////////////////////////////////////////////////////////// // Now get the POI for convenience // you may want to adjust the range of your POI //////////////////////////////////////////////////////////// RooRealVar* firstPOI = (RooRealVar*) mc->GetParametersOfInterest()->first(); // firstPOI->setMin(0); // firstPOI->setMax(10); ///////////////////////////////////////////// // create and use the FeldmanCousins tool // to find and plot the 95% confidence interval // on the parameter of interest as specified // in the model config // REMEMBER, we will change the test statistic // so this is NOT a Feldman-Cousins interval FeldmanCousins fc(*data,*mc); fc.SetConfidenceLevel(confidenceLevel); fc.AdditionalNToysFactor(additionalToysFac); // improve sampling that defines confidence belt // fc.UseAdaptiveSampling(true); // speed it up a bit, but don't use for expectd limits fc.SetNBins(nPointsToScan); // set how many points per parameter of interest to scan fc.CreateConfBelt(true); // save the information in the belt for plotting ///////////////////////////////////////////// // Feldman-Cousins is a unified limit by definition // but the tool takes care of a few things for us like which values // of the nuisance parameters should be used to generate toys. // so let's just change the test statistic and realize this is // no longer "Feldman-Cousins" but is a fully frequentist Neyman-Construction. // ProfileLikelihoodTestStatModified onesided(*mc->GetPdf()); // fc.GetTestStatSampler()->SetTestStatistic(&onesided); // ((ToyMCSampler*) fc.GetTestStatSampler())->SetGenerateBinned(true); ToyMCSampler* toymcsampler = (ToyMCSampler*) fc.GetTestStatSampler(); ProfileLikelihoodTestStat* testStat = dynamic_cast<ProfileLikelihoodTestStat*>(toymcsampler->GetTestStatistic()); testStat->SetOneSided(true); // test speedups: testStat->SetReuseNLL(true); // toymcsampler->setUseMultiGen(true); // not fully validated // Since this tool needs to throw toy MC the PDF needs to be // extended or the tool needs to know how many entries in a dataset // per pseudo experiment. // In the 'number counting form' where the entries in the dataset // are counts, and not values of discriminating variables, the // datasets typically only have one entry and the PDF is not // extended. if(!mc->GetPdf()->canBeExtended()){ if(data->numEntries()==1) fc.FluctuateNumDataEntries(false); else cout <<"Not sure what to do about this model" <<endl; } // We can use PROOF to speed things along in parallel ProofConfig pc(*w, 4, "",false); if(mc->GetGlobalObservables()){ cout << "will use global observables for unconditional ensemble"<<endl; mc->GetGlobalObservables()->Print(); toymcsampler->SetGlobalObservables(*mc->GetGlobalObservables()); } toymcsampler->SetProofConfig(&pc); // enable proof // Now get the interval PointSetInterval* interval = fc.GetInterval(); ConfidenceBelt* belt = fc.GetConfidenceBelt(); // print out the iterval on the first Parameter of Interest cout << "\n95% interval on " <<firstPOI->GetName()<<" is : ["<< interval->LowerLimit(*firstPOI) << ", "<< interval->UpperLimit(*firstPOI) <<"] "<<endl; // get observed UL and value of test statistic evaluated there RooArgSet tmpPOI(*firstPOI); double observedUL = interval->UpperLimit(*firstPOI); firstPOI->setVal(observedUL); double obsTSatObsUL = fc.GetTestStatSampler()->EvaluateTestStatistic(*data,tmpPOI); // Ask the calculator which points were scanned RooDataSet* parameterScan = (RooDataSet*) fc.GetPointsToScan(); RooArgSet* tmpPoint; // make a histogram of parameter vs. threshold TH1F* histOfThresholds = new TH1F("histOfThresholds","", parameterScan->numEntries(), firstPOI->getMin(), firstPOI->getMax()); histOfThresholds->GetXaxis()->SetTitle(firstPOI->GetName()); histOfThresholds->GetYaxis()->SetTitle("Threshold"); // loop through the points that were tested and ask confidence belt // what the upper/lower thresholds were. // For FeldmanCousins, the lower cut off is always 0 for(Int_t i=0; i<parameterScan->numEntries(); ++i){ tmpPoint = (RooArgSet*) parameterScan->get(i)->clone("temp"); double arMax = belt->GetAcceptanceRegionMax(*tmpPoint); double poiVal = tmpPoint->getRealValue(firstPOI->GetName()) ; histOfThresholds->Fill(poiVal,arMax); } TCanvas* c1 = new TCanvas(); c1->Divide(2); c1->cd(1); histOfThresholds->SetMinimum(0); histOfThresholds->Draw(); c1->cd(2); ///////////////////////////////////////////////////////////// // Now we generate the expected bands and power-constriant //////////////////////////////////////////////////////////// // First: find parameter point for mu=0, with conditional MLEs for nuisance parameters RooAbsReal* nll = mc->GetPdf()->createNLL(*data); RooAbsReal* profile = nll->createProfile(*mc->GetParametersOfInterest()); firstPOI->setVal(0.); profile->getVal(); // this will do fit and set nuisance parameters to profiled values RooArgSet* poiAndNuisance = new RooArgSet(); if(mc->GetNuisanceParameters()) poiAndNuisance->add(*mc->GetNuisanceParameters()); poiAndNuisance->add(*mc->GetParametersOfInterest()); w->saveSnapshot("paramsToGenerateData",*poiAndNuisance); RooArgSet* paramsToGenerateData = (RooArgSet*) poiAndNuisance->snapshot(); cout << "\nWill use these parameter points to generate pseudo data for bkg only" << endl; paramsToGenerateData->Print("v"); double CLb=0; double CLbinclusive=0; // Now we generate background only and find distribution of upper limits TH1F* histOfUL = new TH1F("histOfUL","",100,0,firstPOI->getMax()); histOfUL->GetXaxis()->SetTitle("Upper Limit (background only)"); histOfUL->GetYaxis()->SetTitle("Entries"); for(int imc=0; imc<nToyMC; ++imc){ // set parameters back to values for generating pseudo data w->loadSnapshot("paramsToGenerateData"); // in 5.30 there is a nicer way to generate toy data & randomize global obs RooAbsData* toyData = toymcsampler->GenerateToyData(*paramsToGenerateData); // get test stat at observed UL in observed data firstPOI->setVal(observedUL); double toyTSatObsUL = fc.GetTestStatSampler()->EvaluateTestStatistic(*toyData,tmpPOI); // toyData->get()->Print("v"); // cout <<"obsTSatObsUL " <<obsTSatObsUL << "toyTS " << toyTSatObsUL << endl; if(obsTSatObsUL < toyTSatObsUL) // (should be checked) CLb+= (1.)/nToyMC; if(obsTSatObsUL <= toyTSatObsUL) // (should be checked) CLbinclusive+= (1.)/nToyMC; // loop over points in belt to find upper limit for this toy data double thisUL = 0; for(Int_t i=0; i<parameterScan->numEntries(); ++i){ tmpPoint = (RooArgSet*) parameterScan->get(i)->clone("temp"); double arMax = belt->GetAcceptanceRegionMax(*tmpPoint); firstPOI->setVal( tmpPoint->getRealValue(firstPOI->GetName()) ); double thisTS = fc.GetTestStatSampler()->EvaluateTestStatistic(*toyData,tmpPOI); if(thisTS<=arMax){ thisUL = firstPOI->getVal(); } else{ break; } } histOfUL->Fill(thisUL); delete toyData; } histOfUL->Draw(); c1->SaveAs("one-sided_upper_limit_output.pdf"); // if you want to see a plot of the sampling distribution for a particular scan point: // Now find bands and power constraint Double_t* bins = histOfUL->GetIntegral(); TH1F* cumulative = (TH1F*) histOfUL->Clone("cumulative"); cumulative->SetContent(bins); double band2sigDown=0, band1sigDown=0, bandMedian=0, band1sigUp=0,band2sigUp=0; for(int i=1; i<=cumulative->GetNbinsX(); ++i){ if(bins[i]<RooStats::SignificanceToPValue(2)) band2sigDown=cumulative->GetBinCenter(i); if(bins[i]<RooStats::SignificanceToPValue(1)) band1sigDown=cumulative->GetBinCenter(i); if(bins[i]<0.5) bandMedian=cumulative->GetBinCenter(i); if(bins[i]<RooStats::SignificanceToPValue(-1)) band1sigUp=cumulative->GetBinCenter(i); if(bins[i]<RooStats::SignificanceToPValue(-2)) band2sigUp=cumulative->GetBinCenter(i); } t.Stop(); t.Print(); cout << "-2 sigma band " << band2sigDown << endl; cout << "-1 sigma band " << band1sigDown << endl; cout << "median of band " << bandMedian << " [Power Constriant)]" << endl; cout << "+1 sigma band " << band1sigUp << endl; cout << "+2 sigma band " << band2sigUp << endl; // print out the iterval on the first Parameter of Interest cout << "\nobserved 95% upper-limit "<< interval->UpperLimit(*firstPOI) <<endl; cout << "CLb strict [P(toy>obs|0)] for observed 95% upper-limit "<< CLb <<endl; cout << "CLb inclusive [P(toy>=obs|0)] for observed 95% upper-limit "<< CLbinclusive <<endl; delete profile; delete nll; }