void IntervalExamples() { // Time this macro TStopwatch t; t.Start(); // set RooFit random seed for reproducible results RooRandom::randomGenerator()->SetSeed(3001); // make a simple model via the workspace factory RooWorkspace* wspace = new RooWorkspace(); wspace->factory("Gaussian::normal(x[-10,10],mu[-1,1],sigma[1])"); wspace->defineSet("poi","mu"); wspace->defineSet("obs","x"); // specify components of model for statistical tools ModelConfig* modelConfig = new ModelConfig("Example G(x|mu,1)"); modelConfig->SetWorkspace(*wspace); modelConfig->SetPdf( *wspace->pdf("normal") ); modelConfig->SetParametersOfInterest( *wspace->set("poi") ); modelConfig->SetObservables( *wspace->set("obs") ); // create a toy dataset RooDataSet* data = wspace->pdf("normal")->generate(*wspace->set("obs"),100); data->Print(); // for convenience later on RooRealVar* x = wspace->var("x"); RooRealVar* mu = wspace->var("mu"); // set confidence level double confidenceLevel = 0.95; // example use profile likelihood calculator ProfileLikelihoodCalculator plc(*data, *modelConfig); plc.SetConfidenceLevel( confidenceLevel); LikelihoodInterval* plInt = plc.GetInterval(); // example use of Feldman-Cousins FeldmanCousins fc(*data, *modelConfig); fc.SetConfidenceLevel( confidenceLevel); fc.SetNBins(100); // number of points to test per parameter fc.UseAdaptiveSampling(true); // make it go faster // Here, we consider only ensembles with 100 events // The PDF could be extended and this could be removed fc.FluctuateNumDataEntries(false); // Proof // ProofConfig pc(*wspace, 4, "workers=4", kFALSE); // proof-lite //ProofConfig pc(w, 8, "localhost"); // proof cluster at "localhost" // ToyMCSampler* toymcsampler = (ToyMCSampler*) fc.GetTestStatSampler(); // toymcsampler->SetProofConfig(&pc); // enable proof PointSetInterval* interval = (PointSetInterval*) fc.GetInterval(); // example use of BayesianCalculator // now we also need to specify a prior in the ModelConfig wspace->factory("Uniform::prior(mu)"); modelConfig->SetPriorPdf(*wspace->pdf("prior")); // example usage of BayesianCalculator BayesianCalculator bc(*data, *modelConfig); bc.SetConfidenceLevel( confidenceLevel); SimpleInterval* bcInt = bc.GetInterval(); // example use of MCMCInterval MCMCCalculator mc(*data, *modelConfig); mc.SetConfidenceLevel( confidenceLevel); // special options mc.SetNumBins(200); // bins used internally for representing posterior mc.SetNumBurnInSteps(500); // first N steps to be ignored as burn-in mc.SetNumIters(100000); // how long to run chain mc.SetLeftSideTailFraction(0.5); // for central interval MCMCInterval* mcInt = mc.GetInterval(); // for this example we know the expected intervals double expectedLL = data->mean(*x) + ROOT::Math::normal_quantile( (1-confidenceLevel)/2,1) / sqrt(data->numEntries()); double expectedUL = data->mean(*x) + ROOT::Math::normal_quantile_c((1-confidenceLevel)/2,1) / sqrt(data->numEntries()) ; // Use the intervals std::cout << "expected interval is [" << expectedLL << ", " << expectedUL << "]" << endl; cout << "plc interval is [" << plInt->LowerLimit(*mu) << ", " << plInt->UpperLimit(*mu) << "]" << endl; std::cout << "fc interval is ["<< interval->LowerLimit(*mu) << " , " << interval->UpperLimit(*mu) << "]" << endl; cout << "bc interval is [" << bcInt->LowerLimit() << ", " << bcInt->UpperLimit() << "]" << endl; cout << "mc interval is [" << mcInt->LowerLimit(*mu) << ", " << mcInt->UpperLimit(*mu) << "]" << endl; mu->setVal(0); cout << "is mu=0 in the interval? " << plInt->IsInInterval(RooArgSet(*mu)) << endl; // make a reasonable style gStyle->SetCanvasColor(0); gStyle->SetCanvasBorderMode(0); gStyle->SetPadBorderMode(0); gStyle->SetPadColor(0); gStyle->SetCanvasColor(0); gStyle->SetTitleFillColor(0); gStyle->SetFillColor(0); gStyle->SetFrameFillColor(0); gStyle->SetStatColor(0); // some plots TCanvas* canvas = new TCanvas("canvas"); canvas->Divide(2,2); // plot the data canvas->cd(1); RooPlot* frame = x->frame(); data->plotOn(frame); data->statOn(frame); frame->Draw(); // plot the profile likelihood canvas->cd(2); LikelihoodIntervalPlot plot(plInt); plot.Draw(); // plot the MCMC interval canvas->cd(3); MCMCIntervalPlot* mcPlot = new MCMCIntervalPlot(*mcInt); mcPlot->SetLineColor(kGreen); mcPlot->SetLineWidth(2); mcPlot->Draw(); canvas->cd(4); RooPlot * bcPlot = bc.GetPosteriorPlot(); bcPlot->Draw(); canvas->Update(); t.Stop(); t.Print(); }
// // calculation of the limit: assumes that wspace is set up and observations // contained in data // MyLimit computeLimit (RooWorkspace* wspace, RooDataSet* data, StatMethod method, bool draw) { // let's time this challenging example TStopwatch t; // // get nominal signal // RooRealVar exp_sig(*wspace->var("s")); double exp_sig_val = exp_sig.getVal(); std::cout << "exp_sig = " << exp_sig_val << std::endl; ///////////////////////////////////////////////////// // Now the statistical tests // model config std::cout << wspace->pdf("model") << " " << wspace->pdf("prior") << " " << wspace->set("poi") << " " << wspace->set("nuis") << std::endl; ModelConfig modelConfig("RA4abcd"); modelConfig.SetWorkspace(*wspace); modelConfig.SetPdf(*wspace->pdf("model")); modelConfig.SetPriorPdf(*wspace->pdf("prior")); modelConfig.SetParametersOfInterest(*wspace->set("poi")); modelConfig.SetNuisanceParameters(*wspace->set("nuis")); ////////////////////////////////////////////////// // If you want to see the covariance matrix uncomment // wspace->pdf("model")->fitTo(*data); // use ProfileLikelihood if ( method == ProfileLikelihoodMethod ) { ProfileLikelihoodCalculator plc(*data, modelConfig); plc.SetConfidenceLevel(0.95); RooFit::MsgLevel msglevel = RooMsgService::instance().globalKillBelow(); RooMsgService::instance().setGlobalKillBelow(RooFit::FATAL); LikelihoodInterval* plInt = plc.GetInterval(); RooMsgService::instance().setGlobalKillBelow(RooFit::FATAL); plInt->LowerLimit( *wspace->var("s") ); // get ugly print out of the way. Fix. // RooMsgService::instance().setGlobalKillBelow(RooFit::DEBUG); if ( draw ) { TCanvas* c = new TCanvas("ProfileLikelihood"); LikelihoodIntervalPlot* lrplot = new LikelihoodIntervalPlot(plInt); lrplot->Draw(); } // RooMsgService::instance().setGlobalKillBelow(msglevel); double lowLim = plInt->LowerLimit(*wspace->var("s")); double uppLim = plInt->UpperLimit(*wspace->var("s")); // double exp_sig_val = wspace->var("s")->getVal(); // double exp_sig_val = exp_sig.getVal(); cout << "Profile Likelihood interval on s = [" << lowLim << ", " << uppLim << "]" << " " << exp_sig_val << endl; // MyLimit result(plInt->IsInInterval(exp_sig), MyLimit result(exp_sig_val>lowLim&&exp_sig_val<uppLim,lowLim,uppLim); // std::cout << "isIn " << result << std::endl; delete plInt; // delete modelConfig; return result; } // use FeldmaCousins (takes ~20 min) if ( method == FeldmanCousinsMethod ) { FeldmanCousins fc(*data, modelConfig); fc.SetConfidenceLevel(0.95); //number counting: dataset always has 1 entry with N events observed fc.FluctuateNumDataEntries(false); fc.UseAdaptiveSampling(true); fc.SetNBins(100); PointSetInterval* fcInt = NULL; fcInt = (PointSetInterval*) fc.GetInterval(); // fix cast double lowLim = fcInt->LowerLimit(*wspace->var("s")); double uppLim = fcInt->UpperLimit(*wspace->var("s")); // double exp_sig_val = wspace->var("s")->getVal(); cout << "Feldman Cousins interval on s = [" << lowLim << " " << uppLim << endl; // std::cout << "isIn " << result << std::endl; MyLimit result(exp_sig_val>lowLim&&exp_sig_val<uppLim, fcInt->LowerLimit(*wspace->var("s")),fcInt->UpperLimit(*wspace->var("s"))); delete fcInt; return result; } // use BayesianCalculator (only 1-d parameter of interest, slow for this problem) if ( method == BayesianMethod ) { BayesianCalculator bc(*data, modelConfig); bc.SetConfidenceLevel(0.95); bc.SetLeftSideTailFraction(0.5); SimpleInterval* bInt = NULL; if( wspace->set("poi")->getSize() == 1) { bInt = bc.GetInterval(); if ( draw ) { TCanvas* c = new TCanvas("Bayesian"); // the plot takes a long time and print lots of error // using a scan it is better bc.SetScanOfPosterior(50); RooPlot* bplot = bc.GetPosteriorPlot(); bplot->Draw(); } cout << "Bayesian interval on s = [" << bInt->LowerLimit( ) << ", " << bInt->UpperLimit( ) << "]" << endl; // std::cout << "isIn " << result << std::endl; MyLimit result(bInt->IsInInterval(exp_sig), bInt->LowerLimit(),bInt->UpperLimit()); delete bInt; return result; } else { cout << "Bayesian Calc. only supports on parameter of interest" << endl; return MyLimit(); } } // use MCMCCalculator (takes about 1 min) // Want an efficient proposal function, so derive it from covariance // matrix of fit if ( method == MCMCMethod ) { RooFitResult* fit = wspace->pdf("model")->fitTo(*data,Save()); ProposalHelper ph; ph.SetVariables((RooArgSet&)fit->floatParsFinal()); ph.SetCovMatrix(fit->covarianceMatrix()); ph.SetUpdateProposalParameters(kTRUE); // auto-create mean vars and add mappings ph.SetCacheSize(100); ProposalFunction* pf = ph.GetProposalFunction(); MCMCCalculator mc(*data, modelConfig); mc.SetConfidenceLevel(0.95); mc.SetProposalFunction(*pf); mc.SetNumBurnInSteps(100); // first N steps to be ignored as burn-in mc.SetNumIters(100000); mc.SetLeftSideTailFraction(0.5); // make a central interval MCMCInterval* mcInt = NULL; mcInt = mc.GetInterval(); MCMCIntervalPlot mcPlot(*mcInt); mcPlot.Draw(); cout << "MCMC interval on s = [" << mcInt->LowerLimit(*wspace->var("s") ) << ", " << mcInt->UpperLimit(*wspace->var("s") ) << "]" << endl; // std::cout << "isIn " << result << std::endl; MyLimit result(mcInt->IsInInterval(exp_sig), mcInt->LowerLimit(*wspace->var("s")),mcInt->UpperLimit(*wspace->var("s"))); delete mcInt; return result; } t.Print(); // delete modelConfig; return MyLimit(); }
void rs101_limitexample() { // -------------------------------------- // An example of setting a limit in a number counting experiment with uncertainty on background and signal // to time the macro TStopwatch t; t.Start(); // -------------------------------------- // The Model building stage // -------------------------------------- RooWorkspace* wspace = new RooWorkspace(); wspace->factory("Poisson::countingModel(obs[150,0,300], sum(s[50,0,120]*ratioSigEff[1.,0,3.],b[100]*ratioBkgEff[1.,0.,3.]))"); // counting model // wspace->factory("Gaussian::sigConstraint(ratioSigEff,1,0.05)"); // 5% signal efficiency uncertainty // wspace->factory("Gaussian::bkgConstraint(ratioBkgEff,1,0.1)"); // 10% background efficiency uncertainty wspace->factory("Gaussian::sigConstraint(gSigEff[1,0,3],ratioSigEff,0.05)"); // 5% signal efficiency uncertainty wspace->factory("Gaussian::bkgConstraint(gSigBkg[1,0,3],ratioBkgEff,0.2)"); // 10% background efficiency uncertainty wspace->factory("PROD::modelWithConstraints(countingModel,sigConstraint,bkgConstraint)"); // product of terms wspace->Print(); RooAbsPdf* modelWithConstraints = wspace->pdf("modelWithConstraints"); // get the model RooRealVar* obs = wspace->var("obs"); // get the observable RooRealVar* s = wspace->var("s"); // get the signal we care about RooRealVar* b = wspace->var("b"); // get the background and set it to a constant. Uncertainty included in ratioBkgEff b->setConstant(); RooRealVar* ratioSigEff = wspace->var("ratioSigEff"); // get uncertain parameter to constrain RooRealVar* ratioBkgEff = wspace->var("ratioBkgEff"); // get uncertain parameter to constrain RooArgSet constrainedParams(*ratioSigEff, *ratioBkgEff); // need to constrain these in the fit (should change default behavior) RooRealVar * gSigEff = wspace->var("gSigEff"); // global observables for signal efficiency RooRealVar * gSigBkg = wspace->var("gSigBkg"); // global obs for background efficiency gSigEff->setConstant(); gSigBkg->setConstant(); // Create an example dataset with 160 observed events obs->setVal(160.); RooDataSet* data = new RooDataSet("exampleData", "exampleData", RooArgSet(*obs)); data->add(*obs); RooArgSet all(*s, *ratioBkgEff, *ratioSigEff); // not necessary modelWithConstraints->fitTo(*data, RooFit::Constrain(RooArgSet(*ratioSigEff, *ratioBkgEff))); // Now let's make some confidence intervals for s, our parameter of interest RooArgSet paramOfInterest(*s); ModelConfig modelConfig(wspace); modelConfig.SetPdf(*modelWithConstraints); modelConfig.SetParametersOfInterest(paramOfInterest); modelConfig.SetNuisanceParameters(constrainedParams); modelConfig.SetObservables(*obs); modelConfig.SetGlobalObservables( RooArgSet(*gSigEff,*gSigBkg)); modelConfig.SetName("ModelConfig"); wspace->import(modelConfig); wspace->import(*data); wspace->SetName("w"); wspace->writeToFile("rs101_ws.root"); // First, let's use a Calculator based on the Profile Likelihood Ratio //ProfileLikelihoodCalculator plc(*data, *modelWithConstraints, paramOfInterest); ProfileLikelihoodCalculator plc(*data, modelConfig); plc.SetTestSize(.05); ConfInterval* lrinterval = plc.GetInterval(); // that was easy. // Let's make a plot TCanvas* dataCanvas = new TCanvas("dataCanvas"); dataCanvas->Divide(2,1); dataCanvas->cd(1); LikelihoodIntervalPlot plotInt((LikelihoodInterval*)lrinterval); plotInt.SetTitle("Profile Likelihood Ratio and Posterior for S"); plotInt.Draw(); // Second, use a Calculator based on the Feldman Cousins technique FeldmanCousins fc(*data, modelConfig); fc.UseAdaptiveSampling(true); fc.FluctuateNumDataEntries(false); // number counting analysis: dataset always has 1 entry with N events observed fc.SetNBins(100); // number of points to test per parameter fc.SetTestSize(.05); // fc.SaveBeltToFile(true); // optional ConfInterval* fcint = NULL; fcint = fc.GetInterval(); // that was easy. RooFitResult* fit = modelWithConstraints->fitTo(*data, Save(true)); // Third, use a Calculator based on Markov Chain monte carlo // Before configuring the calculator, let's make a ProposalFunction // that will achieve a high acceptance rate ProposalHelper ph; ph.SetVariables((RooArgSet&)fit->floatParsFinal()); ph.SetCovMatrix(fit->covarianceMatrix()); ph.SetUpdateProposalParameters(true); ph.SetCacheSize(100); ProposalFunction* pdfProp = ph.GetProposalFunction(); // that was easy MCMCCalculator mc(*data, modelConfig); mc.SetNumIters(20000); // steps to propose in the chain mc.SetTestSize(.05); // 95% CL mc.SetNumBurnInSteps(40); // ignore first N steps in chain as "burn in" mc.SetProposalFunction(*pdfProp); mc.SetLeftSideTailFraction(0.5); // find a "central" interval MCMCInterval* mcInt = (MCMCInterval*)mc.GetInterval(); // that was easy // Get Lower and Upper limits from Profile Calculator cout << "Profile lower limit on s = " << ((LikelihoodInterval*) lrinterval)->LowerLimit(*s) << endl; cout << "Profile upper limit on s = " << ((LikelihoodInterval*) lrinterval)->UpperLimit(*s) << endl; // Get Lower and Upper limits from FeldmanCousins with profile construction if (fcint != NULL) { double fcul = ((PointSetInterval*) fcint)->UpperLimit(*s); double fcll = ((PointSetInterval*) fcint)->LowerLimit(*s); cout << "FC lower limit on s = " << fcll << endl; cout << "FC upper limit on s = " << fcul << endl; TLine* fcllLine = new TLine(fcll, 0, fcll, 1); TLine* fculLine = new TLine(fcul, 0, fcul, 1); fcllLine->SetLineColor(kRed); fculLine->SetLineColor(kRed); fcllLine->Draw("same"); fculLine->Draw("same"); dataCanvas->Update(); } // Plot MCMC interval and print some statistics MCMCIntervalPlot mcPlot(*mcInt); mcPlot.SetLineColor(kMagenta); mcPlot.SetLineWidth(2); mcPlot.Draw("same"); double mcul = mcInt->UpperLimit(*s); double mcll = mcInt->LowerLimit(*s); cout << "MCMC lower limit on s = " << mcll << endl; cout << "MCMC upper limit on s = " << mcul << endl; cout << "MCMC Actual confidence level: " << mcInt->GetActualConfidenceLevel() << endl; // 3-d plot of the parameter points dataCanvas->cd(2); // also plot the points in the markov chain RooDataSet * chainData = mcInt->GetChainAsDataSet(); assert(chainData); std::cout << "plotting the chain data - nentries = " << chainData->numEntries() << std::endl; TTree* chain = RooStats::GetAsTTree("chainTreeData","chainTreeData",*chainData); assert(chain); chain->SetMarkerStyle(6); chain->SetMarkerColor(kRed); chain->Draw("s:ratioSigEff:ratioBkgEff","nll_MarkovChain_local_","box"); // 3-d box proportional to posterior // the points used in the profile construction RooDataSet * parScanData = (RooDataSet*) fc.GetPointsToScan(); assert(parScanData); std::cout << "plotting the scanned points used in the frequentist construction - npoints = " << parScanData->numEntries() << std::endl; // getting the tree and drawing it -crashes (very strange....); // TTree* parameterScan = RooStats::GetAsTTree("parScanTreeData","parScanTreeData",*parScanData); // assert(parameterScan); // parameterScan->Draw("s:ratioSigEff:ratioBkgEff","","goff"); TGraph2D *gr = new TGraph2D(parScanData->numEntries()); for (int ievt = 0; ievt < parScanData->numEntries(); ++ievt) { const RooArgSet * evt = parScanData->get(ievt); double x = evt->getRealValue("ratioBkgEff"); double y = evt->getRealValue("ratioSigEff"); double z = evt->getRealValue("s"); gr->SetPoint(ievt, x,y,z); // std::cout << ievt << " " << x << " " << y << " " << z << std::endl; } gr->SetMarkerStyle(24); gr->Draw("P SAME"); delete wspace; delete lrinterval; delete mcInt; delete fcint; delete data; // print timing info t.Stop(); t.Print(); }