//void RunToyScan5(TString fileName, double startVal, double stopVal, TString outFile) { void frequentist(TString fileName) { cout << "Starting frequentist " << time(NULL) << endl; double startVal = 0; double stopVal = 200; TString outFile = ""; int nToys = 1 ; int nscanpoints = 2 ; /* gROOT->LoadMacro("RooBetaPdf.cxx+") ; gROOT->LoadMacro("RooRatio.cxx+") ; gROOT->LoadMacro("RooPosDefCorrGauss.cxx+") ; */ // get relevant objects out of the "ws" file TFile *file = TFile::Open(fileName); if(!file){ cout <<"file not found" << endl; return; } RooWorkspace* w = (RooWorkspace*) file->Get("workspace"); if(!w){ cout <<"workspace not found" << endl; return; } ModelConfig* mc = (ModelConfig*) w->obj("S+B_model"); RooAbsData* data = w->data("data"); if( !data || !mc ){ w->Print(); cout << "data or ModelConfig was not found" <<endl; return; } RooRealVar* myPOI = (RooRealVar*) mc->GetParametersOfInterest()->first(); myPOI->setRange(0, 1000.); ModelConfig* bModel = (ModelConfig*) w->obj("B_model"); ModelConfig* sbModel = (ModelConfig*) w->obj("S+B_model"); ProfileLikelihoodTestStat profll(*sbModel->GetPdf()); profll.SetPrintLevel(2); profll.SetOneSided(1); TestStatistic * testStat = &profll; HypoTestCalculatorGeneric * hc = 0; hc = new FrequentistCalculator(*data, *bModel, *sbModel); ToyMCSampler *toymcs = (ToyMCSampler*)hc->GetTestStatSampler(); toymcs->SetMaxToys(10000); toymcs->SetNEventsPerToy(1); toymcs->SetTestStatistic(testStat); ((FrequentistCalculator *)hc)->SetToys(nToys,nToys); HypoTestInverter calc(*hc); calc.SetConfidenceLevel(0.95); calc.UseCLs(true); //calc.SetVerbose(true); calc.SetVerbose(2); cout << "About to set fixed scan " << time(NULL) << endl; calc.SetFixedScan(nscanpoints,startVal,stopVal); cout << "About to do inverter " << time(NULL) << endl; HypoTestInverterResult * res_toysCLs_calculator = calc.GetInterval(); cout << "CLs = " << res_toysCLs_calculator->UpperLimit() << " CLs_exp = " << res_toysCLs_calculator->GetExpectedUpperLimit(0) << " CLs_exp(-1s) = " << res_toysCLs_calculator->GetExpectedUpperLimit(-1) << " CLs_exp(+1s) = " << res_toysCLs_calculator->GetExpectedUpperLimit(1) << endl ; /* // dump results string to output file ofstream outStream ; outStream.open(outFile,ios::app) ; outStream << "CLs = " << res_toysCLs_calculator->UpperLimit() << " CLs_exp = " << res_toysCLs_calculator->GetExpectedUpperLimit(0) << " CLs_exp(-1s) = " << res_toysCLs_calculator->GetExpectedUpperLimit(-1) << " CLs_exp(+1s) = " << res_toysCLs_calculator->GetExpectedUpperLimit(1) << endl ; outStream.close() ; */ cout << "End of frequentist " << time(NULL) << endl; return ; }
// internal routine to run the inverter HypoTestInverterResult * RooStats::HypoTestInvTool::RunInverter(RooWorkspace * w, const char * modelSBName, const char * modelBName, const char * dataName, int type, int testStatType, bool useCLs, int npoints, double poimin, double poimax, int ntoys, bool useNumberCounting, const char * nuisPriorName ){ std::cout << "Running HypoTestInverter on the workspace " << w->GetName() << std::endl; w->Print(); RooAbsData * data = w->data(dataName); if (!data) { Error("StandardHypoTestDemo","Not existing data %s",dataName); return 0; } else std::cout << "Using data set " << dataName << std::endl; if (mUseVectorStore) { RooAbsData::setDefaultStorageType(RooAbsData::Vector); data->convertToVectorStore() ; } // get models from WS // get the modelConfig out of the file ModelConfig* bModel = (ModelConfig*) w->obj(modelBName); ModelConfig* sbModel = (ModelConfig*) w->obj(modelSBName); if (!sbModel) { Error("StandardHypoTestDemo","Not existing ModelConfig %s",modelSBName); return 0; } // check the model if (!sbModel->GetPdf()) { Error("StandardHypoTestDemo","Model %s has no pdf ",modelSBName); return 0; } if (!sbModel->GetParametersOfInterest()) { Error("StandardHypoTestDemo","Model %s has no poi ",modelSBName); return 0; } if (!sbModel->GetObservables()) { Error("StandardHypoTestInvDemo","Model %s has no observables ",modelSBName); return 0; } if (!sbModel->GetSnapshot() ) { Info("StandardHypoTestInvDemo","Model %s has no snapshot - make one using model poi",modelSBName); sbModel->SetSnapshot( *sbModel->GetParametersOfInterest() ); } // case of no systematics // remove nuisance parameters from model if (noSystematics) { const RooArgSet * nuisPar = sbModel->GetNuisanceParameters(); if (nuisPar && nuisPar->getSize() > 0) { std::cout << "StandardHypoTestInvDemo" << " - Switch off all systematics by setting them constant to their initial values" << std::endl; RooStats::SetAllConstant(*nuisPar); } if (bModel) { const RooArgSet * bnuisPar = bModel->GetNuisanceParameters(); if (bnuisPar) RooStats::SetAllConstant(*bnuisPar); } } if (!bModel || bModel == sbModel) { Info("StandardHypoTestInvDemo","The background model %s does not exist",modelBName); Info("StandardHypoTestInvDemo","Copy it from ModelConfig %s and set POI to zero",modelSBName); bModel = (ModelConfig*) sbModel->Clone(); bModel->SetName(TString(modelSBName)+TString("_with_poi_0")); RooRealVar * var = dynamic_cast<RooRealVar*>(bModel->GetParametersOfInterest()->first()); if (!var) return 0; double oldval = var->getVal(); var->setVal(0); bModel->SetSnapshot( RooArgSet(*var) ); var->setVal(oldval); } else { if (!bModel->GetSnapshot() ) { Info("StandardHypoTestInvDemo","Model %s has no snapshot - make one using model poi and 0 values ",modelBName); RooRealVar * var = dynamic_cast<RooRealVar*>(bModel->GetParametersOfInterest()->first()); if (var) { double oldval = var->getVal(); var->setVal(0); bModel->SetSnapshot( RooArgSet(*var) ); var->setVal(oldval); } else { Error("StandardHypoTestInvDemo","Model %s has no valid poi",modelBName); return 0; } } } // check model has global observables when there are nuisance pdf // for the hybrid case the globobs are not needed if (type != 1 ) { bool hasNuisParam = (sbModel->GetNuisanceParameters() && sbModel->GetNuisanceParameters()->getSize() > 0); bool hasGlobalObs = (sbModel->GetGlobalObservables() && sbModel->GetGlobalObservables()->getSize() > 0); if (hasNuisParam && !hasGlobalObs ) { // try to see if model has nuisance parameters first RooAbsPdf * constrPdf = RooStats::MakeNuisancePdf(*sbModel,"nuisanceConstraintPdf_sbmodel"); if (constrPdf) { Warning("StandardHypoTestInvDemo","Model %s has nuisance parameters but no global observables associated",sbModel->GetName()); Warning("StandardHypoTestInvDemo","\tThe effect of the nuisance parameters will not be treated correctly "); } } } // run first a data fit const RooArgSet * poiSet = sbModel->GetParametersOfInterest(); RooRealVar *poi = (RooRealVar*)poiSet->first(); std::cout << "StandardHypoTestInvDemo : POI initial value: " << poi->GetName() << " = " << poi->getVal() << std::endl; // fit the data first (need to use constraint ) TStopwatch tw; bool doFit = initialFit; if (testStatType == 0 && initialFit == -1) doFit = false; // case of LEP test statistic if (type == 3 && initialFit == -1) doFit = false; // case of Asymptoticcalculator with nominal Asimov double poihat = 0; if (minimizerType.size()==0) minimizerType = ROOT::Math::MinimizerOptions::DefaultMinimizerType(); else ROOT::Math::MinimizerOptions::SetDefaultMinimizer(minimizerType.c_str()); Info("StandardHypoTestInvDemo","Using %s as minimizer for computing the test statistic", ROOT::Math::MinimizerOptions::DefaultMinimizerType().c_str() ); if (doFit) { // do the fit : By doing a fit the POI snapshot (for S+B) is set to the fit value // and the nuisance parameters nominal values will be set to the fit value. // This is relevant when using LEP test statistics Info( "StandardHypoTestInvDemo"," Doing a first fit to the observed data "); RooArgSet constrainParams; if (sbModel->GetNuisanceParameters() ) constrainParams.add(*sbModel->GetNuisanceParameters()); RooStats::RemoveConstantParameters(&constrainParams); tw.Start(); RooFitResult * fitres = sbModel->GetPdf()->fitTo(*data,InitialHesse(false), Hesse(false), Minimizer(minimizerType.c_str(),"Migrad"), Strategy(0), PrintLevel(mPrintLevel), Constrain(constrainParams), Save(true) ); if (fitres->status() != 0) { Warning("StandardHypoTestInvDemo","Fit to the model failed - try with strategy 1 and perform first an Hesse computation"); fitres = sbModel->GetPdf()->fitTo(*data,InitialHesse(true), Hesse(false),Minimizer(minimizerType.c_str(),"Migrad"), Strategy(1), PrintLevel(mPrintLevel+1), Constrain(constrainParams), Save(true) ); } if (fitres->status() != 0) Warning("StandardHypoTestInvDemo"," Fit still failed - continue anyway....."); poihat = poi->getVal(); std::cout << "StandardHypoTestInvDemo - Best Fit value : " << poi->GetName() << " = " << poihat << " +/- " << poi->getError() << std::endl; std::cout << "Time for fitting : "; tw.Print(); //save best fit value in the poi snapshot sbModel->SetSnapshot(*sbModel->GetParametersOfInterest()); std::cout << "StandardHypoTestInvo: snapshot of S+B Model " << sbModel->GetName() << " is set to the best fit value" << std::endl; } // print a message in case of LEP test statistics because it affects result by doing or not doing a fit if (testStatType == 0) { if (!doFit) Info("StandardHypoTestInvDemo","Using LEP test statistic - an initial fit is not done and the TS will use the nuisances at the model value"); else Info("StandardHypoTestInvDemo","Using LEP test statistic - an initial fit has been done and the TS will use the nuisances at the best fit value"); } // build test statistics and hypotest calculators for running the inverter SimpleLikelihoodRatioTestStat slrts(*sbModel->GetPdf(),*bModel->GetPdf()); // null parameters must includes snapshot of poi plus the nuisance values RooArgSet nullParams(*sbModel->GetSnapshot()); if (sbModel->GetNuisanceParameters()) nullParams.add(*sbModel->GetNuisanceParameters()); if (sbModel->GetSnapshot()) slrts.SetNullParameters(nullParams); RooArgSet altParams(*bModel->GetSnapshot()); if (bModel->GetNuisanceParameters()) altParams.add(*bModel->GetNuisanceParameters()); if (bModel->GetSnapshot()) slrts.SetAltParameters(altParams); // ratio of profile likelihood - need to pass snapshot for the alt RatioOfProfiledLikelihoodsTestStat ropl(*sbModel->GetPdf(), *bModel->GetPdf(), bModel->GetSnapshot()); ropl.SetSubtractMLE(false); if (testStatType == 11) ropl.SetSubtractMLE(true); ropl.SetPrintLevel(mPrintLevel); ropl.SetMinimizer(minimizerType.c_str()); ProfileLikelihoodTestStat profll(*sbModel->GetPdf()); if (testStatType == 3) profll.SetOneSided(true); if (testStatType == 4) profll.SetSigned(true); profll.SetMinimizer(minimizerType.c_str()); profll.SetPrintLevel(mPrintLevel); profll.SetReuseNLL(mOptimize); slrts.SetReuseNLL(mOptimize); ropl.SetReuseNLL(mOptimize); if (mOptimize) { profll.SetStrategy(0); ropl.SetStrategy(0); ROOT::Math::MinimizerOptions::SetDefaultStrategy(0); } if (mMaxPoi > 0) poi->setMax(mMaxPoi); // increase limit MaxLikelihoodEstimateTestStat maxll(*sbModel->GetPdf(),*poi); NumEventsTestStat nevtts; AsymptoticCalculator::SetPrintLevel(mPrintLevel); // create the HypoTest calculator class HypoTestCalculatorGeneric * hc = 0; if (type == 0) hc = new FrequentistCalculator(*data, *bModel, *sbModel); else if (type == 1) hc = new HybridCalculator(*data, *bModel, *sbModel); // else if (type == 2 ) hc = new AsymptoticCalculator(*data, *bModel, *sbModel, false, mAsimovBins); // else if (type == 3 ) hc = new AsymptoticCalculator(*data, *bModel, *sbModel, true, mAsimovBins); // for using Asimov data generated with nominal values else if (type == 2 ) hc = new AsymptoticCalculator(*data, *bModel, *sbModel, false ); else if (type == 3 ) hc = new AsymptoticCalculator(*data, *bModel, *sbModel, true ); // for using Asimov data generated with nominal values else { Error("StandardHypoTestInvDemo","Invalid - calculator type = %d supported values are only :\n\t\t\t 0 (Frequentist) , 1 (Hybrid) , 2 (Asymptotic) ",type); return 0; } // set the test statistic TestStatistic * testStat = 0; if (testStatType == 0) testStat = &slrts; if (testStatType == 1 || testStatType == 11) testStat = &ropl; if (testStatType == 2 || testStatType == 3 || testStatType == 4) testStat = &profll; if (testStatType == 5) testStat = &maxll; if (testStatType == 6) testStat = &nevtts; if (testStat == 0) { Error("StandardHypoTestInvDemo","Invalid - test statistic type = %d supported values are only :\n\t\t\t 0 (SLR) , 1 (Tevatron) , 2 (PLR), 3 (PLR1), 4(MLE)",testStatType); return 0; } ToyMCSampler *toymcs = (ToyMCSampler*)hc->GetTestStatSampler(); if (toymcs && (type == 0 || type == 1) ) { // look if pdf is number counting or extended if (sbModel->GetPdf()->canBeExtended() ) { if (useNumberCounting) Warning("StandardHypoTestInvDemo","Pdf is extended: but number counting flag is set: ignore it "); } else { // for not extended pdf if (!useNumberCounting ) { int nEvents = data->numEntries(); Info("StandardHypoTestInvDemo","Pdf is not extended: number of events to generate taken from observed data set is %d",nEvents); toymcs->SetNEventsPerToy(nEvents); } else { Info("StandardHypoTestInvDemo","using a number counting pdf"); toymcs->SetNEventsPerToy(1); } } toymcs->SetTestStatistic(testStat); if (data->isWeighted() && !mGenerateBinned) { Info("StandardHypoTestInvDemo","Data set is weighted, nentries = %d and sum of weights = %8.1f but toy generation is unbinned - it would be faster to set mGenerateBinned to true\n",data->numEntries(), data->sumEntries()); } toymcs->SetGenerateBinned(mGenerateBinned); toymcs->SetUseMultiGen(mOptimize); if (mGenerateBinned && sbModel->GetObservables()->getSize() > 2) { Warning("StandardHypoTestInvDemo","generate binned is activated but the number of ovservable is %d. Too much memory could be needed for allocating all the bins",sbModel->GetObservables()->getSize() ); } // set the random seed if needed if (mRandomSeed >= 0) RooRandom::randomGenerator()->SetSeed(mRandomSeed); } // specify if need to re-use same toys if (reuseAltToys) { hc->UseSameAltToys(); } if (type == 1) { HybridCalculator *hhc = dynamic_cast<HybridCalculator*> (hc); assert(hhc); hhc->SetToys(ntoys,ntoys/mNToysRatio); // can use less ntoys for b hypothesis // remove global observables from ModelConfig (this is probably not needed anymore in 5.32) bModel->SetGlobalObservables(RooArgSet() ); sbModel->SetGlobalObservables(RooArgSet() ); // check for nuisance prior pdf in case of nuisance parameters if (bModel->GetNuisanceParameters() || sbModel->GetNuisanceParameters() ) { // fix for using multigen (does not work in this case) toymcs->SetUseMultiGen(false); ToyMCSampler::SetAlwaysUseMultiGen(false); RooAbsPdf * nuisPdf = 0; if (nuisPriorName) nuisPdf = w->pdf(nuisPriorName); // use prior defined first in bModel (then in SbModel) if (!nuisPdf) { Info("StandardHypoTestInvDemo","No nuisance pdf given for the HybridCalculator - try to deduce pdf from the model"); if (bModel->GetPdf() && bModel->GetObservables() ) nuisPdf = RooStats::MakeNuisancePdf(*bModel,"nuisancePdf_bmodel"); else nuisPdf = RooStats::MakeNuisancePdf(*sbModel,"nuisancePdf_sbmodel"); } if (!nuisPdf ) { if (bModel->GetPriorPdf()) { nuisPdf = bModel->GetPriorPdf(); Info("StandardHypoTestInvDemo","No nuisance pdf given - try to use %s that is defined as a prior pdf in the B model",nuisPdf->GetName()); } else { Error("StandardHypoTestInvDemo","Cannnot run Hybrid calculator because no prior on the nuisance parameter is specified or can be derived"); return 0; } } assert(nuisPdf); Info("StandardHypoTestInvDemo","Using as nuisance Pdf ... " ); nuisPdf->Print(); const RooArgSet * nuisParams = (bModel->GetNuisanceParameters() ) ? bModel->GetNuisanceParameters() : sbModel->GetNuisanceParameters(); RooArgSet * np = nuisPdf->getObservables(*nuisParams); if (np->getSize() == 0) { Warning("StandardHypoTestInvDemo","Prior nuisance does not depend on nuisance parameters. They will be smeared in their full range"); } delete np; hhc->ForcePriorNuisanceAlt(*nuisPdf); hhc->ForcePriorNuisanceNull(*nuisPdf); } } else if (type == 2 || type == 3) { if (testStatType == 3) ((AsymptoticCalculator*) hc)->SetOneSided(true); if (testStatType != 2 && testStatType != 3) Warning("StandardHypoTestInvDemo","Only the PL test statistic can be used with AsymptoticCalculator - use by default a two-sided PL"); } else if (type == 0 || type == 1) ((FrequentistCalculator*) hc)->SetToys(ntoys,ntoys/mNToysRatio); // Get the result RooMsgService::instance().getStream(1).removeTopic(RooFit::NumIntegration); HypoTestInverter calc(*hc); calc.SetConfidenceLevel(0.95); calc.UseCLs(useCLs); calc.SetVerbose(true); // can speed up using proof-lite if (mUseProof && mNWorkers > 1) { ProofConfig pc(*w, mNWorkers, "", kFALSE); toymcs->SetProofConfig(&pc); // enable proof } if (npoints > 0) { if (poimin > poimax) { // if no min/max given scan between MLE and +4 sigma poimin = int(poihat); poimax = int(poihat + 4 * poi->getError()); } std::cout << "Doing a fixed scan in interval : " << poimin << " , " << poimax << std::endl; calc.SetFixedScan(npoints,poimin,poimax); } else { //poi->setMax(10*int( (poihat+ 10 *poi->getError() )/10 ) ); std::cout << "Doing an automatic scan in interval : " << poi->getMin() << " , " << poi->getMax() << std::endl; } tw.Start(); HypoTestInverterResult * r = calc.GetInterval(); std::cout << "Time to perform limit scan \n"; tw.Print(); if (mRebuild) { calc.SetCloseProof(1); tw.Start(); SamplingDistribution * limDist = calc.GetUpperLimitDistribution(true,mNToyToRebuild); std::cout << "Time to rebuild distributions " << std::endl; tw.Print(); if (limDist) { std::cout << "expected up limit " << limDist->InverseCDF(0.5) << " +/- " << limDist->InverseCDF(0.16) << " " << limDist->InverseCDF(0.84) << "\n"; //update r to a new updated result object containing the rebuilt expected p-values distributions // (it will not recompute the expected limit) if (r) delete r; // need to delete previous object since GetInterval will return a cloned copy r = calc.GetInterval(); } else std::cout << "ERROR : failed to re-build distributions " << std::endl; } return r; }
// internal routine to run the inverter HypoTestInverterResult * RunInverter(RooWorkspace * w, const char * modelSBName, const char * modelBName, const char * dataName, int type, int testStatType, int npoints, double poimin, double poimax, int ntoys, bool useCls ) { std::cout << "Running HypoTestInverter on the workspace " << w->GetName() << std::endl; w->Print(); RooAbsData * data = w->data(dataName); if (!data) { Error("RA2bHypoTestDemo","Not existing data %s",dataName); return 0; } else std::cout << "Using data set " << dataName << std::endl; // get models from WS // get the modelConfig out of the file ModelConfig* bModel = (ModelConfig*) w->obj(modelBName); ModelConfig* sbModel = (ModelConfig*) w->obj(modelSBName); if (!sbModel) { Error("RA2bHypoTestDemo","Not existing ModelConfig %s",modelSBName); return 0; } // check the model if (!sbModel->GetPdf()) { Error("RA2bHypoTestDemo","Model %s has no pdf ",modelSBName); return 0; } if (!sbModel->GetParametersOfInterest()) { Error("RA2bHypoTestDemo","Model %s has no poi ",modelSBName); return 0; } if (!sbModel->GetParametersOfInterest()) { Error("RA2bHypoTestInvDemo","Model %s has no poi ",modelSBName); return 0; } if (!sbModel->GetSnapshot() ) { Info("RA2bHypoTestInvDemo","Model %s has no snapshot - make one using model poi",modelSBName); sbModel->SetSnapshot( *sbModel->GetParametersOfInterest() ); } if (!bModel || bModel == sbModel) { Info("RA2bHypoTestInvDemo","The background model %s does not exist",modelBName); Info("RA2bHypoTestInvDemo","Copy it from ModelConfig %s and set POI to zero",modelSBName); bModel = (ModelConfig*) sbModel->Clone(); bModel->SetName(TString(modelSBName)+TString("_with_poi_0")); RooRealVar * var = dynamic_cast<RooRealVar*>(bModel->GetParametersOfInterest()->first()); if (!var) return 0; double oldval = var->getVal(); var->setVal(0); bModel->SetSnapshot( RooArgSet(*var) ); var->setVal(oldval); } else { if (!bModel->GetSnapshot() ) { Info("RA2bHypoTestInvDemo","Model %s has no snapshot - make one using model poi and 0 values ",modelBName); RooRealVar * var = dynamic_cast<RooRealVar*>(bModel->GetParametersOfInterest()->first()); if (var) { double oldval = var->getVal(); var->setVal(0); bModel->SetSnapshot( RooArgSet(*var) ); var->setVal(oldval); } else { Error("RA2bHypoTestInvDemo","Model %s has no valid poi",modelBName); return 0; } } } SimpleLikelihoodRatioTestStat slrts(*sbModel->GetPdf(),*bModel->GetPdf()); if (sbModel->GetSnapshot()) slrts.SetNullParameters(*sbModel->GetSnapshot()); if (bModel->GetSnapshot()) slrts.SetAltParameters(*bModel->GetSnapshot()); // ratio of profile likelihood - need to pass snapshot for the alt RatioOfProfiledLikelihoodsTestStat ropl(*sbModel->GetPdf(), *bModel->GetPdf(), bModel->GetSnapshot()); ropl.SetSubtractMLE(false); //MyProfileLikelihoodTestStat profll(*sbModel->GetPdf()); ProfileLikelihoodTestStat profll(*sbModel->GetPdf()); if (testStatType == 3) profll.SetOneSided(1); if (optimize) profll.SetReuseNLL(true); TestStatistic * testStat = &slrts; if (testStatType == 1) testStat = &ropl; if (testStatType == 2 || testStatType == 3) testStat = &profll; HypoTestCalculatorGeneric * hc = 0; if (type == 0) hc = new FrequentistCalculator(*data, *bModel, *sbModel); else hc = new HybridCalculator(*data, *bModel, *sbModel); ToyMCSampler *toymcs = (ToyMCSampler*)hc->GetTestStatSampler(); //=== DEBUG ///// toymcs->SetWS( w ) ; //=== DEBUG toymcs->SetNEventsPerToy(1); toymcs->SetTestStatistic(testStat); if (optimize) toymcs->SetUseMultiGen(true); if (type == 1) { HybridCalculator *hhc = (HybridCalculator*) hc; hhc->SetToys(ntoys,ntoys); // check for nuisance prior pdf if (bModel->GetPriorPdf() && sbModel->GetPriorPdf() ) { hhc->ForcePriorNuisanceAlt(*bModel->GetPriorPdf()); hhc->ForcePriorNuisanceNull(*sbModel->GetPriorPdf()); } else { if (bModel->GetNuisanceParameters() || sbModel->GetNuisanceParameters() ) { Error("RA2bHypoTestInvDemo","Cannnot run Hybrid calculator because no prior on the nuisance parameter is specified"); return 0; } } } else ((FrequentistCalculator*) hc)->SetToys(ntoys,ntoys); // Get the result RooMsgService::instance().getStream(1).removeTopic(RooFit::NumIntegration); TStopwatch tw; tw.Start(); const RooArgSet * poiSet = sbModel->GetParametersOfInterest(); RooRealVar *poi = (RooRealVar*)poiSet->first(); // fit the data first sbModel->GetPdf()->fitTo(*data); double poihat = poi->getVal(); HypoTestInverter calc(*hc); calc.SetConfidenceLevel(0.95); calc.UseCLs(useCls); calc.SetVerbose(true); // can speed up using proof-lite if (useProof && nworkers > 1) { ProofConfig pc(*w, nworkers, "", kFALSE); toymcs->SetProofConfig(&pc); // enable proof } printf(" npoints = %d, poimin = %7.2f, poimax = %7.2f\n\n", npoints, poimin, poimax ) ; cout << flush ; if ( npoints==1 ) { std::cout << "Evaluating one point : " << poimax << std::endl; calc.RunOnePoint(poimax); } else if (npoints > 0) { if (poimin >= poimax) { // if no min/max given scan between MLE and +4 sigma poimin = int(poihat); poimax = int(poihat + 4 * poi->getError()); } std::cout << "Doing a fixed scan in interval : " << poimin << " , " << poimax << std::endl; calc.SetFixedScan(npoints,poimin,poimax); } else { //poi->setMax(10*int( (poihat+ 10 *poi->getError() )/10 ) ); std::cout << "Doing an automatic scan in interval : " << poi->getMin() << " , " << poi->getMax() << std::endl; } cout << "\n\n right before calc.GetInterval(), ntoys = " << ntoys << " \n\n" << flush ; HypoTestInverterResult * r = calc.GetInterval(); return r; }
void OneSidedFrequentistUpperLimitWithBands(const char* infile = "", const char* workspaceName = "combined", const char* modelConfigName = "ModelConfig", const char* dataName = "obsData") { double confidenceLevel=0.95; int nPointsToScan = 20; int nToyMC = 200; // ------------------------------------------------------- // First part is just to access a user-defined file // or create the standard example file if it doesn't exist const char* filename = ""; if (!strcmp(infile,"")) { filename = "results/example_combined_GaussExample_model.root"; bool fileExist = !gSystem->AccessPathName(filename); // note opposite return code // if file does not exists generate with histfactory if (!fileExist) { #ifdef _WIN32 cout << "HistFactory file cannot be generated on Windows - exit" << endl; return; #endif // Normally this would be run on the command line cout <<"will run standard hist2workspace example"<<endl; gROOT->ProcessLine(".! prepareHistFactory ."); gROOT->ProcessLine(".! hist2workspace config/example.xml"); cout <<"\n\n---------------------"<<endl; cout <<"Done creating example input"<<endl; cout <<"---------------------\n\n"<<endl; } } else filename = infile; // Try to open the file TFile *file = TFile::Open(filename); // if input file was specified byt not found, quit if(!file ){ cout <<"StandardRooStatsDemoMacro: Input file " << filename << " is not found" << endl; return; } // ------------------------------------------------------- // Now get the data and workspace // get the workspace out of the file RooWorkspace* w = (RooWorkspace*) file->Get(workspaceName); if(!w){ cout <<"workspace not found" << endl; return; } // get the modelConfig out of the file ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName); // get the modelConfig out of the file RooAbsData* data = w->data(dataName); // make sure ingredients are found if(!data || !mc){ w->Print(); cout << "data or ModelConfig was not found" <<endl; return; } // ------------------------------------------------------- // Now get the POI for convenience // you may want to adjust the range of your POI RooRealVar* firstPOI = (RooRealVar*) mc->GetParametersOfInterest()->first(); /* firstPOI->setMin(0);*/ /* firstPOI->setMax(10);*/ // -------------------------------------------- // Create and use the FeldmanCousins tool // to find and plot the 95% confidence interval // on the parameter of interest as specified // in the model config // REMEMBER, we will change the test statistic // so this is NOT a Feldman-Cousins interval FeldmanCousins fc(*data,*mc); fc.SetConfidenceLevel(confidenceLevel); /* fc.AdditionalNToysFactor(0.25); // degrade/improve sampling that defines confidence belt*/ /* fc.UseAdaptiveSampling(true); // speed it up a bit, don't use for expected limits*/ fc.SetNBins(nPointsToScan); // set how many points per parameter of interest to scan fc.CreateConfBelt(true); // save the information in the belt for plotting // ------------------------------------------------------- // Feldman-Cousins is a unified limit by definition // but the tool takes care of a few things for us like which values // of the nuisance parameters should be used to generate toys. // so let's just change the test statistic and realize this is // no longer "Feldman-Cousins" but is a fully frequentist Neyman-Construction. /* ProfileLikelihoodTestStatModified onesided(*mc->GetPdf());*/ /* fc.GetTestStatSampler()->SetTestStatistic(&onesided);*/ /* ((ToyMCSampler*) fc.GetTestStatSampler())->SetGenerateBinned(true); */ ToyMCSampler* toymcsampler = (ToyMCSampler*) fc.GetTestStatSampler(); ProfileLikelihoodTestStat* testStat = dynamic_cast<ProfileLikelihoodTestStat*>(toymcsampler->GetTestStatistic()); testStat->SetOneSided(true); // Since this tool needs to throw toy MC the PDF needs to be // extended or the tool needs to know how many entries in a dataset // per pseudo experiment. // In the 'number counting form' where the entries in the dataset // are counts, and not values of discriminating variables, the // datasets typically only have one entry and the PDF is not // extended. if(!mc->GetPdf()->canBeExtended()){ if(data->numEntries()==1) fc.FluctuateNumDataEntries(false); else cout <<"Not sure what to do about this model" <<endl; } // We can use PROOF to speed things along in parallel // However, the test statistic has to be installed on the workers // so either turn off PROOF or include the modified test statistic // in your `$ROOTSYS/roofit/roostats/inc` directory, // add the additional line to the LinkDef.h file, // and recompile root. if (useProof) { ProofConfig pc(*w, nworkers, "", false); toymcsampler->SetProofConfig(&pc); // enable proof } if(mc->GetGlobalObservables()){ cout << "will use global observables for unconditional ensemble"<<endl; mc->GetGlobalObservables()->Print(); toymcsampler->SetGlobalObservables(*mc->GetGlobalObservables()); } // Now get the interval PointSetInterval* interval = fc.GetInterval(); ConfidenceBelt* belt = fc.GetConfidenceBelt(); // print out the interval on the first Parameter of Interest cout << "\n95% interval on " <<firstPOI->GetName()<<" is : ["<< interval->LowerLimit(*firstPOI) << ", "<< interval->UpperLimit(*firstPOI) <<"] "<<endl; // get observed UL and value of test statistic evaluated there RooArgSet tmpPOI(*firstPOI); double observedUL = interval->UpperLimit(*firstPOI); firstPOI->setVal(observedUL); double obsTSatObsUL = fc.GetTestStatSampler()->EvaluateTestStatistic(*data,tmpPOI); // Ask the calculator which points were scanned RooDataSet* parameterScan = (RooDataSet*) fc.GetPointsToScan(); RooArgSet* tmpPoint; // make a histogram of parameter vs. threshold TH1F* histOfThresholds = new TH1F("histOfThresholds","", parameterScan->numEntries(), firstPOI->getMin(), firstPOI->getMax()); histOfThresholds->GetXaxis()->SetTitle(firstPOI->GetName()); histOfThresholds->GetYaxis()->SetTitle("Threshold"); // loop through the points that were tested and ask confidence belt // what the upper/lower thresholds were. // For FeldmanCousins, the lower cut off is always 0 for(Int_t i=0; i<parameterScan->numEntries(); ++i){ tmpPoint = (RooArgSet*) parameterScan->get(i)->clone("temp"); //cout <<"get threshold"<<endl; double arMax = belt->GetAcceptanceRegionMax(*tmpPoint); double poiVal = tmpPoint->getRealValue(firstPOI->GetName()) ; histOfThresholds->Fill(poiVal,arMax); } TCanvas* c1 = new TCanvas(); c1->Divide(2); c1->cd(1); histOfThresholds->SetMinimum(0); histOfThresholds->Draw(); c1->cd(2); // ------------------------------------------------------- // Now we generate the expected bands and power-constraint // First: find parameter point for mu=0, with conditional MLEs for nuisance parameters RooAbsReal* nll = mc->GetPdf()->createNLL(*data); RooAbsReal* profile = nll->createProfile(*mc->GetParametersOfInterest()); firstPOI->setVal(0.); profile->getVal(); // this will do fit and set nuisance parameters to profiled values RooArgSet* poiAndNuisance = new RooArgSet(); if(mc->GetNuisanceParameters()) poiAndNuisance->add(*mc->GetNuisanceParameters()); poiAndNuisance->add(*mc->GetParametersOfInterest()); w->saveSnapshot("paramsToGenerateData",*poiAndNuisance); RooArgSet* paramsToGenerateData = (RooArgSet*) poiAndNuisance->snapshot(); cout << "\nWill use these parameter points to generate pseudo data for bkg only" << endl; paramsToGenerateData->Print("v"); RooArgSet unconditionalObs; unconditionalObs.add(*mc->GetObservables()); unconditionalObs.add(*mc->GetGlobalObservables()); // comment this out for the original conditional ensemble double CLb=0; double CLbinclusive=0; // Now we generate background only and find distribution of upper limits TH1F* histOfUL = new TH1F("histOfUL","",100,0,firstPOI->getMax()); histOfUL->GetXaxis()->SetTitle("Upper Limit (background only)"); histOfUL->GetYaxis()->SetTitle("Entries"); for(int imc=0; imc<nToyMC; ++imc){ // set parameters back to values for generating pseudo data // cout << "\n get current nuis, set vals, print again" << endl; w->loadSnapshot("paramsToGenerateData"); // poiAndNuisance->Print("v"); RooDataSet* toyData = 0; // now generate a toy dataset if(!mc->GetPdf()->canBeExtended()){ if(data->numEntries()==1) toyData = mc->GetPdf()->generate(*mc->GetObservables(),1); else cout <<"Not sure what to do about this model" <<endl; } else{ // cout << "generating extended dataset"<<endl; toyData = mc->GetPdf()->generate(*mc->GetObservables(),Extended()); } // generate global observables // need to be careful for simpdf // RooDataSet* globalData = mc->GetPdf()->generate(*mc->GetGlobalObservables(),1); RooSimultaneous* simPdf = dynamic_cast<RooSimultaneous*>(mc->GetPdf()); if(!simPdf){ RooDataSet *one = mc->GetPdf()->generate(*mc->GetGlobalObservables(), 1); const RooArgSet *values = one->get(); RooArgSet *allVars = mc->GetPdf()->getVariables(); *allVars = *values; delete allVars; delete values; delete one; } else { //try fix for sim pdf TIterator* iter = simPdf->indexCat().typeIterator() ; RooCatType* tt = NULL; while((tt=(RooCatType*) iter->Next())) { // Get pdf associated with state from simpdf RooAbsPdf* pdftmp = simPdf->getPdf(tt->GetName()) ; // Generate only global variables defined by the pdf associated with this state RooArgSet* globtmp = pdftmp->getObservables(*mc->GetGlobalObservables()) ; RooDataSet* tmp = pdftmp->generate(*globtmp,1) ; // Transfer values to output placeholder *globtmp = *tmp->get(0) ; // Cleanup delete globtmp ; delete tmp ; } } // globalData->Print("v"); // unconditionalObs = *globalData->get(); // mc->GetGlobalObservables()->Print("v"); // delete globalData; // cout << "toy data = " << endl; // toyData->get()->Print("v"); // get test stat at observed UL in observed data firstPOI->setVal(observedUL); double toyTSatObsUL = fc.GetTestStatSampler()->EvaluateTestStatistic(*toyData,tmpPOI); // toyData->get()->Print("v"); // cout <<"obsTSatObsUL " <<obsTSatObsUL << "toyTS " << toyTSatObsUL << endl; if(obsTSatObsUL < toyTSatObsUL) // not sure about <= part yet CLb+= (1.)/nToyMC; if(obsTSatObsUL <= toyTSatObsUL) // not sure about <= part yet CLbinclusive+= (1.)/nToyMC; // loop over points in belt to find upper limit for this toy data double thisUL = 0; for(Int_t i=0; i<parameterScan->numEntries(); ++i){ tmpPoint = (RooArgSet*) parameterScan->get(i)->clone("temp"); double arMax = belt->GetAcceptanceRegionMax(*tmpPoint); firstPOI->setVal( tmpPoint->getRealValue(firstPOI->GetName()) ); // double thisTS = profile->getVal(); double thisTS = fc.GetTestStatSampler()->EvaluateTestStatistic(*toyData,tmpPOI); // cout << "poi = " << firstPOI->getVal() // << " max is " << arMax << " this profile = " << thisTS << endl; // cout << "thisTS = " << thisTS<<endl; if(thisTS<=arMax){ thisUL = firstPOI->getVal(); } else{ break; } } /* // loop over points in belt to find upper limit for this toy data double thisUL = 0; for(Int_t i=0; i<histOfThresholds->GetNbinsX(); ++i){ tmpPoint = (RooArgSet*) parameterScan->get(i)->clone("temp"); cout <<"---------------- "<<i<<endl; tmpPoint->Print("v"); cout << "from hist " << histOfThresholds->GetBinCenter(i+1) <<endl; double arMax = histOfThresholds->GetBinContent(i+1); // cout << " threhold from Hist = aMax " << arMax<<endl; // double arMax2 = belt->GetAcceptanceRegionMax(*tmpPoint); // cout << "from scan arMax2 = "<< arMax2 << endl; // not the same due to TH1F not TH1D // cout << "scan - hist" << arMax2-arMax << endl; firstPOI->setVal( histOfThresholds->GetBinCenter(i+1)); // double thisTS = profile->getVal(); double thisTS = fc.GetTestStatSampler()->EvaluateTestStatistic(*toyData,tmpPOI); // cout << "poi = " << firstPOI->getVal() // << " max is " << arMax << " this profile = " << thisTS << endl; // cout << "thisTS = " << thisTS<<endl; // NOTE: need to add a small epsilon term for single precision vs. double precision if(thisTS<=arMax + 1e-7){ thisUL = firstPOI->getVal(); } else{ break; } } */ histOfUL->Fill(thisUL); // for few events, data is often the same, and UL is often the same // cout << "thisUL = " << thisUL<<endl; delete toyData; } histOfUL->Draw(); c1->SaveAs("one-sided_upper_limit_output.pdf"); // if you want to see a plot of the sampling distribution for a particular scan point: /* SamplingDistPlot sampPlot; int indexInScan = 0; tmpPoint = (RooArgSet*) parameterScan->get(indexInScan)->clone("temp"); firstPOI->setVal( tmpPoint->getRealValue(firstPOI->GetName()) ); toymcsampler->SetParametersForTestStat(tmpPOI); SamplingDistribution* samp = toymcsampler->GetSamplingDistribution(*tmpPoint); sampPlot.AddSamplingDistribution(samp); sampPlot.Draw(); */ // Now find bands and power constraint Double_t* bins = histOfUL->GetIntegral(); TH1F* cumulative = (TH1F*) histOfUL->Clone("cumulative"); cumulative->SetContent(bins); double band2sigDown, band1sigDown, bandMedian, band1sigUp,band2sigUp; for(int i=1; i<=cumulative->GetNbinsX(); ++i){ if(bins[i]<RooStats::SignificanceToPValue(2)) band2sigDown=cumulative->GetBinCenter(i); if(bins[i]<RooStats::SignificanceToPValue(1)) band1sigDown=cumulative->GetBinCenter(i); if(bins[i]<0.5) bandMedian=cumulative->GetBinCenter(i); if(bins[i]<RooStats::SignificanceToPValue(-1)) band1sigUp=cumulative->GetBinCenter(i); if(bins[i]<RooStats::SignificanceToPValue(-2)) band2sigUp=cumulative->GetBinCenter(i); } cout << "-2 sigma band " << band2sigDown << endl; cout << "-1 sigma band " << band1sigDown << " [Power Constraint)]" << endl; cout << "median of band " << bandMedian << endl; cout << "+1 sigma band " << band1sigUp << endl; cout << "+2 sigma band " << band2sigUp << endl; // print out the interval on the first Parameter of Interest cout << "\nobserved 95% upper-limit "<< interval->UpperLimit(*firstPOI) <<endl; cout << "CLb strict [P(toy>obs|0)] for observed 95% upper-limit "<< CLb <<endl; cout << "CLb inclusive [P(toy>=obs|0)] for observed 95% upper-limit "<< CLbinclusive <<endl; delete profile; delete nll; }
void OneSidedFrequentistUpperLimitWithBands_intermediate(const char* infile = "", const char* workspaceName = "combined", const char* modelConfigName = "ModelConfig", const char* dataName = "obsData"){ double confidenceLevel=0.95; // degrade/improve number of pseudo-experiments used to define the confidence belt. // value of 1 corresponds to default number of toys in the tail, which is 50/(1-confidenceLevel) double additionalToysFac = 1.; int nPointsToScan = 30; // number of steps in the parameter of interest int nToyMC = 100; // number of toys used to define the expected limit and band TStopwatch t; t.Start(); ///////////////////////////////////////////////////////////// // First part is just to access a user-defined file // or create the standard example file if it doesn't exist //////////////////////////////////////////////////////////// const char* filename = ""; if (!strcmp(infile,"")) filename = "results/example_combined_GaussExample_model.root"; else filename = infile; // Check if example input file exists TFile *file = TFile::Open(filename); // if input file was specified byt not found, quit if(!file && strcmp(infile,"")){ cout <<"file not found" << endl; return; } // if default file not found, try to create it if(!file ){ // Normally this would be run on the command line cout <<"will run standard hist2workspace example"<<endl; gROOT->ProcessLine(".! prepareHistFactory ."); gROOT->ProcessLine(".! hist2workspace config/example.xml"); cout <<"\n\n---------------------"<<endl; cout <<"Done creating example input"<<endl; cout <<"---------------------\n\n"<<endl; } // now try to access the file again file = TFile::Open(filename); if(!file){ // if it is still not there, then we can't continue cout << "Not able to run hist2workspace to create example input" <<endl; return; } ///////////////////////////////////////////////////////////// // Now get the data and workspace //////////////////////////////////////////////////////////// // get the workspace out of the file RooWorkspace* w = (RooWorkspace*) file->Get(workspaceName); if(!w){ cout <<"workspace not found" << endl; return; } // get the modelConfig out of the file ModelConfig* mc = (ModelConfig*) w->obj(modelConfigName); // get the modelConfig out of the file RooAbsData* data = w->data(dataName); // make sure ingredients are found if(!data || !mc){ w->Print(); cout << "data or ModelConfig was not found" <<endl; return; } cout << "Found data and ModelConfig:" <<endl; mc->Print(); ///////////////////////////////////////////////////////////// // Now get the POI for convenience // you may want to adjust the range of your POI //////////////////////////////////////////////////////////// RooRealVar* firstPOI = (RooRealVar*) mc->GetParametersOfInterest()->first(); // firstPOI->setMin(0); // firstPOI->setMax(10); ///////////////////////////////////////////// // create and use the FeldmanCousins tool // to find and plot the 95% confidence interval // on the parameter of interest as specified // in the model config // REMEMBER, we will change the test statistic // so this is NOT a Feldman-Cousins interval FeldmanCousins fc(*data,*mc); fc.SetConfidenceLevel(confidenceLevel); fc.AdditionalNToysFactor(additionalToysFac); // improve sampling that defines confidence belt // fc.UseAdaptiveSampling(true); // speed it up a bit, but don't use for expectd limits fc.SetNBins(nPointsToScan); // set how many points per parameter of interest to scan fc.CreateConfBelt(true); // save the information in the belt for plotting ///////////////////////////////////////////// // Feldman-Cousins is a unified limit by definition // but the tool takes care of a few things for us like which values // of the nuisance parameters should be used to generate toys. // so let's just change the test statistic and realize this is // no longer "Feldman-Cousins" but is a fully frequentist Neyman-Construction. // ProfileLikelihoodTestStatModified onesided(*mc->GetPdf()); // fc.GetTestStatSampler()->SetTestStatistic(&onesided); // ((ToyMCSampler*) fc.GetTestStatSampler())->SetGenerateBinned(true); ToyMCSampler* toymcsampler = (ToyMCSampler*) fc.GetTestStatSampler(); ProfileLikelihoodTestStat* testStat = dynamic_cast<ProfileLikelihoodTestStat*>(toymcsampler->GetTestStatistic()); testStat->SetOneSided(true); // test speedups: testStat->SetReuseNLL(true); // toymcsampler->setUseMultiGen(true); // not fully validated // Since this tool needs to throw toy MC the PDF needs to be // extended or the tool needs to know how many entries in a dataset // per pseudo experiment. // In the 'number counting form' where the entries in the dataset // are counts, and not values of discriminating variables, the // datasets typically only have one entry and the PDF is not // extended. if(!mc->GetPdf()->canBeExtended()){ if(data->numEntries()==1) fc.FluctuateNumDataEntries(false); else cout <<"Not sure what to do about this model" <<endl; } // We can use PROOF to speed things along in parallel ProofConfig pc(*w, 4, "",false); if(mc->GetGlobalObservables()){ cout << "will use global observables for unconditional ensemble"<<endl; mc->GetGlobalObservables()->Print(); toymcsampler->SetGlobalObservables(*mc->GetGlobalObservables()); } toymcsampler->SetProofConfig(&pc); // enable proof // Now get the interval PointSetInterval* interval = fc.GetInterval(); ConfidenceBelt* belt = fc.GetConfidenceBelt(); // print out the iterval on the first Parameter of Interest cout << "\n95% interval on " <<firstPOI->GetName()<<" is : ["<< interval->LowerLimit(*firstPOI) << ", "<< interval->UpperLimit(*firstPOI) <<"] "<<endl; // get observed UL and value of test statistic evaluated there RooArgSet tmpPOI(*firstPOI); double observedUL = interval->UpperLimit(*firstPOI); firstPOI->setVal(observedUL); double obsTSatObsUL = fc.GetTestStatSampler()->EvaluateTestStatistic(*data,tmpPOI); // Ask the calculator which points were scanned RooDataSet* parameterScan = (RooDataSet*) fc.GetPointsToScan(); RooArgSet* tmpPoint; // make a histogram of parameter vs. threshold TH1F* histOfThresholds = new TH1F("histOfThresholds","", parameterScan->numEntries(), firstPOI->getMin(), firstPOI->getMax()); histOfThresholds->GetXaxis()->SetTitle(firstPOI->GetName()); histOfThresholds->GetYaxis()->SetTitle("Threshold"); // loop through the points that were tested and ask confidence belt // what the upper/lower thresholds were. // For FeldmanCousins, the lower cut off is always 0 for(Int_t i=0; i<parameterScan->numEntries(); ++i){ tmpPoint = (RooArgSet*) parameterScan->get(i)->clone("temp"); double arMax = belt->GetAcceptanceRegionMax(*tmpPoint); double poiVal = tmpPoint->getRealValue(firstPOI->GetName()) ; histOfThresholds->Fill(poiVal,arMax); } TCanvas* c1 = new TCanvas(); c1->Divide(2); c1->cd(1); histOfThresholds->SetMinimum(0); histOfThresholds->Draw(); c1->cd(2); ///////////////////////////////////////////////////////////// // Now we generate the expected bands and power-constriant //////////////////////////////////////////////////////////// // First: find parameter point for mu=0, with conditional MLEs for nuisance parameters RooAbsReal* nll = mc->GetPdf()->createNLL(*data); RooAbsReal* profile = nll->createProfile(*mc->GetParametersOfInterest()); firstPOI->setVal(0.); profile->getVal(); // this will do fit and set nuisance parameters to profiled values RooArgSet* poiAndNuisance = new RooArgSet(); if(mc->GetNuisanceParameters()) poiAndNuisance->add(*mc->GetNuisanceParameters()); poiAndNuisance->add(*mc->GetParametersOfInterest()); w->saveSnapshot("paramsToGenerateData",*poiAndNuisance); RooArgSet* paramsToGenerateData = (RooArgSet*) poiAndNuisance->snapshot(); cout << "\nWill use these parameter points to generate pseudo data for bkg only" << endl; paramsToGenerateData->Print("v"); double CLb=0; double CLbinclusive=0; // Now we generate background only and find distribution of upper limits TH1F* histOfUL = new TH1F("histOfUL","",100,0,firstPOI->getMax()); histOfUL->GetXaxis()->SetTitle("Upper Limit (background only)"); histOfUL->GetYaxis()->SetTitle("Entries"); for(int imc=0; imc<nToyMC; ++imc){ // set parameters back to values for generating pseudo data w->loadSnapshot("paramsToGenerateData"); // in 5.30 there is a nicer way to generate toy data & randomize global obs RooAbsData* toyData = toymcsampler->GenerateToyData(*paramsToGenerateData); // get test stat at observed UL in observed data firstPOI->setVal(observedUL); double toyTSatObsUL = fc.GetTestStatSampler()->EvaluateTestStatistic(*toyData,tmpPOI); // toyData->get()->Print("v"); // cout <<"obsTSatObsUL " <<obsTSatObsUL << "toyTS " << toyTSatObsUL << endl; if(obsTSatObsUL < toyTSatObsUL) // (should be checked) CLb+= (1.)/nToyMC; if(obsTSatObsUL <= toyTSatObsUL) // (should be checked) CLbinclusive+= (1.)/nToyMC; // loop over points in belt to find upper limit for this toy data double thisUL = 0; for(Int_t i=0; i<parameterScan->numEntries(); ++i){ tmpPoint = (RooArgSet*) parameterScan->get(i)->clone("temp"); double arMax = belt->GetAcceptanceRegionMax(*tmpPoint); firstPOI->setVal( tmpPoint->getRealValue(firstPOI->GetName()) ); double thisTS = fc.GetTestStatSampler()->EvaluateTestStatistic(*toyData,tmpPOI); if(thisTS<=arMax){ thisUL = firstPOI->getVal(); } else{ break; } } histOfUL->Fill(thisUL); delete toyData; } histOfUL->Draw(); c1->SaveAs("one-sided_upper_limit_output.pdf"); // if you want to see a plot of the sampling distribution for a particular scan point: // Now find bands and power constraint Double_t* bins = histOfUL->GetIntegral(); TH1F* cumulative = (TH1F*) histOfUL->Clone("cumulative"); cumulative->SetContent(bins); double band2sigDown=0, band1sigDown=0, bandMedian=0, band1sigUp=0,band2sigUp=0; for(int i=1; i<=cumulative->GetNbinsX(); ++i){ if(bins[i]<RooStats::SignificanceToPValue(2)) band2sigDown=cumulative->GetBinCenter(i); if(bins[i]<RooStats::SignificanceToPValue(1)) band1sigDown=cumulative->GetBinCenter(i); if(bins[i]<0.5) bandMedian=cumulative->GetBinCenter(i); if(bins[i]<RooStats::SignificanceToPValue(-1)) band1sigUp=cumulative->GetBinCenter(i); if(bins[i]<RooStats::SignificanceToPValue(-2)) band2sigUp=cumulative->GetBinCenter(i); } t.Stop(); t.Print(); cout << "-2 sigma band " << band2sigDown << endl; cout << "-1 sigma band " << band1sigDown << endl; cout << "median of band " << bandMedian << " [Power Constriant)]" << endl; cout << "+1 sigma band " << band1sigUp << endl; cout << "+2 sigma band " << band2sigUp << endl; // print out the iterval on the first Parameter of Interest cout << "\nobserved 95% upper-limit "<< interval->UpperLimit(*firstPOI) <<endl; cout << "CLb strict [P(toy>obs|0)] for observed 95% upper-limit "<< CLb <<endl; cout << "CLb inclusive [P(toy>=obs|0)] for observed 95% upper-limit "<< CLbinclusive <<endl; delete profile; delete nll; }
void statTest(double mu_pe, double mu_hyp, ModelConfig *mc , RooDataSet *data ){ int nToyMC = 5; // set roofit seed RooRandom::randomGenerator()->SetSeed(); cout << endl; cout << endl; cout << "Will generate " << nToyMC << " pseudo-experiments for : " << endl; cout << " - mu[pseudo-data] = " << mu_pe << endl; cout << " - mu[stat-test] = " << mu_hyp << endl; cout << endl; // Check number of POI (for Wald approx) RooArgSet *ParamOfInterest = (RooArgSet*) mc->GetParametersOfInterest(); int nPOI = ParamOfInterest->getSize(); if(nPOI>1){ cout <<"not sure what to do with other parameters of interest, but here are their values"<<endl; mc->GetParametersOfInterest()->Print("v"); } RooRealVar* firstPOI = (RooRealVar*) ParamOfInterest->first(); RooAbsPdf *simPdf = (mc->GetPdf()); //PrintAllParametersAndValues( *mc->GetGlobalObservables() ); //PrintAllParametersAndValues( *mc->GetObservables() ); firstPOI->setVal(0.0); // FIXME //simPdf->fitTo( *data, Hesse(kTRUE), Minos(kTRUE), PrintLevel(1) ); simPdf->fitTo( *data ); // set up the sampler ToyMCSampler sampler; sampler.SetPdf(*mc->GetPdf()); sampler.SetObservables(*mc->GetObservables()); sampler.SetNToys(nToyMC); sampler.SetGlobalObservables(*mc->GetGlobalObservables()); sampler.SetParametersForTestStat(*mc->GetParametersOfInterest()); RooArgSet* poiset = dynamic_cast<RooArgSet*>(ParamOfInterest->Clone()); // only unconditional fit MinNLLTestStat *minNll = new MinNLLTestStat(*mc->GetPdf()); minNll->EnableDetailedOutput(true); sampler.AddTestStatistic(minNll); // enable PROOF if desired //ProofConfig pc(*w, 8, "workers=8", kFALSE); //sampler.SetProofConfig(&pc); // evaluate the test statistics - this is where most of our time will be spent cout << "Generating " << nToyMC << " toys...this will take a few minutes" << endl; TStopwatch *mn_t = new TStopwatch; mn_t->Start(); RooDataSet* sd = sampler.GetSamplingDistributions(*poiset); cout << "Toy generation complete :" << endl; // stop timing mn_t->Stop(); cout << " total CPU time: " << mn_t->CpuTime() << endl; cout << " total real time: " << mn_t->RealTime() << endl; // now sd contains all information about our test statistics, including detailed output // we might eg. want to explore the results either directly, or first converting to a TTree // do the conversion TFile f("mytoys.root", "RECREATE"); TTree *toyTree = RooStats::GetAsTTree("toyTree", "TTree created from test statistics", *sd); // save result to file, but in general do whatever you like f.cd(); toyTree->Write(); f.Close(); /* TFile* tmpFile = new TFile("mytoys.root","READ"); TTree* myTree = (TTree*)tmpFile->Get("toyTree"); // get boundaries for histograms TIter nextLeaf( (myTree->GetListOfLeaves())->MakeIterator() ); TObject* leafObj(0); map<TString, float> xMaxs; map<TString, float> xMins; for(int i(0); i<myTree->GetEntries(); i++) { myTree->GetEntry(i); nextLeaf = ( (myTree->GetListOfLeaves())->MakeIterator() ); while( (leafObj = nextLeaf.Next()) ) { TString name(leafObj->GetName()); float value(myTree->GetLeaf( leafObj->GetName() )->GetValue()); if(value > xMaxs[name]) { xMaxs[name] = value; } if(value < xMins[name]) { xMins[name] = value; } } // loop over leaves } // loop over tree entries // plot everything in the tree myTree->GetEntry(0); nextLeaf = ( (myTree->GetListOfLeaves())->MakeIterator() ); leafObj = 0; // make a histogram per leaf map<TString, TH1F*> hists; myTree->GetEntry(0); while( (leafObj = nextLeaf.Next()) ) { if(!leafObj) { continue; } //cout << leafObj->GetName() << endl; TString name(leafObj->GetName()); // special ones : fit related things if(name.Contains("covQual")) { hists[name] = new TH1F(name,name,5,0,5); continue; } if(name.Contains("fitStatus")) { hists[name] = new TH1F(name,name,5,0,5); continue; } int nbin(500); float histMin( xMins[name] - 0.1*fabs(xMins[name]) ); float histMax( xMaxs[name] + 0.1*fabs(xMaxs[name]) ); if(name.Contains("ATLAS_norm")) { // floating normalization factors histMin = 0; histMax = 10; } else if(name.Contains("gamma_stat")) { // statistical nus param if(name.Contains("globObs")) { // get custom range for sampling histMin = int( xMins[name] - 0.1*fabs(xMins[name]) ); histMax = int( xMaxs[name] + 0.1*fabs(xMaxs[name]) ); } // use small range for pull and error else { nbin = 100; histMin = 0.0; histMax = 2.0; } } else if(name.Contains("_err")) { // errors on nus param nbin = 100; histMin = 0.0; histMax = 2.0; } else if(name.Contains("fitCond") || name.Contains("fitUncond") || name.Contains("globObs")) { // fit pulls nbin = 500; histMin = -5; histMax = 5; } hists[name] = new TH1F(name,name,nbin,histMin,histMax); } // loop over leaves to declare histos // loop over entries and fill histograms for(int i(0); i<myTree->GetEntries(); i++) { myTree->GetEntry(i); nextLeaf = ( (myTree->GetListOfLeaves())->MakeIterator() ); while( (leafObj = nextLeaf.Next()) ) { TString name(leafObj->GetName()); if(hists.find(name) == hists.end()) { continue; } hists[name]->Fill( myTree->GetLeaf( leafObj->GetName() )->GetValue() ); } // loop over leaves } // loop over tree entries // overflow and underflow for(map<TString,TH1F*>::iterator ihist(hists.begin()); ihist!=hists.end(); ihist++) { if(ihist->second->GetBinContent(0)>0) { ihist->second->SetBinContent(1, ihist->second->GetBinContent(0) + ihist->second->GetBinContent(1) ); // fix err } int nBinx = ihist->second->GetNbinsX(); if(ihist->second->GetBinContent(nBinx)>0) { ihist->second->SetBinContent(nBinx-1, ihist->second->GetBinContent(nBinx) + ihist->second->GetBinContent(nBinx-1) ); // fix err } } // save the results TString dirName(OutputDir+"/PlotsStatisticalTest/GlobalFit"); if(drawPlots) { system(TString("mkdir -vp "+dirName)); } TCanvas* canvas = new TCanvas("pulls"); TLegend *leg = new TLegend(0.67, 0.64, 0.87, 0.86); LegendStyle(leg); for(map<TString,TH1F*>::iterator ihist(hists.begin()); ihist!=hists.end(); ihist++) { if( (ihist->first).Contains("fitCond_") ) { continue; } // skip unconditional fit - get it explicitly canvas->Clear(); leg->Clear(); TString niceName(ihist->first); niceName.ReplaceAll("fitUncond_",""); //niceName.ReplaceAll("SD_TS0_",""); // not good if have multiple test statistics // conditional fit information ihist->second->SetLineColor(kGray+2); ihist->second->SetTitle(niceName); ihist->second->SetLineStyle(kSolid); ihist->second->SetLineWidth(2); if((ihist->first).Contains("fit") && !(ihist->first).Contains("_err") && !(ihist->first).Contains("Qual") && !(ihist->first).Contains("Status")) { ihist->second->Rebin(4); } // ihist->second->GetXaxis()->SetTitle(""); // ihist->second->GetYaxis()->SetTitle(""); if(niceName.Contains("globObs")) { leg->AddEntry( ihist->second, "Sampling", "l" ); // add value of mu } else { leg->AddEntry( ihist->second, "Unconditional Fit", "l" ); // add value of mu } TString condName(ihist->first); condName.ReplaceAll("fitUncond","fitCond"); // uncomditional fit information if(hists.find(condName) != hists.end() && condName != ihist->first) { hists[condName]->SetLineColor(kGray+2); hists[condName]->SetLineStyle(kDashed); hists[condName]->SetLineWidth(2); if(!(ihist->first).Contains("_err")) { hists[condName]->Rebin(4); } leg->AddEntry( hists[condName], "Conditional Fit", "l" ); if( hists[condName]->GetMaximum() > ihist->second->GetMaximum() ) { ihist->second->SetMaximum( hists[condName]->GetMaximum() ); } } ihist->second->SetMaximum( 1.2 * ihist->second->GetMaximum() ); canvas->cd(); ihist->second->Draw(); leg->Draw(); if(hists[condName] && condName != ihist->first) { hists[condName]->Draw("same"); } if(drawPlots) { canvas->Print(dirName+"/"+niceName+".eps"); canvas->Print(dirName+"/"+niceName+".png"); } MainDirStatTest->cd(); canvas->Write(); gROOT->cd(); } */ return; }
void StandardHypoTestDemo(const char* infile = "", const char* workspaceName = "combined", const char* modelSBName = "ModelConfig", const char* modelBName = "", const char* dataName = "obsData", int calcType = 0, // 0 freq 1 hybrid, 2 asymptotic int testStatType = 3, // 0 LEP, 1 TeV, 2 LHC, 3 LHC - one sided int ntoys = 5000, bool useNC = false, const char * nuisPriorName = 0) { /* Other Parameter to pass in tutorial apart from standard for filename, ws, modelconfig and data type = 0 Freq calculator type = 1 Hybrid calculator type = 2 Asymptotic calculator testStatType = 0 LEP = 1 Tevatron = 2 Profile Likelihood = 3 Profile Likelihood one sided (i.e. = 0 if mu < mu_hat) ntoys: number of toys to use useNumberCounting: set to true when using number counting events nuisPriorName: name of prior for the nnuisance. This is often expressed as constraint term in the global model It is needed only when using the HybridCalculator (type=1) If not given by default the prior pdf from ModelConfig is used. extra options are available as global paramwters of the macro. They major ones are: generateBinned generate binned data sets for toys (default is false) - be careful not to activate with a too large (>=3) number of observables nToyRatio ratio of S+B/B toys (default is 2) printLevel */ // disable - can cause some problems //ToyMCSampler::SetAlwaysUseMultiGen(true); SimpleLikelihoodRatioTestStat::SetAlwaysReuseNLL(true); ProfileLikelihoodTestStat::SetAlwaysReuseNLL(true); RatioOfProfiledLikelihoodsTestStat::SetAlwaysReuseNLL(true); //RooRandom::randomGenerator()->SetSeed(0); // to change minimizers // ROOT::Math::MinimizerOptions::SetDefaultStrategy(0); // ROOT::Math::MinimizerOptions::SetDefaultMinimizer("Minuit2"); // ROOT::Math::MinimizerOptions::SetDefaultTolerance(1); ///////////////////////////////////////////////////////////// // First part is just to access a user-defined file // or create the standard example file if it doesn't exist //////////////////////////////////////////////////////////// const char* filename = ""; if (!strcmp(infile,"")) filename = "results/example_combined_GaussExample_model.root"; else filename = infile; // Check if example input file exists TFile *file = TFile::Open(filename); // if input file was specified byt not found, quit if(!file && strcmp(infile,"")){ cout <<"file not found" << endl; return; } // if default file not found, try to create it if(!file ){ // Normally this would be run on the command line cout <<"will run standard hist2workspace example"<<endl; gROOT->ProcessLine(".! prepareHistFactory ."); gROOT->ProcessLine(".! hist2workspace config/example.xml"); cout <<"\n\n---------------------"<<endl; cout <<"Done creating example input"<<endl; cout <<"---------------------\n\n"<<endl; } // now try to access the file again file = TFile::Open(filename); if(!file){ // if it is still not there, then we can't continue cout << "Not able to run hist2workspace to create example input" <<endl; return; } ///////////////////////////////////////////////////////////// // Tutorial starts here //////////////////////////////////////////////////////////// // get the workspace out of the file RooWorkspace* w = (RooWorkspace*) file->Get(workspaceName); if(!w){ cout <<"workspace not found" << endl; return; } w->Print(); // get the modelConfig out of the file ModelConfig* sbModel = (ModelConfig*) w->obj(modelSBName); // get the modelConfig out of the file RooAbsData* data = w->data(dataName); // make sure ingredients are found if(!data || !sbModel){ w->Print(); cout << "data or ModelConfig was not found" <<endl; return; } // make b model ModelConfig* bModel = (ModelConfig*) w->obj(modelBName); // case of no systematics // remove nuisance parameters from model if (noSystematics) { const RooArgSet * nuisPar = sbModel->GetNuisanceParameters(); if (nuisPar && nuisPar->getSize() > 0) { std::cout << "StandardHypoTestInvDemo" << " - Switch off all systematics by setting them constant to their initial values" << std::endl; RooStats::SetAllConstant(*nuisPar); } if (bModel) { const RooArgSet * bnuisPar = bModel->GetNuisanceParameters(); if (bnuisPar) RooStats::SetAllConstant(*bnuisPar); } } if (!bModel ) { Info("StandardHypoTestInvDemo","The background model %s does not exist",modelBName); Info("StandardHypoTestInvDemo","Copy it from ModelConfig %s and set POI to zero",modelSBName); bModel = (ModelConfig*) sbModel->Clone(); bModel->SetName(TString(modelSBName)+TString("B_only")); RooRealVar * var = dynamic_cast<RooRealVar*>(bModel->GetParametersOfInterest()->first()); if (!var) return; double oldval = var->getVal(); var->setVal(0); //bModel->SetSnapshot( RooArgSet(*var, *w->var("lumi")) ); bModel->SetSnapshot( RooArgSet(*var) ); var->setVal(oldval); } if (!sbModel->GetSnapshot() || poiValue > 0) { Info("StandardHypoTestDemo","Model %s has no snapshot - make one using model poi",modelSBName); RooRealVar * var = dynamic_cast<RooRealVar*>(sbModel->GetParametersOfInterest()->first()); if (!var) return; double oldval = var->getVal(); if (poiValue > 0) var->setVal(poiValue); //sbModel->SetSnapshot( RooArgSet(*var, *w->var("lumi") ) ); sbModel->SetSnapshot( RooArgSet(*var) ); if (poiValue > 0) var->setVal(oldval); //sbModel->SetSnapshot( *sbModel->GetParametersOfInterest() ); } // part 1, hypothesis testing SimpleLikelihoodRatioTestStat * slrts = new SimpleLikelihoodRatioTestStat(*bModel->GetPdf(), *sbModel->GetPdf()); // null parameters must includes snapshot of poi plus the nuisance values RooArgSet nullParams(*bModel->GetSnapshot()); if (bModel->GetNuisanceParameters()) nullParams.add(*bModel->GetNuisanceParameters()); slrts->SetNullParameters(nullParams); RooArgSet altParams(*sbModel->GetSnapshot()); if (sbModel->GetNuisanceParameters()) altParams.add(*sbModel->GetNuisanceParameters()); slrts->SetAltParameters(altParams); ProfileLikelihoodTestStat * profll = new ProfileLikelihoodTestStat(*bModel->GetPdf()); RatioOfProfiledLikelihoodsTestStat * ropl = new RatioOfProfiledLikelihoodsTestStat(*bModel->GetPdf(), *sbModel->GetPdf(), sbModel->GetSnapshot()); ropl->SetSubtractMLE(false); if (testStatType == 3) profll->SetOneSidedDiscovery(1); profll->SetPrintLevel(printLevel); // profll.SetReuseNLL(mOptimize); // slrts.SetReuseNLL(mOptimize); // ropl.SetReuseNLL(mOptimize); AsymptoticCalculator::SetPrintLevel(printLevel); HypoTestCalculatorGeneric * hypoCalc = 0; // note here Null is B and Alt is S+B if (calcType == 0) hypoCalc = new FrequentistCalculator(*data, *sbModel, *bModel); else if (calcType == 1) hypoCalc= new HybridCalculator(*data, *sbModel, *bModel); else if (calcType == 2) hypoCalc= new AsymptoticCalculator(*data, *sbModel, *bModel); if (calcType == 0) ((FrequentistCalculator*)hypoCalc)->SetToys(ntoys, ntoys/nToysRatio); if (calcType == 1) ((HybridCalculator*)hypoCalc)->SetToys(ntoys, ntoys/nToysRatio); if (calcType == 2 ) { if (testStatType == 3) ((AsymptoticCalculator*) hypoCalc)->SetOneSidedDiscovery(true); if (testStatType != 2 && testStatType != 3) Warning("StandardHypoTestDemo","Only the PL test statistic can be used with AsymptoticCalculator - use by default a two-sided PL"); } // check for nuisance prior pdf in case of nuisance parameters if (calcType == 1 && (bModel->GetNuisanceParameters() || sbModel->GetNuisanceParameters() )) { RooAbsPdf * nuisPdf = 0; if (nuisPriorName) nuisPdf = w->pdf(nuisPriorName); // use prior defined first in bModel (then in SbModel) if (!nuisPdf) { Info("StandardHypoTestDemo","No nuisance pdf given for the HybridCalculator - try to deduce pdf from the model"); if (bModel->GetPdf() && bModel->GetObservables() ) nuisPdf = RooStats::MakeNuisancePdf(*bModel,"nuisancePdf_bmodel"); else nuisPdf = RooStats::MakeNuisancePdf(*sbModel,"nuisancePdf_sbmodel"); } if (!nuisPdf ) { if (bModel->GetPriorPdf()) { nuisPdf = bModel->GetPriorPdf(); Info("StandardHypoTestDemo","No nuisance pdf given - try to use %s that is defined as a prior pdf in the B model",nuisPdf->GetName()); } else { Error("StandardHypoTestDemo","Cannnot run Hybrid calculator because no prior on the nuisance parameter is specified or can be derived"); return; } } assert(nuisPdf); Info("StandardHypoTestDemo","Using as nuisance Pdf ... " ); nuisPdf->Print(); const RooArgSet * nuisParams = (bModel->GetNuisanceParameters() ) ? bModel->GetNuisanceParameters() : sbModel->GetNuisanceParameters(); RooArgSet * np = nuisPdf->getObservables(*nuisParams); if (np->getSize() == 0) { Warning("StandardHypoTestDemo","Prior nuisance does not depend on nuisance parameters. They will be smeared in their full range"); } delete np; ((HybridCalculator*)hypoCalc)->ForcePriorNuisanceAlt(*nuisPdf); ((HybridCalculator*)hypoCalc)->ForcePriorNuisanceNull(*nuisPdf); } // hypoCalc->ForcePriorNuisanceAlt(*sbModel->GetPriorPdf()); // hypoCalc->ForcePriorNuisanceNull(*bModel->GetPriorPdf()); ToyMCSampler * sampler = (ToyMCSampler *)hypoCalc->GetTestStatSampler(); if (sampler && (calcType == 0 || calcType == 1) ) { // look if pdf is number counting or extended if (sbModel->GetPdf()->canBeExtended() ) { if (useNC) Warning("StandardHypoTestDemo","Pdf is extended: but number counting flag is set: ignore it "); } else { // for not extended pdf if (!useNC) { int nEvents = data->numEntries(); Info("StandardHypoTestDemo","Pdf is not extended: number of events to generate taken from observed data set is %d",nEvents); sampler->SetNEventsPerToy(nEvents); } else { Info("StandardHypoTestDemo","using a number counting pdf"); sampler->SetNEventsPerToy(1); } } if (data->isWeighted() && !generateBinned) { Info("StandardHypoTestDemo","Data set is weighted, nentries = %d and sum of weights = %8.1f but toy generation is unbinned - it would be faster to set generateBinned to true\n",data->numEntries(), data->sumEntries()); } if (generateBinned) sampler->SetGenerateBinned(generateBinned); // set the test statistic if (testStatType == 0) sampler->SetTestStatistic(slrts); if (testStatType == 1) sampler->SetTestStatistic(ropl); if (testStatType == 2 || testStatType == 3) sampler->SetTestStatistic(profll); } HypoTestResult * htr = hypoCalc->GetHypoTest(); htr->SetPValueIsRightTail(true); htr->SetBackgroundAsAlt(false); htr->Print(); // how to get meaningfull CLs at this point? delete sampler; delete slrts; delete ropl; delete profll; if (calcType != 2) { HypoTestPlot * plot = new HypoTestPlot(*htr,100); plot->SetLogYaxis(true); plot->Draw(); } else { std::cout << "Asymptotic results " << std::endl; } // look at expected significances // found median of S+B distribution if (calcType != 2) { SamplingDistribution * altDist = htr->GetAltDistribution(); HypoTestResult htExp("Expected Result"); htExp.Append(htr); // find quantiles in alt (S+B) distribution double p[5]; double q[5]; for (int i = 0; i < 5; ++i) { double sig = -2 + i; p[i] = ROOT::Math::normal_cdf(sig,1); } std::vector<double> values = altDist->GetSamplingDistribution(); TMath::Quantiles( values.size(), 5, &values[0], q, p, false); for (int i = 0; i < 5; ++i) { htExp.SetTestStatisticData( q[i] ); double sig = -2 + i; std::cout << " Expected p -value and significance at " << sig << " sigma = " << htExp.NullPValue() << " significance " << htExp.Significance() << " sigma " << std::endl; } } else { // case of asymptotic calculator for (int i = 0; i < 5; ++i) { double sig = -2 + i; // sigma is inverted here double pval = AsymptoticCalculator::GetExpectedPValues( htr->NullPValue(), htr->AlternatePValue(), -sig, false); std::cout << " Expected p -value and significance at " << sig << " sigma = " << pval << " significance " << ROOT::Math::normal_quantile_c(pval,1) << " sigma " << std::endl; } } }
result fit_toy(RooWorkspace* wspace, int n, const RooArgSet* globals) { RooRandom::randomGenerator()->SetSeed(0); // TFile f(filename); // RooWorkspace *wspace = (RooWorkspace*)f.Get("combined"); ModelConfig* model = (ModelConfig*)wspace->obj("ModelConfig"); RooAbsPdf* pdf; pdf = model->GetPdf(); RooAbsPdf* top_constraint = (RooAbsPdf*)wspace->obj("top_ratio_constraint"); RooAbsPdf* vv_constraint = (RooAbsPdf*)wspace->obj("vv_ratio_constraint"); RooAbsPdf* top_vv_constraint_sf = (RooAbsPdf*)wspace->obj("top_vv_ratio_sf_constraint"); RooAbsPdf* top_vv_constraint_of = (RooAbsPdf*)wspace->obj("top_vv_ratio_of_constraint"); // generate constraint global observables RooRealVar *nom_top_ratio = (RooRealVar*)wspace->obj("nom_top_ratio"); nom_top_ratio->setRange(0, 100); RooRealVar *nom_vv_ratio = (RooRealVar*)wspace->obj("nom_vv_ratio"); nom_vv_ratio->setRange(0,100); RooRealVar *nom_top_vv_ratio_sf = (RooRealVar*)wspace->obj("nom_top_vv_ratio_sf"); nom_top_vv_ratio_sf->setRange(0,100); RooRealVar *nom_top_vv_ratio_of = (RooRealVar*)wspace->obj("nom_top_vv_ratio_of"); nom_top_vv_ratio_of->setRange(0,100); RooDataSet *nom_top_generated = top_constraint->generateSimGlobal(RooArgSet(*nom_top_ratio), 1); nom_top_ratio->setVal(((RooRealVar*)nom_top_generated->get(0)->find("nom_top_ratio"))->getVal()); RooDataSet *nom_vv_generated = vv_constraint->generateSimGlobal(RooArgSet(*nom_vv_ratio), 1); nom_vv_ratio->setVal(((RooRealVar*)nom_vv_generated->get(0)->find("nom_vv_ratio"))->getVal()); RooDataSet *nom_top_vv_sf_generated = top_vv_constraint_sf->generateSimGlobal(RooArgSet(*nom_top_vv_ratio_sf), 1); nom_top_vv_ratio_sf->setVal(((RooRealVar*)nom_top_vv_sf_generated->get(0)->find("nom_top_vv_ratio_sf"))->getVal()); RooDataSet *nom_top_vv_of_generated = top_vv_constraint_of->generateSimGlobal(RooArgSet(*nom_top_vv_ratio_of), 1); nom_top_vv_ratio_of->setVal(((RooRealVar*)nom_top_vv_of_generated->get(0)->find("nom_top_vv_ratio_of"))->getVal()); NumEventsTestStat* dummy = new NumEventsTestStat(*pdf); ToyMCSampler* mc = new ToyMCSampler(*dummy, 1); mc->SetPdf(*pdf); mc->SetObservables(*model->GetObservables()); mc->SetGlobalObservables(*globals); mc->SetNuisanceParameters(*model->GetNuisanceParameters()); mc->SetParametersForTestStat(*model->GetParametersOfInterest()); mc->SetNEventsPerToy(n); RooArgSet constr; constr.add(*(model->GetNuisanceParameters())); RemoveConstantParameters(&constr); RooDataSet* toy_data = (RooDataSet*)mc->GenerateToyData(*const_cast<RooArgSet*>(model->GetSnapshot())); RooFitResult *res = pdf->fitTo(*toy_data, Constrain(constr), PrintLevel(0), Save(), Range("fitRange"), InitialHesse(), ExternalConstraints(RooArgSet(*top_constraint, *vv_constraint, *top_vv_constraint_sf, *top_vv_constraint_of))); result yield = get_results(wspace, res); yield.of.generated_sum.val = toy_data->sumEntries("(channelCat==channelCat::of) & (obs_x_of>120)"); yield.sf.generated_sum.val = toy_data->sumEntries("(channelCat==channelCat::sf) & (obs_x_sf>120)"); delete mc; delete dummy; // f.Close(); return yield; }
void HypoTestInvDemo(const char * fileName ="GausModel_b.root", const char * wsName = "w", const char * modelSBName = "model_sb", const char * modelBName = "model_b", const char * dataName = "data_obs", int type = 0, // calculator type int testStatType = 0, // test stat type int npoints = 10, int ntoys=1000, bool useCls = true ) { /* type = 0 Freq calculator type = 1 Hybrid testStatType = 0 LEP = 1 Tevatron = 2 PL */ if (fileName==0) { std::cout << "give input filename " << std::endl; return; } TFile * file = new TFile(fileName); RooWorkspace * w = dynamic_cast<RooWorkspace*>( file->Get(wsName) ); if (!w) { return; } w->Print(); RooAbsData * data = w->data(dataName); if (!data) { Error("HypoTestDemo","Not existing data %s",dataName); } // get models from WS // get the modelConfig out of the file ModelConfig* bModel = (ModelConfig*) w->obj(modelBName); ModelConfig* sbModel = (ModelConfig*) w->obj(modelSBName); SimpleLikelihoodRatioTestStat slrts(*bModel->GetPdf(),*sbModel->GetPdf()); slrts.SetNullParameters(*bModel->GetSnapshot()); slrts.SetAltParameters(*sbModel->GetSnapshot()); RatioOfProfiledLikelihoodsTestStat ropl(*bModel->GetPdf(), *sbModel->GetPdf(), sbModel->GetSnapshot()); ropl.SetSubtractMLE(false); ProfileLikelihoodTestStat profll(*sbModel->GetPdf()); profll.SetOneSided(0); TestStatistic * testStat = &slrts; if (testStatType == 1) testStat = &ropl; if (testStatType == 2) testStat = &profll; HypoTestCalculatorGeneric * hc = 0; if (type == 0) hc = new FrequentistCalculator(*data, *sbModel, *bModel); else new HybridCalculator(*data, *sbModel, *bModel); ToyMCSampler *toymcs = (ToyMCSampler*)hc->GetTestStatSampler(); //toymcs->SetNEventsPerToy(1); toymcs->SetTestStatistic(testStat); if (type == 1) { HybridCalculator *hhc = (HybridCalculator*) hc; hhc->SetToys(ntoys,ntoys); // hhc->ForcePriorNuisanceAlt(*pdfNuis); // hhc->ForcePriorNuisanceNull(*pdfNuis); } else ((FrequentistCalculator*) hc)->SetToys(ntoys,ntoys); // Get the result RooMsgService::instance().getStream(1).removeTopic(RooFit::NumIntegration); TStopwatch tw; tw.Start(); const RooArgSet * poiSet = sbModel->GetParametersOfInterest(); RooRealVar *poi = (RooRealVar*)poiSet->first(); // fit the data first sbModel->GetPdf()->fitTo(*data); double poihat = poi->getVal(); //poi->setVal(30); //poi->setError(10); HypoTestInverter calc(*hc); // GENA: for two-sided interval //calc.SetConfidenceLevel(0.95); // GENA: for 95% upper limit calc.SetConfidenceLevel(0.90); calc.UseCLs(useCls); calc.SetVerbose(true); // can spped up using proof ProofConfig pc(*w, 2, "workers=2", kFALSE); //ProofConfig pc(*w, 30, "localhost", kFALSE); //ToyMCSampler * toymcs = dynamic_cast<ToyMCSampler *> (calc.GetHypoTestCalculator()->GetTestStatSampler() ); // GENA: disable proof for now //toymcs->SetProofConfig(&pc); // enable proof if (npoints > 0) { // GENA double poimin = TMath::Max(poihat - 4 * poi->getError(), 0.0); //poimin = poihat; double poimax = poihat + 4 * poi->getError(); poimin = 0; poimax = 20; //double poimin = poi->getMin(); //double poimax = poi->getMax(); std::cout << "Doing a fixed scan in interval : " << poimin << " , " << poimax << std::endl; calc.SetFixedScan(npoints,poimin,poimax); } HypoTestInverterResult * r = calc.GetInterval(); // write to a file the results TString resultFileName = (useCls) ? "CLs_" : "Cls+b_"; resultFileName += fileName; // GENA //TFile * file = new TFile(resultFileName,"RECREATE"); file = new TFile(resultFileName,"RECREATE"); r->Write(); file->Close(); double ulError = r->UpperLimitEstimatedError(); double upperLimit = r->UpperLimit(); std::cout << "The computed upper limit is: " << upperLimit << std::endl; std::cout << "an estimated error on this upper limit is: " << ulError << std::endl; // check using interpolation // double interpLimit = r->FindInterpolatedLimit(1.-r->ConfidenceLevel() ); // cout << "The computer interpolated limits is " << interpLimit << endl; const int nEntries = r->ArraySize(); std::vector<Double_t> xArray(nEntries); std::vector<Double_t> yArray(nEntries); std::vector<Double_t> yErrArray(nEntries); for (int i=0; i<nEntries; i++) { xArray[i] = r->GetXValue(i); yArray[i] = r->GetYValue(i); yErrArray[i] = r->GetYError(i); std::cout << xArray[i] << " , " << yArray[i] << " err = " << yErrArray[i] << std::endl; } // see expected result (bands) TGraph * g0 = new TGraph(nEntries); TGraphAsymmErrors * g1 = new TGraphAsymmErrors(nEntries); TGraphAsymmErrors * g2l = new TGraphAsymmErrors(nEntries); TGraphAsymmErrors * g2u = new TGraphAsymmErrors(nEntries); double p[7]; double q[7]; p[0] = ROOT::Math::normal_cdf(-2); p[1] = ROOT::Math::normal_cdf(-1.5); p[2] = ROOT::Math::normal_cdf(-1); p[3] = 0.5; p[4] = ROOT::Math::normal_cdf(1); p[5] = ROOT::Math::normal_cdf(1.5); p[6] = ROOT::Math::normal_cdf(2); for (int i=0; i<nEntries; i++) { SamplingDistribution * s = r->GetExpectedDistribution(i); // GENA //const std::vector<double> & values = s->GetSamplingDistribution(); const std::vector<Double_t> & cValues = s->GetSamplingDistribution(); std::vector<Double_t> values; for (std::vector<Double_t>::const_iterator val = cValues.begin(); val != cValues.end(); ++val) values.push_back(*val); TMath::Quantiles(values.size(), 7, &values[0],q,p,false); double p0 = q[3]; double p2l = q[1]; double p2u = q[5]; g0->SetPoint(i, r->GetXValue(i), p0 ) ; g1->SetPoint(i, r->GetXValue(i), p0); g2l->SetPoint(i, r->GetXValue(i), p2l); g2u->SetPoint(i, r->GetXValue(i), p2u); //g2->SetPoint(i, r->GetXValue(i), s->InverseCDF(0.50)); g1->SetPointEYlow(i, q[3] - q[2]); // -1 sigma errorr g1->SetPointEYhigh(i, q[4] - q[3]);//+1 sigma error g2l->SetPointEYlow(i, q[1]-q[0]); // -2 -- -1 sigma error g2l->SetPointEYhigh(i, q[2]-q[1]); g2u->SetPointEYlow(i, q[5]-q[4]); g2u->SetPointEYhigh(i, q[6]-q[5]); if (plotHypoTestResult) { HypoTestResult * hr = new HypoTestResult(); hr->SetNullDistribution( r->GetBackgroundDistribution() ); hr->SetAltDistribution( r->GetSignalAndBackgroundDistribution(i) ); new TCanvas(); HypoTestPlot * pl = new HypoTestPlot(*hr); pl->Draw(); } } HypoTestInverterPlot *plot = new HypoTestInverterPlot("result","",r); TGraphErrors * g = plot->MakePlot(); g->Draw("APL"); g2l->SetFillColor(kYellow); g2l->Draw("3"); g2u->SetFillColor(kYellow); g2u->Draw("3"); g1->SetFillColor(kGreen); g1->Draw("3"); g0->SetLineColor(kBlue); g0->SetLineStyle(2); g0->SetLineWidth(2); g0->Draw("L"); //g1->Draw("P"); //g2->Draw("P"); g->SetLineWidth(2); g->Draw("PL"); // GENA: two-sided interval //double alpha = 1.-r->ConfidenceLevel(); // GENA: upper limit double alpha = (1.-r->ConfidenceLevel())/2.0; double x1 = g->GetXaxis()->GetXmin(); double x2 = g->GetXaxis()->GetXmax(); TLine * line = new TLine(x1, alpha, x2,alpha); line->SetLineColor(kRed); line->Draw(); // see the expected limit and -1 +1 sigma bands // SamplingDistribution * limits = r->GetUpperLimitDistribution(); // std::cout << " expected limit (median) " << limits->InverseCDF(0.50) << std::endl; // std::cout << " expected limit (-1 sig) " << limits->InverseCDF((ROOT::Math::normal_cdf(-1))) << std::endl; // std::cout << " expected limit (+1 sig) " << limits->InverseCDF((ROOT::Math::normal_cdf(+1))) << std::endl; tw.Print(); }