//____________________________________ void DoHypothesisTest(RooWorkspace* wks){ // Use a RooStats ProfileLikleihoodCalculator to do the hypothesis test. ModelConfig model; model.SetWorkspace(*wks); model.SetPdf("model"); //plc.SetData("data"); ProfileLikelihoodCalculator plc; plc.SetData( *(wks->data("data") )); // here we explicitly set the value of the parameters for the null. // We want no signal contribution, eg. mu = 0 RooRealVar* mu = wks->var("mu"); // RooArgSet* nullParams = new RooArgSet("nullParams"); // nullParams->addClone(*mu); RooArgSet poi(*mu); RooArgSet * nullParams = (RooArgSet*) poi.snapshot(); nullParams->setRealValue("mu",0); //plc.SetNullParameters(*nullParams); plc.SetModel(model); // NOTE: using snapshot will import nullparams // in the WS and merge with existing "mu" // model.SetSnapshot(*nullParams); //use instead setNuisanceParameters plc.SetNullParameters( *nullParams); // We get a HypoTestResult out of the calculator, and we can query it. HypoTestResult* htr = plc.GetHypoTest(); cout << "-------------------------------------------------" << endl; cout << "The p-value for the null is " << htr->NullPValue() << endl; cout << "Corresponding to a signifcance of " << htr->Significance() << endl; cout << "-------------------------------------------------\n\n" << endl; }
void RA2bHypoTestInvDemo(const char * fileName =0, const char * wsName = "combined", const char * modelSBName = "ModelConfig", const char * modelBName = "", const char * dataName = "obsData", int calculatorType = 0, int testStatType = 3, bool useCls = true , int npoints = 5, double poimin = 0, double poimax = 5, int ntoys=1000, int mgl = -1, int mlsp = -1, const char * outFileName = "test") { /* Other Parameter to pass in tutorial apart from standard for filename, ws, modelconfig and data type = 0 Freq calculator type = 1 Hybrid testStatType = 0 LEP = 1 Tevatron = 2 Profile Likelihood = 3 Profile Likelihood one sided (i.e. = 0 if mu < mu_hat) useCLs scan for CLs (otherwise for CLs+b) npoints: number of points to scan , for autoscan set npoints = -1 poimin,poimax: min/max value to scan in case of fixed scans (if min >= max, try to find automatically) ntoys: number of toys to use extra options are available as global paramters of the macro. They are: plotHypoTestResult plot result of tests at each point (TS distributions) useProof = true; writeResult = true; nworkers = 4; */ if (fileName==0) { fileName = "results/example_combined_GaussExample_model.root"; std::cout << "Use standard file generated with HistFactory :" << fileName << std::endl; } TFile * file = new TFile(fileName); RooWorkspace * w = dynamic_cast<RooWorkspace*>( file->Get(wsName) ); HypoTestInverterResult * r = 0; std::cout << w << "\t" << fileName << std::endl; if (w != NULL) { r = RunInverter(w, modelSBName, modelBName, dataName, calculatorType, testStatType, npoints, poimin, poimax, ntoys, useCls ); if (!r) { std::cerr << "Error running the HypoTestInverter - Exit " << std::endl; return; } } else { // case workspace is not present look for the inverter result std::cout << "Reading an HypoTestInverterResult with name " << wsName << " from file " << fileName << std::endl; r = dynamic_cast<HypoTestInverterResult*>( file->Get(wsName) ); // if (!r) { std::cerr << "File " << fileName << " does not contain 
a workspace or an HypoTestInverterResult - Exit " << std::endl; file->ls(); return; } } printf("\n\n") ; HypoTestResult* htr = r->GetResult(0) ; printf(" Data value for test stat : %7.3f\n", htr->GetTestStatisticData() ) ; printf(" CLsplusb : %9.4f\n", r->CLsplusb(0) ) ; printf(" CLb : %9.4f\n", r->CLb(0) ) ; printf(" CLs : %9.4f\n", r->CLs(0) ) ; printf("\n\n") ; cout << flush ; double upperLimit = r->UpperLimit(); double ulError = r->UpperLimitEstimatedError(); std::cout << "The computed upper limit is: " << upperLimit << " +/- " << ulError << std::endl; const int nEntries = r->ArraySize(); const char * typeName = (calculatorType == 0) ? "Frequentist" : "Hybrid"; const char * resultName = (w) ? w->GetName() : r->GetName(); TString plotTitle = TString::Format("%s CL Scan for workspace %s",typeName,resultName); HypoTestInverterPlot *plot = new HypoTestInverterPlot("HTI_Result_Plot",plotTitle,r); TCanvas* c1 = new TCanvas() ; plot->Draw("CLb 2CL"); // plot all and Clb c1->Update() ; c1->SaveAs("cls-canv1.png") ; c1->SaveAs("cls-canv1.pdf") ; if (plotHypoTestResult) { TCanvas * c2 = new TCanvas(); c2->Divide( 2, TMath::Ceil(nEntries/2)); for (int i=0; i<nEntries; i++) { c2->cd(i+1); SamplingDistPlot * pl = plot->MakeTestStatPlot(i); pl->SetLogYaxis(true); pl->Draw(); } c2->Update() ; c2->SaveAs("cls-canv2.png") ; c2->SaveAs("cls-canv2.pdf") ; } std::cout << " expected limit (median) " << r->GetExpectedUpperLimit(0) << std::endl; std::cout << " expected limit (-1 sig) " << r->GetExpectedUpperLimit(-1) << std::endl; std::cout << " expected limit (+1 sig) " << r->GetExpectedUpperLimit(1) << std::endl; // save 2d histograms bin to file TH2F *result = new TH2F("result","result",22,100,1200,23,50,1200); TH2F *exp_res = new TH2F("exp_res","exp_res",22,100,1200,23,50,1200); TH2F *exp_res_minus = new TH2F("exp_res_minus","exp_res_minus",22,100,1200,23,50,1200); TH2F *exp_res_plus = new TH2F("exp_res_plus","exp_res_plus",22,100,1200,23,50,1200); 
result->Fill(mgl,mlsp,upperLimit); exp_res->Fill(mgl,mlsp,r->GetExpectedUpperLimit(0)); exp_res_minus->Fill(mgl,mlsp,r->GetExpectedUpperLimit(-1)); exp_res_plus->Fill(mgl,mlsp,r->GetExpectedUpperLimit(1)); TFile *f = new TFile(outFileName,"RECREATE"); f->cd(); result->Write(); exp_res->Write(); exp_res_minus->Write(); exp_res_plus->Write(); f->Close(); if (w != NULL && writeResult) { // write to a file the results const char * calcType = (calculatorType == 0) ? "Freq" : "Hybr"; const char * limitType = (useCls) ? "CLs" : "Cls+b"; const char * scanType = (npoints < 0) ? "auto" : "grid"; TString resultFileName = TString::Format("%s_%s_%s_ts%d_",calcType,limitType,scanType,testStatType); resultFileName += fileName; TFile * fileOut = new TFile(resultFileName,"RECREATE"); r->Write(); fileOut->Close(); } }
double StandardFrequentistDiscovery( const char* infile = "", const char* workspaceName = "channel1", const char* modelConfigNameSB = "ModelConfig", const char* dataName = "obsData", int toys = 1000, double poiValueForBackground = 0.0, double poiValueForSignal = 1.0 ) { // The workspace contains the model for s+b. The b model is "autogenerated" // by copying s+b and setting the one parameter of interest to zero. // To keep the script simple, multiple parameters of interest or different // functional forms of the b model are not supported. // for now, assume there is only one parameter of interest, and these are // its values: ///////////////////////////////////////////////////////////// // First part is just to access a user-defined file // or create the standard example file if it doesn't exist //////////////////////////////////////////////////////////// const char* filename = ""; if (!strcmp(infile,"")) { filename = "results/example_channel1_GammaExample_model.root"; bool fileExist = !gSystem->AccessPathName(filename); // note opposite return code // if file does not exists generate with histfactory if (!fileExist) { #ifdef _WIN32 cout << "HistFactory file cannot be generated on Windows - exit" << endl; return -1; #endif // Normally this would be run on the command line cout <<"will run standard hist2workspace example"<<endl; gROOT->ProcessLine(".! prepareHistFactory ."); gROOT->ProcessLine(".! 
hist2workspace config/example.xml"); cout <<"\n\n---------------------"<<endl; cout <<"Done creating example input"<<endl; cout <<"---------------------\n\n"<<endl; } } else filename = infile; // Try to open the file TFile *file = TFile::Open(filename); // if input file was specified byt not found, quit if(!file ){ cout <<"StandardRooStatsDemoMacro: Input file " << filename << " is not found" << endl; return -1; } ///////////////////////////////////////////////////////////// // Tutorial starts here //////////////////////////////////////////////////////////// TStopwatch *mn_t = new TStopwatch; mn_t->Start(); // get the workspace out of the file RooWorkspace* w = (RooWorkspace*) file->Get(workspaceName); if (!w) { cout << "workspace not found" << endl; return -1.0; } // get the modelConfig out of the file ModelConfig* mc = (ModelConfig*) w->obj(modelConfigNameSB); // get the data out of the file RooAbsData* data = w->data(dataName); // make sure ingredients are found if (!data || !mc) { w->Print(); cout << "data or ModelConfig was not found" << endl; return -1.0; } RooRealVar* firstPOI = (RooRealVar*) mc->GetParametersOfInterest()->first(); firstPOI->setVal(poiValueForSignal); mc->SetSnapshot(*mc->GetParametersOfInterest()); // create null model ModelConfig *mcNull = mc->Clone("ModelConfigNull"); firstPOI->setVal(poiValueForBackground); mcNull->SetSnapshot(*(RooArgSet*)mcNull->GetParametersOfInterest()->snapshot()); // ---------------------------------------------------- // Configure a ProfileLikelihoodTestStat and a SimpleLikelihoodRatioTestStat // to use simultaneously with ToyMCSampler ProfileLikelihoodTestStat* plts = new ProfileLikelihoodTestStat(*mc->GetPdf()); plts->SetOneSidedDiscovery(true); plts->SetVarName( "q_{0}/2" ); // ---------------------------------------------------- // configure the ToyMCImportanceSampler with two test statistics ToyMCSampler toymcs(*plts, 50); // Since this tool needs to throw toy MC the PDF needs to be // extended or the tool 
needs to know how many entries in a dataset // per pseudo experiment. // In the 'number counting form' where the entries in the dataset // are counts, and not values of discriminating variables, the // datasets typically only have one entry and the PDF is not // extended. if (!mc->GetPdf()->canBeExtended()) { if (data->numEntries() == 1) { toymcs.SetNEventsPerToy(1); } else cout << "Not sure what to do about this model" << endl; } // We can use PROOF to speed things along in parallel // ProofConfig pc(*w, 2, "user@yourfavoriteproofcluster", false); ProofConfig pc(*w, 2, "", false); //toymcs.SetProofConfig(&pc); // enable proof // instantiate the calculator FrequentistCalculator freqCalc(*data, *mc, *mcNull, &toymcs); freqCalc.SetToys( toys,toys ); // null toys, alt toys // Run the calculator and print result HypoTestResult* freqCalcResult = freqCalc.GetHypoTest(); freqCalcResult->GetNullDistribution()->SetTitle( "b only" ); freqCalcResult->GetAltDistribution()->SetTitle( "s+b" ); freqCalcResult->Print(); double pvalue = freqCalcResult->NullPValue(); // stop timing mn_t->Stop(); cout << "total CPU time: " << mn_t->CpuTime() << endl; cout << "total real time: " << mn_t->RealTime() << endl; // plot TCanvas* c1 = new TCanvas(); HypoTestPlot *plot = new HypoTestPlot(*freqCalcResult, 100, -0.49, 9.51 ); plot->SetLogYaxis(true); // add chi2 to plot int nPOI = 1; TF1* f = new TF1("f", TString::Format("1*ROOT::Math::chisquared_pdf(2*x,%d,0)",nPOI), 0,20); f->SetLineColor( kBlack ); f->SetLineStyle( 7 ); plot->AddTF1( f, TString::Format("#chi^{2}(2x,%d)",nPOI) ); plot->Draw(); c1->SaveAs("standard_discovery_output.pdf"); return pvalue; }
void StandardHypoTestDemo(const char* infile = "", const char* workspaceName = "combined", const char* modelSBName = "ModelConfig", const char* modelBName = "", const char* dataName = "obsData", int calcType = 0, // 0 freq 1 hybrid, 2 asymptotic int testStatType = 3, // 0 LEP, 1 TeV, 2 LHC, 3 LHC - one sided int ntoys = 5000, bool useNC = false, const char * nuisPriorName = 0) { /* Other Parameter to pass in tutorial apart from standard for filename, ws, modelconfig and data type = 0 Freq calculator type = 1 Hybrid calculator type = 2 Asymptotic calculator testStatType = 0 LEP = 1 Tevatron = 2 Profile Likelihood = 3 Profile Likelihood one sided (i.e. = 0 if mu < mu_hat) ntoys: number of toys to use useNumberCounting: set to true when using number counting events nuisPriorName: name of prior for the nnuisance. This is often expressed as constraint term in the global model It is needed only when using the HybridCalculator (type=1) If not given by default the prior pdf from ModelConfig is used. extra options are available as global paramwters of the macro. 
They major ones are: generateBinned generate binned data sets for toys (default is false) - be careful not to activate with a too large (>=3) number of observables nToyRatio ratio of S+B/B toys (default is 2) printLevel */ // disable - can cause some problems //ToyMCSampler::SetAlwaysUseMultiGen(true); SimpleLikelihoodRatioTestStat::SetAlwaysReuseNLL(true); ProfileLikelihoodTestStat::SetAlwaysReuseNLL(true); RatioOfProfiledLikelihoodsTestStat::SetAlwaysReuseNLL(true); //RooRandom::randomGenerator()->SetSeed(0); // to change minimizers // ROOT::Math::MinimizerOptions::SetDefaultStrategy(0); // ROOT::Math::MinimizerOptions::SetDefaultMinimizer("Minuit2"); // ROOT::Math::MinimizerOptions::SetDefaultTolerance(1); ///////////////////////////////////////////////////////////// // First part is just to access a user-defined file // or create the standard example file if it doesn't exist //////////////////////////////////////////////////////////// const char* filename = ""; if (!strcmp(infile,"")) filename = "results/example_combined_GaussExample_model.root"; else filename = infile; // Check if example input file exists TFile *file = TFile::Open(filename); // if input file was specified byt not found, quit if(!file && strcmp(infile,"")){ cout <<"file not found" << endl; return; } // if default file not found, try to create it if(!file ){ // Normally this would be run on the command line cout <<"will run standard hist2workspace example"<<endl; gROOT->ProcessLine(".! prepareHistFactory ."); gROOT->ProcessLine(".! 
hist2workspace config/example.xml"); cout <<"\n\n---------------------"<<endl; cout <<"Done creating example input"<<endl; cout <<"---------------------\n\n"<<endl; } // now try to access the file again file = TFile::Open(filename); if(!file){ // if it is still not there, then we can't continue cout << "Not able to run hist2workspace to create example input" <<endl; return; } ///////////////////////////////////////////////////////////// // Tutorial starts here //////////////////////////////////////////////////////////// // get the workspace out of the file RooWorkspace* w = (RooWorkspace*) file->Get(workspaceName); if(!w){ cout <<"workspace not found" << endl; return; } w->Print(); // get the modelConfig out of the file ModelConfig* sbModel = (ModelConfig*) w->obj(modelSBName); // get the modelConfig out of the file RooAbsData* data = w->data(dataName); // make sure ingredients are found if(!data || !sbModel){ w->Print(); cout << "data or ModelConfig was not found" <<endl; return; } // make b model ModelConfig* bModel = (ModelConfig*) w->obj(modelBName); // case of no systematics // remove nuisance parameters from model if (noSystematics) { const RooArgSet * nuisPar = sbModel->GetNuisanceParameters(); if (nuisPar && nuisPar->getSize() > 0) { std::cout << "StandardHypoTestInvDemo" << " - Switch off all systematics by setting them constant to their initial values" << std::endl; RooStats::SetAllConstant(*nuisPar); } if (bModel) { const RooArgSet * bnuisPar = bModel->GetNuisanceParameters(); if (bnuisPar) RooStats::SetAllConstant(*bnuisPar); } } if (!bModel ) { Info("StandardHypoTestInvDemo","The background model %s does not exist",modelBName); Info("StandardHypoTestInvDemo","Copy it from ModelConfig %s and set POI to zero",modelSBName); bModel = (ModelConfig*) sbModel->Clone(); bModel->SetName(TString(modelSBName)+TString("B_only")); RooRealVar * var = dynamic_cast<RooRealVar*>(bModel->GetParametersOfInterest()->first()); if (!var) return; double oldval = 
var->getVal(); var->setVal(0); //bModel->SetSnapshot( RooArgSet(*var, *w->var("lumi")) ); bModel->SetSnapshot( RooArgSet(*var) ); var->setVal(oldval); } if (!sbModel->GetSnapshot() || poiValue > 0) { Info("StandardHypoTestDemo","Model %s has no snapshot - make one using model poi",modelSBName); RooRealVar * var = dynamic_cast<RooRealVar*>(sbModel->GetParametersOfInterest()->first()); if (!var) return; double oldval = var->getVal(); if (poiValue > 0) var->setVal(poiValue); //sbModel->SetSnapshot( RooArgSet(*var, *w->var("lumi") ) ); sbModel->SetSnapshot( RooArgSet(*var) ); if (poiValue > 0) var->setVal(oldval); //sbModel->SetSnapshot( *sbModel->GetParametersOfInterest() ); } // part 1, hypothesis testing SimpleLikelihoodRatioTestStat * slrts = new SimpleLikelihoodRatioTestStat(*bModel->GetPdf(), *sbModel->GetPdf()); // null parameters must includes snapshot of poi plus the nuisance values RooArgSet nullParams(*bModel->GetSnapshot()); if (bModel->GetNuisanceParameters()) nullParams.add(*bModel->GetNuisanceParameters()); slrts->SetNullParameters(nullParams); RooArgSet altParams(*sbModel->GetSnapshot()); if (sbModel->GetNuisanceParameters()) altParams.add(*sbModel->GetNuisanceParameters()); slrts->SetAltParameters(altParams); ProfileLikelihoodTestStat * profll = new ProfileLikelihoodTestStat(*bModel->GetPdf()); RatioOfProfiledLikelihoodsTestStat * ropl = new RatioOfProfiledLikelihoodsTestStat(*bModel->GetPdf(), *sbModel->GetPdf(), sbModel->GetSnapshot()); ropl->SetSubtractMLE(false); if (testStatType == 3) profll->SetOneSidedDiscovery(1); profll->SetPrintLevel(printLevel); // profll.SetReuseNLL(mOptimize); // slrts.SetReuseNLL(mOptimize); // ropl.SetReuseNLL(mOptimize); AsymptoticCalculator::SetPrintLevel(printLevel); HypoTestCalculatorGeneric * hypoCalc = 0; // note here Null is B and Alt is S+B if (calcType == 0) hypoCalc = new FrequentistCalculator(*data, *sbModel, *bModel); else if (calcType == 1) hypoCalc= new HybridCalculator(*data, *sbModel, *bModel); else if 
(calcType == 2) hypoCalc= new AsymptoticCalculator(*data, *sbModel, *bModel); if (calcType == 0) ((FrequentistCalculator*)hypoCalc)->SetToys(ntoys, ntoys/nToysRatio); if (calcType == 1) ((HybridCalculator*)hypoCalc)->SetToys(ntoys, ntoys/nToysRatio); if (calcType == 2 ) { if (testStatType == 3) ((AsymptoticCalculator*) hypoCalc)->SetOneSidedDiscovery(true); if (testStatType != 2 && testStatType != 3) Warning("StandardHypoTestDemo","Only the PL test statistic can be used with AsymptoticCalculator - use by default a two-sided PL"); } // check for nuisance prior pdf in case of nuisance parameters if (calcType == 1 && (bModel->GetNuisanceParameters() || sbModel->GetNuisanceParameters() )) { RooAbsPdf * nuisPdf = 0; if (nuisPriorName) nuisPdf = w->pdf(nuisPriorName); // use prior defined first in bModel (then in SbModel) if (!nuisPdf) { Info("StandardHypoTestDemo","No nuisance pdf given for the HybridCalculator - try to deduce pdf from the model"); if (bModel->GetPdf() && bModel->GetObservables() ) nuisPdf = RooStats::MakeNuisancePdf(*bModel,"nuisancePdf_bmodel"); else nuisPdf = RooStats::MakeNuisancePdf(*sbModel,"nuisancePdf_sbmodel"); } if (!nuisPdf ) { if (bModel->GetPriorPdf()) { nuisPdf = bModel->GetPriorPdf(); Info("StandardHypoTestDemo","No nuisance pdf given - try to use %s that is defined as a prior pdf in the B model",nuisPdf->GetName()); } else { Error("StandardHypoTestDemo","Cannnot run Hybrid calculator because no prior on the nuisance parameter is specified or can be derived"); return; } } assert(nuisPdf); Info("StandardHypoTestDemo","Using as nuisance Pdf ... " ); nuisPdf->Print(); const RooArgSet * nuisParams = (bModel->GetNuisanceParameters() ) ? bModel->GetNuisanceParameters() : sbModel->GetNuisanceParameters(); RooArgSet * np = nuisPdf->getObservables(*nuisParams); if (np->getSize() == 0) { Warning("StandardHypoTestDemo","Prior nuisance does not depend on nuisance parameters. 
They will be smeared in their full range"); } delete np; ((HybridCalculator*)hypoCalc)->ForcePriorNuisanceAlt(*nuisPdf); ((HybridCalculator*)hypoCalc)->ForcePriorNuisanceNull(*nuisPdf); } // hypoCalc->ForcePriorNuisanceAlt(*sbModel->GetPriorPdf()); // hypoCalc->ForcePriorNuisanceNull(*bModel->GetPriorPdf()); ToyMCSampler * sampler = (ToyMCSampler *)hypoCalc->GetTestStatSampler(); if (sampler && (calcType == 0 || calcType == 1) ) { // look if pdf is number counting or extended if (sbModel->GetPdf()->canBeExtended() ) { if (useNC) Warning("StandardHypoTestDemo","Pdf is extended: but number counting flag is set: ignore it "); } else { // for not extended pdf if (!useNC) { int nEvents = data->numEntries(); Info("StandardHypoTestDemo","Pdf is not extended: number of events to generate taken from observed data set is %d",nEvents); sampler->SetNEventsPerToy(nEvents); } else { Info("StandardHypoTestDemo","using a number counting pdf"); sampler->SetNEventsPerToy(1); } } if (data->isWeighted() && !generateBinned) { Info("StandardHypoTestDemo","Data set is weighted, nentries = %d and sum of weights = %8.1f but toy generation is unbinned - it would be faster to set generateBinned to true\n",data->numEntries(), data->sumEntries()); } if (generateBinned) sampler->SetGenerateBinned(generateBinned); // set the test statistic if (testStatType == 0) sampler->SetTestStatistic(slrts); if (testStatType == 1) sampler->SetTestStatistic(ropl); if (testStatType == 2 || testStatType == 3) sampler->SetTestStatistic(profll); } HypoTestResult * htr = hypoCalc->GetHypoTest(); htr->SetPValueIsRightTail(true); htr->SetBackgroundAsAlt(false); htr->Print(); // how to get meaningfull CLs at this point? 
delete sampler; delete slrts; delete ropl; delete profll; if (calcType != 2) { HypoTestPlot * plot = new HypoTestPlot(*htr,100); plot->SetLogYaxis(true); plot->Draw(); } else { std::cout << "Asymptotic results " << std::endl; } // look at expected significances // found median of S+B distribution if (calcType != 2) { SamplingDistribution * altDist = htr->GetAltDistribution(); HypoTestResult htExp("Expected Result"); htExp.Append(htr); // find quantiles in alt (S+B) distribution double p[5]; double q[5]; for (int i = 0; i < 5; ++i) { double sig = -2 + i; p[i] = ROOT::Math::normal_cdf(sig,1); } std::vector<double> values = altDist->GetSamplingDistribution(); TMath::Quantiles( values.size(), 5, &values[0], q, p, false); for (int i = 0; i < 5; ++i) { htExp.SetTestStatisticData( q[i] ); double sig = -2 + i; std::cout << " Expected p -value and significance at " << sig << " sigma = " << htExp.NullPValue() << " significance " << htExp.Significance() << " sigma " << std::endl; } } else { // case of asymptotic calculator for (int i = 0; i < 5; ++i) { double sig = -2 + i; // sigma is inverted here double pval = AsymptoticCalculator::GetExpectedPValues( htr->NullPValue(), htr->AlternatePValue(), -sig, false); std::cout << " Expected p -value and significance at " << sig << " sigma = " << pval << " significance " << ROOT::Math::normal_quantile_c(pval,1) << " sigma " << std::endl; } } }
// Scan the "mass" variable of the workspace and, for each mass value, run a
// one-sided (discovery) AsymptoticCalculator test of the background-only
// model against the signal+background model. The observed p-value, the
// expected p-value and the significance are plotted versus mass and saved
// as significance.pdf / significance.png.
//
// w : workspace that has to provide a ModelConfig "mc", a dataset "data"
//     and a variable "mass". If one of them is missing a message is printed
//     and the function returns.
void significance(RooWorkspace& w ) {

  ModelConfig* mc = (ModelConfig*)w.obj("mc");
  RooDataSet* data = (RooDataSet*)w.data("data");
  //data->Print();

  if (!mc || !data || !w.var("mass")) {
    std::cout << "significance: workspace does not contain \"mc\", \"data\" and/or \"mass\"" << std::endl;
    return;
  }

  // define the S+B snapshot (this is used for computing the expected significance)
  ModelConfig* sbModel = mc->Clone();
  sbModel->SetName("S+B Model");
  RooRealVar* poi = 0;
  if (sbModel->GetParametersOfInterest())
    poi = (RooRealVar*) sbModel->GetParametersOfInterest()->first();
  if (!poi) {
    std::cout << "significance: the ModelConfig has no parameter of interest" << std::endl;
    delete sbModel;
    return;
  }
  poi->setVal(50);
  sbModel->SetSnapshot(*poi);

  // the B model is a copy of the S+B model with the POI snapshot at zero
  ModelConfig * bModel = (ModelConfig*) sbModel->Clone();
  bModel->SetName("B model");
  poi->setVal(0);
  bModel->SetSnapshot(*poi);

  vector<double> masses;
  vector<double> p0values;
  vector<double> p0valuesExpected;
  vector<double> sigvalues;

  const double massMin = 200;
  const double massMax = 2500;
  const int nbins = 100;
  const double massStep = (massMax-massMin)/nbins;

  // loop on the mass values; an integer counter gives exactly nbins+1 points
  // from massMin to massMax without accumulating floating point steps
  for (int ibin = 0; ibin <= nbins; ++ibin) {
    const double mass = massMin + ibin * massStep;

    w.var("mass")->setVal( mass );

    // create the AsymptoticCalculator from data,alt model, null model;
    // a stack object is released at the end of each iteration
    AsymptoticCalculator ac(*data, *sbModel, *bModel);
    ac.SetOneSidedDiscovery(true);  // for one-side discovery test
    AsymptoticCalculator::SetPrintLevel(-1);

    // run the calculator
    HypoTestResult* asymCalcResult = ac.GetHypoTest();
    if (!asymCalcResult) {
      std::cout << "** Mass = " << mass << " : the calculator did not return a result" << std::endl;
      continue;
    }
    asymCalcResult->Print();

    double pvalue = asymCalcResult->NullPValue();
    double sigvalue = asymCalcResult->Significance();
    double expectedP0 = AsymptoticCalculator::GetExpectedPValues(asymCalcResult->NullPValue(),asymCalcResult->AlternatePValue(), 0, false);
    delete asymCalcResult;

    masses.push_back(mass);
    p0values.push_back(pvalue);
    p0valuesExpected.push_back(expectedP0);
    sigvalues.push_back(sigvalue);

    std::cout << "** Mass = " << mass << " p0-value = " << expectedP0 << " p-value = " << pvalue << " significance = " << sigvalue << std::endl;
  }

  delete bModel;
  delete sbModel;

  if (masses.empty()) {
    std::cout << "significance: no result available, nothing to plot" << std::endl;
    return;
  }

  // smallest positive p-value of both curves, used as lower edge of the
  // logarithmic y axis so that neither curve is cut off
  double minP0 = 0;
  for (unsigned int i = 0; i < p0values.size(); ++i) {
    if (p0values[i] > 0 && (minP0 <= 0 || p0values[i] < minP0)) minP0 = p0values[i];
    if (p0valuesExpected[i] > 0 && (minP0 <= 0 || p0valuesExpected[i] < minP0)) minP0 = p0valuesExpected[i];
  }

  TGraph* graph1 = new TGraph(masses.size(),&masses[0],&p0values[0]);
  TGraph* graph2 = new TGraph(masses.size(),&masses[0],&p0valuesExpected[0]);
  TGraph* graph3 = new TGraph(masses.size(),&masses[0],&sigvalues[0]);

  TCanvas* c2 = new TCanvas("c2","Significance", 900, 700);
  c2->Divide(1,2);

  // upper pad: observed (blue) and expected (red, dashed) p-value
  c2->cd(1);
  graph1->SetMarkerStyle(10);
  //graph1->Draw("APC");
  graph1->Draw("AC");
  graph2->SetLineStyle(2);
  graph2->Draw("C");
  graph1->GetXaxis()->SetTitle("Mass [GeV]");
  graph1->GetYaxis()->SetTitle("p0 value");
  graph1->SetTitle("P-value vs Mass");
  // TGraph::GetMinimum() only returns a minimum that was set explicitly,
  // not the smallest y value, so the minimum is taken from the data
  if (minP0 > 0) graph1->SetMinimum(minP0);
  graph1->SetLineColor(kBlue);
  graph2->SetLineColor(kRed);
  gPad->SetLogy(true);

  // lower pad: significance
  c2->cd(2);
  graph3->SetMarkerStyle(10);
  graph3->Draw("AC");
  graph3->SetLineStyle(1);
  graph3->SetLineColor(kRed);
  graph3->GetXaxis()->SetTitle("Mass [GeV]");
  graph3->GetYaxis()->SetTitle("Significance");
  graph3->SetTitle("Significance vs Mass");
  gPad->SetLogy(false);

  c2->SaveAs("significance.pdf");
  c2->SaveAs("significance.png");
}
void HypoTestInvDemo(const char * fileName ="GausModel_b.root", const char * wsName = "w", const char * modelSBName = "model_sb", const char * modelBName = "model_b", const char * dataName = "data_obs", int type = 0, // calculator type int testStatType = 0, // test stat type int npoints = 10, int ntoys=1000, bool useCls = true ) { /* type = 0 Freq calculator type = 1 Hybrid testStatType = 0 LEP = 1 Tevatron = 2 PL */ if (fileName==0) { std::cout << "give input filename " << std::endl; return; } TFile * file = new TFile(fileName); RooWorkspace * w = dynamic_cast<RooWorkspace*>( file->Get(wsName) ); if (!w) { return; } w->Print(); RooAbsData * data = w->data(dataName); if (!data) { Error("HypoTestDemo","Not existing data %s",dataName); } // get models from WS // get the modelConfig out of the file ModelConfig* bModel = (ModelConfig*) w->obj(modelBName); ModelConfig* sbModel = (ModelConfig*) w->obj(modelSBName); SimpleLikelihoodRatioTestStat slrts(*bModel->GetPdf(),*sbModel->GetPdf()); slrts.SetNullParameters(*bModel->GetSnapshot()); slrts.SetAltParameters(*sbModel->GetSnapshot()); RatioOfProfiledLikelihoodsTestStat ropl(*bModel->GetPdf(), *sbModel->GetPdf(), sbModel->GetSnapshot()); ropl.SetSubtractMLE(false); ProfileLikelihoodTestStat profll(*sbModel->GetPdf()); profll.SetOneSided(0); TestStatistic * testStat = &slrts; if (testStatType == 1) testStat = &ropl; if (testStatType == 2) testStat = &profll; HypoTestCalculatorGeneric * hc = 0; if (type == 0) hc = new FrequentistCalculator(*data, *sbModel, *bModel); else new HybridCalculator(*data, *sbModel, *bModel); ToyMCSampler *toymcs = (ToyMCSampler*)hc->GetTestStatSampler(); //toymcs->SetNEventsPerToy(1); toymcs->SetTestStatistic(testStat); if (type == 1) { HybridCalculator *hhc = (HybridCalculator*) hc; hhc->SetToys(ntoys,ntoys); // hhc->ForcePriorNuisanceAlt(*pdfNuis); // hhc->ForcePriorNuisanceNull(*pdfNuis); } else ((FrequentistCalculator*) hc)->SetToys(ntoys,ntoys); // Get the result 
RooMsgService::instance().getStream(1).removeTopic(RooFit::NumIntegration); TStopwatch tw; tw.Start(); const RooArgSet * poiSet = sbModel->GetParametersOfInterest(); RooRealVar *poi = (RooRealVar*)poiSet->first(); // fit the data first sbModel->GetPdf()->fitTo(*data); double poihat = poi->getVal(); //poi->setVal(30); //poi->setError(10); HypoTestInverter calc(*hc); // GENA: for two-sided interval //calc.SetConfidenceLevel(0.95); // GENA: for 95% upper limit calc.SetConfidenceLevel(0.90); calc.UseCLs(useCls); calc.SetVerbose(true); // can spped up using proof ProofConfig pc(*w, 2, "workers=2", kFALSE); //ProofConfig pc(*w, 30, "localhost", kFALSE); //ToyMCSampler * toymcs = dynamic_cast<ToyMCSampler *> (calc.GetHypoTestCalculator()->GetTestStatSampler() ); // GENA: disable proof for now //toymcs->SetProofConfig(&pc); // enable proof if (npoints > 0) { // GENA double poimin = TMath::Max(poihat - 4 * poi->getError(), 0.0); //poimin = poihat; double poimax = poihat + 4 * poi->getError(); poimin = 0; poimax = 20; //double poimin = poi->getMin(); //double poimax = poi->getMax(); std::cout << "Doing a fixed scan in interval : " << poimin << " , " << poimax << std::endl; calc.SetFixedScan(npoints,poimin,poimax); } HypoTestInverterResult * r = calc.GetInterval(); // write to a file the results TString resultFileName = (useCls) ? 
"CLs_" : "Cls+b_"; resultFileName += fileName; // GENA //TFile * file = new TFile(resultFileName,"RECREATE"); file = new TFile(resultFileName,"RECREATE"); r->Write(); file->Close(); double ulError = r->UpperLimitEstimatedError(); double upperLimit = r->UpperLimit(); std::cout << "The computed upper limit is: " << upperLimit << std::endl; std::cout << "an estimated error on this upper limit is: " << ulError << std::endl; // check using interpolation // double interpLimit = r->FindInterpolatedLimit(1.-r->ConfidenceLevel() ); // cout << "The computer interpolated limits is " << interpLimit << endl; const int nEntries = r->ArraySize(); std::vector<Double_t> xArray(nEntries); std::vector<Double_t> yArray(nEntries); std::vector<Double_t> yErrArray(nEntries); for (int i=0; i<nEntries; i++) { xArray[i] = r->GetXValue(i); yArray[i] = r->GetYValue(i); yErrArray[i] = r->GetYError(i); std::cout << xArray[i] << " , " << yArray[i] << " err = " << yErrArray[i] << std::endl; } // see expected result (bands) TGraph * g0 = new TGraph(nEntries); TGraphAsymmErrors * g1 = new TGraphAsymmErrors(nEntries); TGraphAsymmErrors * g2l = new TGraphAsymmErrors(nEntries); TGraphAsymmErrors * g2u = new TGraphAsymmErrors(nEntries); double p[7]; double q[7]; p[0] = ROOT::Math::normal_cdf(-2); p[1] = ROOT::Math::normal_cdf(-1.5); p[2] = ROOT::Math::normal_cdf(-1); p[3] = 0.5; p[4] = ROOT::Math::normal_cdf(1); p[5] = ROOT::Math::normal_cdf(1.5); p[6] = ROOT::Math::normal_cdf(2); for (int i=0; i<nEntries; i++) { SamplingDistribution * s = r->GetExpectedDistribution(i); // GENA //const std::vector<double> & values = s->GetSamplingDistribution(); const std::vector<Double_t> & cValues = s->GetSamplingDistribution(); std::vector<Double_t> values; for (std::vector<Double_t>::const_iterator val = cValues.begin(); val != cValues.end(); ++val) values.push_back(*val); TMath::Quantiles(values.size(), 7, &values[0],q,p,false); double p0 = q[3]; double p2l = q[1]; double p2u = q[5]; g0->SetPoint(i, 
r->GetXValue(i), p0 ) ; g1->SetPoint(i, r->GetXValue(i), p0); g2l->SetPoint(i, r->GetXValue(i), p2l); g2u->SetPoint(i, r->GetXValue(i), p2u); //g2->SetPoint(i, r->GetXValue(i), s->InverseCDF(0.50)); g1->SetPointEYlow(i, q[3] - q[2]); // -1 sigma errorr g1->SetPointEYhigh(i, q[4] - q[3]);//+1 sigma error g2l->SetPointEYlow(i, q[1]-q[0]); // -2 -- -1 sigma error g2l->SetPointEYhigh(i, q[2]-q[1]); g2u->SetPointEYlow(i, q[5]-q[4]); g2u->SetPointEYhigh(i, q[6]-q[5]); if (plotHypoTestResult) { HypoTestResult * hr = new HypoTestResult(); hr->SetNullDistribution( r->GetBackgroundDistribution() ); hr->SetAltDistribution( r->GetSignalAndBackgroundDistribution(i) ); new TCanvas(); HypoTestPlot * pl = new HypoTestPlot(*hr); pl->Draw(); } } HypoTestInverterPlot *plot = new HypoTestInverterPlot("result","",r); TGraphErrors * g = plot->MakePlot(); g->Draw("APL"); g2l->SetFillColor(kYellow); g2l->Draw("3"); g2u->SetFillColor(kYellow); g2u->Draw("3"); g1->SetFillColor(kGreen); g1->Draw("3"); g0->SetLineColor(kBlue); g0->SetLineStyle(2); g0->SetLineWidth(2); g0->Draw("L"); //g1->Draw("P"); //g2->Draw("P"); g->SetLineWidth(2); g->Draw("PL"); // GENA: two-sided interval //double alpha = 1.-r->ConfidenceLevel(); // GENA: upper limit double alpha = (1.-r->ConfidenceLevel())/2.0; double x1 = g->GetXaxis()->GetXmin(); double x2 = g->GetXaxis()->GetXmax(); TLine * line = new TLine(x1, alpha, x2,alpha); line->SetLineColor(kRed); line->Draw(); // see the expected limit and -1 +1 sigma bands // SamplingDistribution * limits = r->GetUpperLimitDistribution(); // std::cout << " expected limit (median) " << limits->InverseCDF(0.50) << std::endl; // std::cout << " expected limit (-1 sig) " << limits->InverseCDF((ROOT::Math::normal_cdf(-1))) << std::endl; // std::cout << " expected limit (+1 sig) " << limits->InverseCDF((ROOT::Math::normal_cdf(+1))) << std::endl; tw.Print(); }