void CompareWithRazor(const double mass_stop, const double mass_lsp, const std::string sr, const std::string& suffix = "png")
{
    // get observed limits 
    const std::string limit_path = "test";
    const unsigned int num_obs   = 7;
    stop::Yield::value_t ul_obs[num_obs] =
    {
        ExtractObsUpperLimitCombine(Form("%s/higgsCombine_t2tt_%1.0f_%1.0f_%s.Asymptotic.mH120.1234.root"                                      , limit_path.c_str() , mass_stop , mass_lsp, sr.c_str())), 
        //ExtractObsUpperLimitCombine(Form("%s/higgsCombine_razor_combine_Had_T2tt_MG_%1.0f.000000_MCHI_%1.0f.000000.Asymptotic.mH120.1234.root" , limit_path.c_str() , mass_stop , mass_lsp            )), 
        ExtractObsUpperLimitCombine(Form("%s/higgsCombine_combined_t2tt_%1.0f_%1.0f_%s_junk2.Asymptotic.mH120.1234.root"                       , limit_path.c_str() , mass_stop , mass_lsp, sr.c_str())), 
        ExtractObsUpperLimitCombine(Form("%s/higgsCombine_combined_t2tt_%1.0f_%1.0f_%s.Asymptotic.mH120.1234.root"                             , limit_path.c_str() , mass_stop , mass_lsp, sr.c_str())), 
        ExtractObsUpperLimitCombine(Form("%s/higgsCombine_combined_t2tt_%1.0f_%1.0f_%s_jescorr.Asymptotic.mH120.1234.root"                     , limit_path.c_str() , mass_stop , mass_lsp, sr.c_str())), 
        ExtractObsUpperLimitCombine(Form("%s/higgsCombine_combined_t2tt_%1.0f_%1.0f_%s_btagcorr.Asymptotic.mH120.1234.root"                    , limit_path.c_str() , mass_stop , mass_lsp, sr.c_str())), 
        ExtractObsUpperLimitCombine(Form("%s/higgsCombine_combined_t2tt_%1.0f_%1.0f_%s_isrcorr.Asymptotic.mH120.1234.root"                     , limit_path.c_str() , mass_stop , mass_lsp, sr.c_str())),
        ExtractObsUpperLimitCombine(Form("%s/higgsCombine_combined_t2tt_%1.0f_%1.0f_%s_lepacorr.Asymptotic.mH120.1234.root"                    , limit_path.c_str() , mass_stop , mass_lsp, sr.c_str()))
    };

    // get exp limits 
    stop::Yield::value_t ul_exp[num_obs] =
    {
        ExtractExpUpperLimitCombine(Form("%s/higgsCombine_t2tt_%1.0f_%1.0f_%s.Asymptotic.mH120.1234.root"                                      , limit_path.c_str() , mass_stop , mass_lsp, sr.c_str())), 
        //ExtractExpUpperLimitCombine(Form("%s/higgsCombine_razor_combine_Had_T2tt_MG_%1.0f.000000_MCHI_%1.0f.000000.Asymptotic.mH120.1234.root" , limit_path.c_str() , mass_stop , mass_lsp            )), 
        ExtractExpUpperLimitCombine(Form("%s/higgsCombine_combined_t2tt_%1.0f_%1.0f_%s_junk2.Asymptotic.mH120.1234.root"                       , limit_path.c_str() , mass_stop , mass_lsp, sr.c_str())), 
        ExtractExpUpperLimitCombine(Form("%s/higgsCombine_combined_t2tt_%1.0f_%1.0f_%s.Asymptotic.mH120.1234.root"                             , limit_path.c_str() , mass_stop , mass_lsp, sr.c_str())), 
        ExtractExpUpperLimitCombine(Form("%s/higgsCombine_combined_t2tt_%1.0f_%1.0f_%s_jescorr.Asymptotic.mH120.1234.root"                     , limit_path.c_str() , mass_stop , mass_lsp, sr.c_str())), 
        ExtractExpUpperLimitCombine(Form("%s/higgsCombine_combined_t2tt_%1.0f_%1.0f_%s_btagcorr.Asymptotic.mH120.1234.root"                    , limit_path.c_str() , mass_stop , mass_lsp, sr.c_str())), 
        ExtractExpUpperLimitCombine(Form("%s/higgsCombine_combined_t2tt_%1.0f_%1.0f_%s_isrcorr.Asymptotic.mH120.1234.root"                     , limit_path.c_str() , mass_stop , mass_lsp, sr.c_str())),
        ExtractExpUpperLimitCombine(Form("%s/higgsCombine_combined_t2tt_%1.0f_%1.0f_%s_lepacorr.Asymptotic.mH120.1234.root"                    , limit_path.c_str() , mass_stop , mass_lsp, sr.c_str()))
    };

    // output table
    const char* const fmt = "1.5";
    CTable t1;
    t1.useTitle();
    t1.setTitle(Form("Comparison of limits for m_stop = %1.0f and m_lsp = %1.0f", mass_stop, mass_lsp));
    t1.setTable()(                                           "obs limits",     "exp limits")
                 ("Single lepton"                    , ul_obs[0].pm(fmt) , ul_exp[0].value )
                 ("Razor hadronic"                   , ul_obs[1].pm(fmt) , ul_exp[1].value )
                 ("combined (uncorrelated)"          , ul_obs[2].pm(fmt) , ul_exp[2].value )
                 ("combined (JES 100%% corr)"        , ul_obs[3].pm(fmt) , ul_exp[3].value )
                 ("combined (Btag 100%% corr)"       , ul_obs[4].pm(fmt) , ul_exp[4].value )
                 ("combined (ISR 100%% corr)"        , ul_obs[5].pm(fmt) , ul_exp[5].value )
                 ("combined (lepton 100%% anti-corr)", ul_obs[6].pm(fmt) , ul_exp[6].value )
    ;
    cout << t1 << std::endl;

    // output plot
    TH1F h_obs("h_obs", Form("Observed signal strength (r-value), m_{stop} = %1.0f GeV, m_{LSP} = %1.0f GeV, SR= %s;Card;r-value", mass_stop, mass_lsp, sr.c_str()), 7, 0.5, 7.5);
    TH1F h_exp("h_exp", Form("Expected signal strength (r-value), m_{stop} = %1.0f GeV, m_{LSP} = %1.0f GeV, SR= %s;Card;r-value", mass_stop, mass_lsp, sr.c_str()), 7, 0.5, 7.5);
    FillLimitHistogram(h_obs, ul_obs);
    FillLimitHistogram(h_exp, ul_exp);

    TCanvas c1;
    rt::TH1Overlay p_obs(Form("Signal strength (r-value), m_{stop} = %1.0f GeV, m_{LSP} = %1.0f GeV, SR= %s;Card;r-value", mass_stop, mass_lsp, sr.c_str()), "sb::off lg::top_left");
    p_obs.Add(&h_obs, "Observed", kBlack, 2);
    p_obs.Add(&h_exp, "Expected", kRed  , 2);
    p_obs.Draw();
    lt::mkdir("plots/razor_compare", /*force=*/true);
    c1.Print(Form("plots/razor_compare/p_rvalue_%1.0f_%1.0f_%s.%s", mass_stop, mass_lsp, sr.c_str(), suffix.c_str()));
}
	/**
	 * Reduce calc_ressq_functor over (model, obs, sigma) triples, one sum per model,
	 * and store the sums in ph_chisq.
	 *
	 * Runs on the GPU when ebf::init_cuda() returns a non-negative id, otherwise on the
	 * host. Both paths write their results into ph_chisq.
	 *
	 * @param ph_model  num_data*num_model values; model m occupies
	 *                  [m*num_data, (m+1)*num_data)
	 * @param ph_obs    num_data observed values, shared by every model
	 * @param ph_sigma  num_data values paired element-wise with ph_obs
	 *                  (presumably uncertainties -- see calc_ressq_functor)
	 * @param num_data  number of data points per model
	 * @param num_model number of models
	 * @param ph_chisq  output buffer with room for num_model values; it is only written,
	 *                  never read
	 *
	 * Nothing is written when num_model <= 0 or num_data < 0. With num_data == 0 every
	 * entry of ph_chisq is set to the reduction's initial value 0.
	 */
	__host__ void calc_chisq_wrapper_c(const double *ph_model, const double *ph_obs, const double *ph_sigma, const int num_data, const int num_model, double *ph_chisq )
	{
		int gpuid = ebf::init_cuda();

		// Negative counts would form invalid pointer ranges below; zero models means
		// there is nothing to compute.
		if(num_data<0 || num_model<=0)
			return;

		const int num = num_data*num_model;

		// Index sequence 0,1,2,... mapped through modulo_stride_functor(num_data) so the
		// single obs/sigma arrays can be walked alongside the flattened model array
		// (presumably i -> i % num_data; see modulo_stride_functor).
		typedef thrust::counting_iterator<int> CountIntIter;
		typedef thrust::transform_iterator<modulo_stride_functor,CountIntIter> ObsIdxIter;
		CountIntIter index_begin(0);
		ObsIdxIter obs_idx_begin = thrust::make_transform_iterator(index_begin,modulo_stride_functor(num_data));

		if(gpuid>=0)
		{
			// upload the inputs to the GPU
			thrust::device_vector<double> d_model(ph_model,ph_model+num);
			thrust::device_vector<double> d_obs(ph_obs,ph_obs+num_data);
			thrust::device_vector<double> d_sigma(ph_sigma,ph_sigma+num_data);

			ebf::sync();

			typedef thrust::permutation_iterator<thrust::device_vector<double>::iterator, ObsIdxIter> ObsIter;
			typedef thrust::tuple<thrust::device_vector<double>::iterator, ObsIter, ObsIter> ModelObsSigmaIteratorTuple;
			typedef thrust::zip_iterator<ModelObsSigmaIteratorTuple> ModelObsSigmaZipIter;
			ModelObsSigmaZipIter input_begin = thrust::make_zip_iterator(thrust::make_tuple(
				d_model.begin(),
				thrust::make_permutation_iterator(d_obs.begin(),obs_idx_begin),
				thrust::make_permutation_iterator(d_sigma.begin(),obs_idx_begin) ));

			// transform_reduce hands its result back to the host, so it is stored
			// straight into the caller's buffer; no device-side result vector is needed.
			for(int m=0;m<num_model;++m)
			{
				ph_chisq[m] = thrust::transform_reduce(
					input_begin+m*num_data,
					input_begin+((m+1)*num_data),
					calc_ressq_functor(), 0., thrust::plus<double>() );
			}
			ebf::sync();
		}
		else
		{
			// no usable GPU: run the same reduction on host copies of the inputs
			thrust::host_vector<double> h_model(ph_model,ph_model+num);
			thrust::host_vector<double> h_obs(ph_obs,ph_obs+num_data);
			thrust::host_vector<double> h_sigma(ph_sigma,ph_sigma+num_data);

			typedef thrust::permutation_iterator<thrust::host_vector<double>::iterator, ObsIdxIter> ObsIter;
			typedef thrust::tuple<thrust::host_vector<double>::iterator, ObsIter, ObsIter> ModelObsSigmaIteratorTuple;
			typedef thrust::zip_iterator<ModelObsSigmaIteratorTuple> ModelObsSigmaZipIter;
			ModelObsSigmaZipIter input_begin = thrust::make_zip_iterator(thrust::make_tuple(
				h_model.begin(),
				thrust::make_permutation_iterator(h_obs.begin(),obs_idx_begin),
				thrust::make_permutation_iterator(h_sigma.begin(),obs_idx_begin) ));

			// The results must reach the caller: write them into ph_chisq, not into a
			// local vector that is discarded on return.
			for(int m=0;m<num_model;++m)
			{
				ph_chisq[m] = thrust::transform_reduce(
					input_begin+m*num_data,
					input_begin+((m+1)*num_data),
					calc_ressq_functor(), 0., thrust::plus<double>() );
			}
		}
	}