コード例 #1
0
ファイル: sampleGlm.cpp プロジェクト: rforge/bfp
SEXP
cpp_sampleGlm(SEXP r_interface)
{
    // ----------------------------------------------------------------------------------
    // extract arguments
    // ----------------------------------------------------------------------------------

    r_interface = CDR(r_interface);
    List rcpp_model(CAR(r_interface));

    r_interface = CDR(r_interface);
    List rcpp_data(CAR(r_interface));

    r_interface = CDR(r_interface);
    List rcpp_fpInfos(CAR(r_interface));

    r_interface = CDR(r_interface);
    List rcpp_ucInfos(CAR(r_interface));
    
    r_interface = CDR(r_interface);
    List rcpp_fixInfos(CAR(r_interface));
    
    r_interface = CDR(r_interface);
    List rcpp_distribution(CAR(r_interface));

    r_interface = CDR(r_interface);
    List rcpp_searchConfig(CAR(r_interface));

    r_interface = CDR(r_interface);
    List rcpp_options(CAR(r_interface));

    r_interface = CDR(r_interface);
    List rcpp_marginalz(CAR(r_interface));

    // ----------------------------------------------------------------------------------
    // unpack the R objects
    // ----------------------------------------------------------------------------------

    // data:
    const NumericMatrix n_x = rcpp_data["x"];
    const AMatrix x(n_x.begin(), n_x.nrow(),
                   n_x.ncol());

    const NumericMatrix n_xCentered = rcpp_data["xCentered"];
    const AMatrix xCentered(n_xCentered.begin(), n_xCentered.nrow(),
                           n_xCentered.ncol());

    const NumericVector n_y = rcpp_data["y"];
    const AVector y(n_y.begin(), n_y.size());

    const IntVector censInd = as<IntVector>(rcpp_data["censInd"]);

    // FP configuration:

    // vector of maximum fp degrees
    const PosIntVector fpmaxs = as<PosIntVector>(rcpp_fpInfos["fpmaxs"]);
    // corresponding vector of fp column indices
    const PosIntVector fppos = rcpp_fpInfos["fppos"];
    // corresponding vector of power set cardinalities
    const PosIntVector fpcards = rcpp_fpInfos["fpcards"];
    // names of fp terms
    const StrVector fpnames = rcpp_fpInfos["fpnames"];


    // UC configuration:

    const PosIntVector ucIndices = rcpp_ucInfos["ucIndices"];
    List rcpp_ucColList = rcpp_ucInfos["ucColList"];

    std::vector<PosIntVector> ucColList;
    for (R_len_t i = 0; i != rcpp_ucColList.length(); ++i)
    {
        ucColList.push_back(as<PosIntVector>(rcpp_ucColList[i]));
    }
    
    // fixed covariate configuration:
    
    const PosIntVector fixIndices = rcpp_fixInfos["fixIndices"];
    List rcpp_fixColList = rcpp_fixInfos["fixColList"];
    
    std::vector<PosIntVector> fixColList;
    for (R_len_t i = 0; i != rcpp_fixColList.length(); ++i)
    {
      fixColList.push_back(as<PosIntVector>(rcpp_fixColList[i]));
    }
    
    
    

    // distributions info:

    const double nullModelLogMargLik = as<double>(rcpp_distribution["nullModelLogMargLik"]);
    const double nullModelDeviance = as<double>(rcpp_distribution["nullModelDeviance"]);
    S4 rcpp_gPrior = rcpp_distribution["gPrior"];
    List rcpp_family = rcpp_distribution["family"];
    const bool tbf = as<bool>(rcpp_distribution["tbf"]);
    const bool doGlm = as<bool>(rcpp_distribution["doGlm"]);
    
    const double empiricalMean = as<double>(rcpp_distribution["yMean"]);
    const bool empiricalgPrior = as<bool>(rcpp_distribution["empiricalgPrior"]);
    
    // model search configuration:
    const bool useFixedc = as<bool>(rcpp_searchConfig["useFixedc"]);
    
    // options:

    const bool estimateMargLik = as<bool>(rcpp_options["estimateMargLik"]);
    const bool verbose = as<bool>(rcpp_options["verbose"]);
    const bool debug = as<bool>(rcpp_options["debug"]);
    const bool isNullModel = as<bool>(rcpp_options["isNullModel"]);
    const bool useFixedZ = as<bool>(rcpp_options["useFixedZ"]);
    const double fixedZ = as<double>(rcpp_options["fixedZ"]);
#ifdef _OPENMP
    const bool useOpenMP = as<bool>(rcpp_options["useOpenMP"]);
#endif

    S4 rcpp_mcmc = rcpp_options["mcmc"];
    const PosInt iterations = rcpp_mcmc.slot("iterations");
    const PosInt burnin = rcpp_mcmc.slot("burnin");
    const PosInt step = rcpp_mcmc.slot("step");


    // z density stuff:

    const RFunction logMarginalZdens(as<SEXP>(rcpp_marginalz["logDens"]));
    const RFunction marginalZgen(as<SEXP>(rcpp_marginalz["gen"]));


    // ----------------------------------------------------------------------------------
    // further process arguments
    // ----------------------------------------------------------------------------------

    // data:

     // only the intercept is always included, that is fixed, in the model
     IntSet fixedCols;
     fixedCols.insert(1);

     // totalnumber is set to 0 because we do not care about it.
     const DataValues data(x, xCentered, y, censInd, 0, fixedCols);

     // FP configuration:
     const FpInfo fpInfo(fpcards, fppos, fpmaxs, fpnames, x);

     // UC configuration:

     // determine sizes of the UC groups, and the total size == maximum size reached together by all
     // UC groups.
     PosIntVector ucSizes;
     PosInt maxUcDim = 0;
     for (std::vector<PosIntVector>::const_iterator cols = ucColList.begin(); cols != ucColList.end(); ++cols)
     {
         PosInt thisSize = cols->size();

         maxUcDim += thisSize;
         ucSizes.push_back(thisSize);
     }
     const UcInfo ucInfo(ucSizes, maxUcDim, ucIndices, ucColList);

  
     // fix configuration:  
     
     // determine sizes of the fix groups, and the total size == maximum size reached together by all
     // UC groups.
     PosIntVector fixSizes;
     PosInt maxFixDim = 0;
     for (std::vector<PosIntVector>::const_iterator cols = fixColList.begin(); cols != fixColList.end(); ++cols)
     {
       PosInt thisSize = cols->size();
       
       maxFixDim += thisSize;
       fixSizes.push_back(thisSize);
     }
     const FixInfo fixInfo(fixSizes, maxFixDim, fixIndices, fixColList);
     
     
     
     
     
     // model configuration:
     GlmModelConfig config(rcpp_family, nullModelLogMargLik, nullModelDeviance, exp(fixedZ), rcpp_gPrior,
                           data.response, debug, useFixedc, empiricalMean, empiricalgPrior);


     // model config/info:
     const Model thisModel(ModelPar(rcpp_model["configuration"],
                                   fpInfo),
                          GlmModelInfo(as<List>(rcpp_model["information"])));


     // the options
     const Options options(estimateMargLik,
                           verbose,
                           debug,
                           isNullModel,
                           useFixedZ,
                           tbf,
                           doGlm,
                           iterations,
                           burnin,
                           step);

     // marginal z stuff
     const MarginalZ marginalZ(logMarginalZdens,
                               marginalZgen);


     // use only one thread if we do not want to use openMP.
#ifdef _OPENMP
     if(! useOpenMP)
     {
         omp_set_num_threads(1);
     } else {
         omp_set_num_threads(omp_get_num_procs());
     }
#endif


     // ----------------------------------------------------------------------------------
     // prepare the sampling
     // ----------------------------------------------------------------------------------

     Fitter fitter;
     int nCoefs;

     if(options.doGlm)
     {
         // construct IWLS object, which can be used for all IWLS stuff,
         // and also contains the design matrix etc
         fitter.iwlsObject = new Iwls(thisModel.par,
                                      data,
                                      fpInfo,
                                      ucInfo,
                                      fixInfo,
                                      config,
                                      config.linPredStart,
                                      options.useFixedZ,
                                      EPS,
                                      options.debug,
                                      options.tbf);

         nCoefs = fitter.iwlsObject->nCoefs;

         // check that we have the same answer about the null model as R
         //assert(fitter.iwlsObject->isNullModel == options.isNullModel);
         if(fitter.iwlsObject->isNullModel != options.isNullModel){
           Rcpp::stop("sampleGlm.cpp:cpp_sampleGlm: isNullModel != options.isNullModel");
         } 
     }
     else
     {
         AMatrix design = getDesignMatrix(thisModel.par, data, fpInfo, ucInfo, fixInfo, false);
         fitter.coxfitObject = new Coxfit(data.response,
                                          data.censInd,
                                          design,
                                          config.weights,
                                          config.offsets,
                                          1);

         // the number of coefficients (here it does not include the intercept!!)
         nCoefs = design.n_cols;

         // check that we do not have a null model here:
         // assert(nCoefs > 0);
         if(nCoefs <= 0){
           Rcpp::stop("sampleGlm.cpp:cpp_sampleGlm: nCoefs <= 0");
         } 
     }


     // allocate sample container
     Samples samples(nCoefs, options.nSamples);

     // count how many proposals we have accepted:
     PosInt nAccepted(0);

     // at what z do we start?
     double startZ = useFixedZ ? fixedZ : thisModel.info.zMode;

     // start container with current things
     Mcmc now(marginalZ, data.nObs, nCoefs);

     if(doGlm)
     {
         // get the mode for beta given the mode of the approximated marginal posterior as z
         // if TBF approach is used, this will be the only time the IWLS is used,
         // because we only need the MLE and the Cholesky factor of its
         // precision matrix estimate, which do not depend on z.
         PosInt iwlsIterations = fitter.iwlsObject->startWithNewLinPred(40,
                                                                        // this is the corresponding g
                                                                        exp(startZ),
                                                                        // and the start value for the linear predictor is taken from the Glm model config
                                                                        config.linPredStart);

         // echo debug-level message?
         if(options.debug)
         {
             Rprintf("\ncpp_sampleGlm: Initial IWLS for high density point finished after %d iterations",
                     iwlsIterations);
         }

         // this is the current proposal info:
         now.proposalInfo = fitter.iwlsObject->getResults();

         // and this is the current parameters sample:
         now.sample = Parameter(now.proposalInfo.coefs,
                                startZ);

         if(options.tbf)
         {
             // we will not compute this in the TBF case:
             now.logUnPosterior = R_NaReal;

             // start to compute the variance of the intercept parameter:

             // here the inverse cholesky factor of the precision matrix will
             // be stored. First, it's the identity matrix.
             AMatrix inverseQfactor = arma::eye(now.proposalInfo.qFactor.n_rows,
                                                now.proposalInfo.qFactor.n_cols);

             // do the inversion
             trs(false,
                 false,
                 now.proposalInfo.qFactor,
                 inverseQfactor);

             // now we can compute the variance of the intercept estimate:
             const AVector firstCol = inverseQfactor.col(0);
             const double interceptVar = arma::dot(firstCol, firstCol);

             // ok, now alter the qFactor appropriately to reflect the
             // independence assumption between the intercept estimate
             // and the other coefficients estimates
             now.proposalInfo.qFactor.col(0) = arma::zeros<AVector>(now.proposalInfo.qFactor.n_rows);
             now.proposalInfo.qFactor(0, 0) = sqrt(1.0 / interceptVar);
         }
         else
         {
             // compute the (unnormalized) log posterior of the proposal
             now.logUnPosterior = fitter.iwlsObject->computeLogUnPosteriorDens(now.sample);
         }
     }
     else
     {
         PosInt coxfitIterations = fitter.coxfitObject->fit();
         CoxfitResults coxResults = fitter.coxfitObject->finalizeAndGetResults();
         fitter.coxfitObject->checkResults();

         // echo debug-level message?
         if(options.debug)
         {
             Rprintf("\ncpp_sampleGlm: Cox fit finished after %d iterations",
                     coxfitIterations);
         }

         // we will not compute this in the TBF case:
         now.logUnPosterior = R_NaReal;

         // compute the Cholesky factorization of the covariance matrix
         int info = potrf(false,
                          coxResults.imat);

         // check that all went well
         if(info != 0)
         {
             std::ostringstream stream;
             stream << "dpotrf(coxResults.imat) got error code " << info << "in sampleGlm";
             throw std::domain_error(stream.str().c_str());
         }

         // compute the precision matrix, using the Cholesky factorization
         // of the covariance matrix
         now.proposalInfo.qFactor = arma::eye(now.proposalInfo.qFactor.n_rows,
                                              now.proposalInfo.qFactor.n_cols);
         info = potrs(false,
                      coxResults.imat,
                      now.proposalInfo.qFactor);

         // check that all went well
         if(info != 0)
         {
             std::ostringstream stream;
             stream << "dpotrs(coxResults.imat, now.proposalInfo.qFactor) got error code " << info << "in sampleGlm";
             throw std::domain_error(stream.str().c_str());
         }

         // compute the Cholesky factorization of the precision matrix
         info = potrf(false,
                      now.proposalInfo.qFactor);

         // check that all went well
         if(info != 0)
         {
             std::ostringstream stream;
             stream << "dpotrf(now.proposalInfo.qFactor) got error code " << info << "in sampleGlm";
             throw std::domain_error(stream.str().c_str());
         }

         // the MLE of the coefficients
         now.proposalInfo.coefs = coxResults.coefs;
     }

     // so the parameter object "now" is then also the high density point
     // required for the marginal likelihood estimate:
     const Mcmc highDensityPoint(now);

     // we accept this starting value, so initialize "old" with the same ones
     Mcmc old(now);

     // ----------------------------------------------------------------------------------
     // start sampling
     // ----------------------------------------------------------------------------------

     // echo debug-level message?
     if(options.debug)
     {
         if(tbf)
         {
             Rprintf("\ncpp_sampleGlm: Starting MC simulation");
         }
         else
         {
             Rprintf("\ncpp_sampleGlm: Starting MCMC loop");
         }
     }


     // i_iter starts at 1 !!
     for(PosInt i_iter = 1; i_iter <= options.iterations; ++i_iter)
     {
         // echo debug-level message?
         if(options.debug)
         {
             Rprintf("\ncpp_sampleGlm: Starting iteration no. %d", i_iter);
         }

         // ----------------------------------------------------------------------------------
         // store the proposal
         // ----------------------------------------------------------------------------------

         // sample one new log covariance factor z (other arguments than 1 are not useful
         // with the current setup of the RFunction wrapper class)
         now.sample.z = marginalZ.gen(1);

         if(options.tbf)
         {
             if(options.isNullModel)
             {
                 // note that we do not encounter this in the Cox case
                 // assert(options.doGlm);
                 if(!options.doGlm){
                   Rcpp::stop("sampleGlm.cpp:cpp_sampleGlm: options.doGlm should be TRUE");
                 } 

                 // draw the proposal coefs, which is here just the intercept
                 now.sample.coefs = drawNormalVector(now.proposalInfo.coefs,
                                                     now.proposalInfo.qFactor);

             }
             else
             {   // here we have at least one non-intercept coefficient

                 // get vector from N(0, I)
                 AVector w = drawNormalVariates(now.proposalInfo.coefs.n_elem,
                                                0.0,
                                                1.0);

                 // then solve L' * ret = w, and overwrite w with the result:
                 trs(false,
                     true,
                     now.proposalInfo.qFactor,
                     w);

                 // compute the shrinkage factor t = g / (g + 1)
                 const double g = exp(now.sample.z);
                
                //Previously used g directly, but if g=inf we need to use the limit
                 // const double shrinkFactor = g / (g + 1.0);
                const double shrinkFactor = isinf(g) ? 1 : g / (g + 1.0);
                 
                 // scale the variance of the non-intercept coefficients
                 // with this factor.
                 // In the Cox case: no intercept present, so scale everything
                 int startCoef = options.doGlm ? 1 : 0;

                 w.rows(startCoef, w.n_rows - 1) *= sqrt(shrinkFactor);

                 // also scale the mean of the non-intercept coefficients
                 // appropriately:
                 // In the Cox case: no intercept present, so scale everything
                 now.sample.coefs = now.proposalInfo.coefs;
                 now.sample.coefs.rows(startCoef, now.sample.coefs.n_rows - 1) *= shrinkFactor;

                 // so altogether we have:
                 now.sample.coefs += w;
             }
             ++nAccepted;
         }
         else // the generalized hyper-g prior case
         {
             // do 1 IWLS step, starting from the last linear predictor and the new z
             // (here the return value is not very interesting, as it must be 1)
             fitter.iwlsObject->startWithNewCoefs(1,
                                                  exp(now.sample.z),
                                                  now.sample.coefs);

             // get the results
             now.proposalInfo = fitter.iwlsObject->getResults();

             // draw the proposal coefs:
             now.sample.coefs = drawNormalVector(now.proposalInfo.coefs,
                                                 now.proposalInfo.qFactor);

             // compute the (unnormalized) log posterior of the proposal
             now.logUnPosterior = fitter.iwlsObject->computeLogUnPosteriorDens(now.sample);

             // ----------------------------------------------------------------------------------
             // get the reverse jump normal density
             // ----------------------------------------------------------------------------------

             // copy the old Mcmc object
             Mcmc reverse(old);

             // do again 1 IWLS step, starting from the sampled linear predictor and the old z
             fitter.iwlsObject->startWithNewCoefs(1,
                                          exp(reverse.sample.z),
                                          now.sample.coefs);

             // get the results for the reverse jump Gaussian:
             // only the proposal has changed in contrast to the old container,
             // the sample stays the same!
             reverse.proposalInfo = fitter.iwlsObject->getResults();


             // ----------------------------------------------------------------------------------
             // compute the proposal density ratio
             // ----------------------------------------------------------------------------------

             // first the log of the numerator, i.e. log(f(old | new)):
             double logProposalRatioNumerator = reverse.computeLogProposalDens();

             // second the log of the denominator, i.e. log(f(new | old)):
             double logProposalRatioDenominator = now.computeLogProposalDens();

             // so the log proposal density ratio is
             double logProposalRatio = logProposalRatioNumerator - logProposalRatioDenominator;

             // ----------------------------------------------------------------------------------
             // compute the posterior density ratio
             // ----------------------------------------------------------------------------------

             double logPosteriorRatio = now.logUnPosterior - old.logUnPosterior;

             // ----------------------------------------------------------------------------------
             // accept or reject proposal
             // ----------------------------------------------------------------------------------

             double acceptanceProb = exp(logPosteriorRatio + logProposalRatio);

             if(unif() < acceptanceProb)
             {
                 old = now;

                 ++nAccepted;
             }
             else
             {
                 now = old;
             }
         }

         // ----------------------------------------------------------------------------------
         // store the sample?
         // ----------------------------------------------------------------------------------

         // if the burnin was passed and we are at a multiple of step beyond that, then store
         // the sample.
         if((i_iter > options.burnin) &&
            (((i_iter - options.burnin) % options.step) == 0))
         {
             // echo debug-level message
             if(options.debug)
             {
                 Rprintf("\ncpp_sampleGlm: Storing samples of iteration no. %d", i_iter);
             }

             // store the current parameter sample
             samples.storeParameters(now.sample);

             // ----------------------------------------------------------------------------------
             // compute marginal likelihood terms
             // ----------------------------------------------------------------------------------

             // compute marginal likelihood terms and save them?
             // (Note that the tbf bool is just for safety here,
             // the R function sampleGlm will set estimateMargLik to FALSE
             // when tbf is TRUE.)
             if(options.estimateMargLik && (! options.tbf))
             {
                 // echo debug-level message?
                 if(options.debug)
                 {
                     Rprintf("\ncpp_sampleGlm: Compute marginal likelihood estimation terms");
                 }

                 // ----------------------------------------------------------------------------------
                 // compute next term for the denominator
                 // ----------------------------------------------------------------------------------

                 // draw from the high density point proposal distribution
                 Mcmc denominator(highDensityPoint);
                 denominator.sample.z = marginalZ.gen(1);

                 fitter.iwlsObject->startWithNewLinPred(1,
                                                exp(denominator.sample.z),
                                                highDensityPoint.proposalInfo.linPred);

                 denominator.proposalInfo = fitter.iwlsObject->getResults();

                 denominator.sample.coefs = drawNormalVector(denominator.proposalInfo.coefs,
                                                             denominator.proposalInfo.qFactor);

                 // get posterior density of the sample
                 denominator.logUnPosterior = fitter.iwlsObject->computeLogUnPosteriorDens(denominator.sample);

                 // get the proposal density at the sample
                 double denominator_logProposalDensity = denominator.computeLogProposalDens();

                 // then the reverse stuff:
                 // first we copy again the high density point
                 Mcmc revDenom(highDensityPoint);

                 // but choose the new sampled coefficients as starting point
                 fitter.iwlsObject->startWithNewCoefs(1,
                                              exp(revDenom.sample.z),
                                              denominator.sample.coefs);
                 revDenom.proposalInfo = fitter.iwlsObject->getResults();

                 // so the reverse proposal density is
                 double revDenom_logProposalDensity = revDenom.computeLogProposalDens();


                 // so altogether the next term for the denominator is the following acceptance probability
                 double denominatorTerm = denominator.logUnPosterior - highDensityPoint.logUnPosterior +
                                          revDenom_logProposalDensity - denominator_logProposalDensity;
                 denominatorTerm = exp(fmin(0.0, denominatorTerm));

                 // ----------------------------------------------------------------------------------
                 // compute next term for the numerator
                 // ----------------------------------------------------------------------------------

                 // compute the proposal density of the current sample starting from the high density point
                 Mcmc numerator(now);

                 fitter.iwlsObject->startWithNewLinPred(1,
                                                exp(numerator.sample.z),
                                                highDensityPoint.proposalInfo.linPred);
                 numerator.proposalInfo = fitter.iwlsObject->getResults();

                 double numerator_logProposalDensity = numerator.computeLogProposalDens();

                 // then compute the reverse proposal density of the high density point when we start from the current
                 // sample
                 Mcmc revNum(highDensityPoint);

                 fitter.iwlsObject->startWithNewCoefs(1,
                                              exp(revNum.sample.z),
                                              now.sample.coefs);
                 revNum.proposalInfo = fitter.iwlsObject->getResults();

                 double revNum_logProposalDensity = revNum.computeLogProposalDens();

                 // so altogether the next term for the numerator is the following guy:
                 double numeratorTerm = exp(fmin(revNum_logProposalDensity,
                                                 highDensityPoint.logUnPosterior - now.logUnPosterior +
                                                 numerator_logProposalDensity));

                 // ----------------------------------------------------------------------------------
                 // finally store both terms
                 // ----------------------------------------------------------------------------------

                 samples.storeMargLikTerms(numeratorTerm, denominatorTerm);

             }
         }

         // ----------------------------------------------------------------------------------
         // echo progress?
         // ----------------------------------------------------------------------------------

         // echo debug-level message?
         if(options.debug)
         {
             Rprintf("\ncpp_sampleGlm: Finished iteration no. %d", i_iter);
         }

         if((i_iter % std::max(static_cast<int>(options.iterations / 100), 1) == 0) &&
             options.verbose)
         {
             // display computation progress at each percent
             Rprintf("-");

         } // end echo progress

     } // end MCMC loop


     // echo debug-level message?
     if(options.debug)
     {
         if(tbf)
         {
             Rprintf("\ncpp_sampleGlm: Finished MC simulation");
         }
         else
         {
             Rprintf("\ncpp_sampleGlm: Finished MCMC loop");
         }
     }


     // ----------------------------------------------------------------------------------
     // build up return list for R and return that.
     // ----------------------------------------------------------------------------------

     return List::create(_["samples"] = samples.convert2list(),
                         _["nAccepted"] = nAccepted,
                         _["highDensityPointLogUnPosterior"] = highDensityPoint.logUnPosterior);

} // end cpp_sampleGlm