double LRM::log_likelihood(const Vec &beta, Vec *g, Mat *h,
                           bool initialize_derivs) const {
  const LRM::DatasetType &data(dat());
  if (initialize_derivs) {
    if (g) {
      *g = 0;
      if (h) { *h = 0; }
    }
  }
  double ans = 0;
  int n = data.size();
  for (int i = 0; i < n; ++i) {
    bool y = data[i]->y();
    const Vec &x(data[i]->x());
    double eta = predict(x) + log_alpha_;
    double loglike = plogis(eta, 0, 1, y, true);
    ans += loglike;
    if (g) {
      double logp = y ? loglike : plogis(eta, 0, 1, true, true);
      double p = exp(logp);
      *g += (y - p) * x;
      if (h) { h->add_outer(x, x, -p * (1 - p)); }
    }
  }
  return ans;
}
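//----------------------------------------------------------------------
// The derivative bookkeeping above follows the standard logistic
// regression identities: with p_i = plogis(eta_i), the gradient of the
// Bernoulli log likelihood is sum_i (y_i - p_i) x_i and the Hessian is
// -sum_i p_i (1 - p_i) x_i x_i'.  A minimal self-contained sketch of the
// gradient computation (hypothetical code, not part of the library; it
// uses plain std:: containers in place of Vec/Mat):
#include <cmath>
#include <vector>

double bernoulli_logit_loglike(const std::vector<std::vector<double>> &X,
                               const std::vector<int> &y,
                               const std::vector<double> &beta,
                               std::vector<double> *grad) {
  double ans = 0;
  if (grad) grad->assign(beta.size(), 0.0);
  for (size_t i = 0; i < X.size(); ++i) {
    double eta = 0;
    for (size_t j = 0; j < beta.size(); ++j) eta += X[i][j] * beta[j];
    double p = 1.0 / (1.0 + std::exp(-eta));     // plogis(eta)
    ans += y[i] ? std::log(p) : std::log1p(-p);  // log P(y_i | eta)
    if (grad) {
      for (size_t j = 0; j < beta.size(); ++j) {
        (*grad)[j] += (y[i] - p) * X[i][j];      // (y - p) * x
      }
    }
  }
  return ans;
}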
//----------------------------------------------------------------------
std::pair<double, double> BinomialLogitCltDataImputer::impute_large_sample(
    RNG &rng,
    double number_of_trials,
    double number_of_successes,
    double linear_predictor) const {
  double information = 0.0;
  const Vector &mixing_weights(mixture_approximation.weights());
  const Vector &sigma(mixture_approximation.sigma());
  double negative_logit_support = plogis(0, linear_predictor, 1, true);
  double positive_logit_support = plogis(0, linear_predictor, 1, false);
  Vector p0 = mixing_weights / negative_logit_support;
  Vector p1 = mixing_weights / positive_logit_support;
  for (int m = 0; m < mixture_approximation.dim(); ++m) {
    p0[m] *= pnorm(0, linear_predictor, sigma[m], true);
    p1[m] *= pnorm(0, linear_predictor, sigma[m], false);
  }

  // p0 is the probability distribution over the mixture component
  // indicators for the failures.  N0 is the count of the number of
  // failures belonging to each mixture component.
  std::vector<int> N0 = rmultinom_mt(
      rng, number_of_trials - number_of_successes, p0 / sum(p0));

  // p1 is the probability distribution over the mixture component
  // indicators for the successes.  N1 is the count of the number of
  // successes in each mixture component.
  std::vector<int> N1 = rmultinom_mt(rng, number_of_successes, p1 / sum(p1));

  double simulation_mean = 0;
  double simulation_variance = 0;
  for (int m = 0; m < N0.size(); ++m) {
    int total_obs = N0[m] + N1[m];
    if (total_obs == 0) {
      continue;
    }
    double sigsq = square(sigma[m]);
    double sig4 = square(sigsq);
    information += total_obs / sigsq;
    double truncated_normal_mean;
    double truncated_normal_variance;
    double cutpoint = 0;
    if (N0[m] > 0) {
      trun_norm_moments(linear_predictor, sigma[m], cutpoint, false,
                        &truncated_normal_mean, &truncated_normal_variance);
      simulation_mean += N0[m] * truncated_normal_mean / sigsq;
      simulation_variance += N0[m] * truncated_normal_variance / sig4;
    }
    if (N1[m] > 0) {
      trun_norm_moments(linear_predictor, sigma[m], cutpoint, true,
                        &truncated_normal_mean, &truncated_normal_variance);
      simulation_mean += N1[m] * truncated_normal_mean / sigsq;
      simulation_variance += N1[m] * truncated_normal_variance / sig4;
    }
  }
  double information_weighted_sum = rnorm_mt(
      rng, simulation_mean, sqrt(simulation_variance));
  return std::make_pair(information_weighted_sum, information);
}
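//----------------------------------------------------------------------
// In impute_large_sample above, p0[m] (after normalization) is the
// posterior probability that a failure's latent logit came from mixture
// component m:
//   p0[m] proportional to w_m * Phi((0 - eta) / sigma_m),
// where the normal mixture sum_m w_m N(eta, sigma_m^2) approximates the
// logistic(eta, 1) distribution.  A hedged, self-contained sketch of
// that weight computation (hypothetical helper; the weights and sigmas
// are placeholders, not the library's actual mixture approximation):
#include <cmath>
#include <vector>

// Normal CDF via the complementary error function.
inline double norm_cdf(double x, double mean, double sd) {
  return 0.5 * std::erfc(-(x - mean) / (sd * std::sqrt(2.0)));
}

std::vector<double> failure_component_probs(
    double eta,
    const std::vector<double> &weights,  // mixing weights, sum to 1
    const std::vector<double> &sigma) {  // component standard deviations
  double p_negative = 1.0 / (1.0 + std::exp(eta));  // plogis(0, eta, 1, true)
  std::vector<double> p0(weights.size());
  double total = 0;
  for (size_t m = 0; m < weights.size(); ++m) {
    p0[m] = weights[m] * norm_cdf(0, eta, sigma[m]) / p_negative;
    total += p0[m];
  }
  for (double &p : p0) p /= total;  // normalize, as in p0 / sum(p0)
  return p0;
}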
//----------------------------------------------------------------------
void BLCSSS::rwm_draw_chunk(int chunk) {
  clock_t start = clock();
  const Selector &inc(m_->coef().inc());
  int nvars = inc.nvars();
  Vec full_nonzero_beta = m_->beta();  // only nonzero components

  // Compute information matrix for proposal distribution.  For
  // efficiency, also compute the log-posterior of the current beta.
  Vec mu(inc.select(pri_->mu()));
  Spd siginv(inc.select(pri_->siginv()));
  double original_logpost = dmvn(full_nonzero_beta, mu, siginv, 0, true);
  const std::vector<Ptr<BinomialRegressionData> > &data(m_->dat());
  int nobs = data.size();
  int full_chunk_size = compute_chunk_size();
  int chunk_start = chunk * full_chunk_size;
  int elements_remaining = nvars - chunk_start;
  int this_chunk_size = std::min(elements_remaining, full_chunk_size);
  Selector chunk_selector(nvars, false);
  for (int i = chunk_start; i < chunk_start + this_chunk_size; ++i) {
    chunk_selector.add(i);
  }

  Spd proposal_ivar = chunk_selector.select(siginv);
  for (int i = 0; i < nobs; ++i) {
    Vec x = inc.select(data[i]->x());
    double eta = x.dot(full_nonzero_beta);
    double prob = plogis(eta);
    double weight = prob * (1 - prob);
    VectorView x_chunk(x, chunk_start, this_chunk_size);
    // Only the upper triangle is accessed.  Need to reflect at end of loop.
    proposal_ivar.add_outer(x_chunk, weight, false);
    int yi = data[i]->y();
    int ni = data[i]->n();
    original_logpost += dbinom(yi, ni, prob, true);
  }
  proposal_ivar.reflect();

  VectorView beta_chunk(full_nonzero_beta, chunk_start, this_chunk_size);
  if (tdf_ > 0) {
    beta_chunk = rmvt_ivar_mt(
        rng(), beta_chunk, proposal_ivar / rwm_variance_scale_factor_, tdf_);
  } else {
    beta_chunk = rmvn_ivar_mt(
        rng(), beta_chunk, proposal_ivar / rwm_variance_scale_factor_);
  }

  double logpost = dmvn(full_nonzero_beta, mu, siginv, 0, true);
  Vec full_beta(inc.expand(full_nonzero_beta));
  logpost += m_->log_likelihood(full_beta, 0, 0, false);
  double log_alpha = logpost - original_logpost;
  double logu = log(runif_mt(rng()));
  ++rwm_chunk_attempts_;
  if (logu < log_alpha) {
    m_->set_beta(full_nonzero_beta);
    ++rwm_chunk_successes_;
  }
  clock_t end = clock();
  rwm_chunk_times_ += double(end - start) / CLOCKS_PER_SEC;
}
//----------------------------------------------------------------------
void BLCSSS::rwm_draw_chunk(int chunk) {
  const Selector &inc(m_->coef().inc());
  int nvars = inc.nvars();
  Vector full_nonzero_beta = m_->included_coefficients();

  // Compute information matrix for proposal distribution.  For
  // efficiency, also compute the log-posterior of the current beta.
  Vector mu(inc.select(pri_->mu()));
  SpdMatrix siginv(inc.select(pri_->siginv()));
  double original_logpost = dmvn(full_nonzero_beta, mu, siginv, 0, true);
  const std::vector<Ptr<BinomialRegressionData> > &data(m_->dat());
  int nobs = data.size();
  int full_chunk_size = compute_chunk_size(max_rwm_chunk_size_);
  int chunk_start = chunk * full_chunk_size;
  int elements_remaining = nvars - chunk_start;
  int this_chunk_size = std::min(elements_remaining, full_chunk_size);
  Selector chunk_selector(nvars, false);
  for (int i = chunk_start; i < chunk_start + this_chunk_size; ++i) {
    chunk_selector.add(i);
  }

  SpdMatrix proposal_ivar = chunk_selector.select(siginv);
  for (int i = 0; i < nobs; ++i) {
    Vector x = inc.select(data[i]->x());
    double eta = x.dot(full_nonzero_beta);
    double prob = plogis(eta);
    double weight = prob * (1 - prob);
    VectorView x_chunk(x, chunk_start, this_chunk_size);
    // Only the upper triangle is accessed.  Need to reflect at end of loop.
    proposal_ivar.add_outer(x_chunk, weight, false);
    original_logpost += dbinom(data[i]->y(), data[i]->n(), prob, true);
  }
  proposal_ivar.reflect();

  VectorView beta_chunk(full_nonzero_beta, chunk_start, this_chunk_size);
  if (tdf_ > 0) {
    beta_chunk = rmvt_ivar_mt(
        rng(), beta_chunk, proposal_ivar / rwm_variance_scale_factor_, tdf_);
  } else {
    beta_chunk = rmvn_ivar_mt(
        rng(), beta_chunk, proposal_ivar / rwm_variance_scale_factor_);
  }

  double logpost = dmvn(full_nonzero_beta, mu, siginv, 0, true);
  Vector full_beta(inc.expand(full_nonzero_beta));
  logpost += m_->log_likelihood(full_beta, 0, 0, false);
  double log_alpha = logpost - original_logpost;
  double logu = log(runif_mt(rng()));
  if (logu < log_alpha) {
    m_->set_included_coefficients(full_nonzero_beta);
    move_accounting_.record_acceptance("rwm_chunk");
  } else {
    move_accounting_.record_rejection("rwm_chunk");
  }
}
double LRM::log_likelihood(const Vector &beta, Vector *g, Matrix *h,
                           bool initialize_derivs) const {
  const LRM::DatasetType &data(dat());
  if (initialize_derivs) {
    if (g) {
      g->resize(beta.size());
      *g = 0;
      if (h) {
        h->resize(beta.size(), beta.size());
        *h = 0;
      }
    }
  }
  double ans = 0;
  int n = data.size();
  bool all_coefficients_included = coef().nvars() == xdim();
  const Selector &inc(coef().inc());
  for (int i = 0; i < n; ++i) {
    bool y = data[i]->y();
    const Vector &x(data[i]->x());
    double eta = predict(x) + log_alpha_;
    double loglike = plogis(eta, 0, 1, y, true);
    ans += loglike;
    if (g) {
      double logp = y ? loglike : plogis(eta, 0, 1, true, true);
      double p = exp(logp);
      if (all_coefficients_included) {
        *g += (y - p) * x;
        if (h) { h->add_outer(x, x, -p * (1 - p)); }
      } else {
        Vector reduced_x = inc.select(x);
        *g += (y - p) * reduced_x;
        if (h) { h->add_outer(reduced_x, reduced_x, -p * (1 - p)); }
      }
    }
  }
  return ans;
}
void BinomialLogitSamplerRwm::draw() {
  const std::vector<Ptr<BinomialRegressionData> > &data(m_->dat());
  SpdMatrix ivar(pri_->siginv());
  Vector beta(m_->Beta());
  for (int i = 0; i < data.size(); ++i) {
    Ptr<BinomialRegressionData> dp = data[i];
    double eta = beta.dot(dp->x());
    double prob = plogis(eta);
    ivar.add_outer(dp->x(), dp->n() * prob * (1 - prob));
  }
  proposal_->set_ivar(ivar);
  beta = sam_.draw(beta);
  m_->set_Beta(beta);
}
Vector SSLM::simulate_forecast(const Matrix &forecast_predictors,
                               const Vector &trials,
                               const Vector &final_state) {
  StateSpaceModelBase::set_state_model_behavior(StateModel::MARGINAL);
  Vector ans(nrow(forecast_predictors));
  Vector state = final_state;
  int t0 = dat().size();
  for (int t = 0; t < ans.size(); ++t) {
    state = simulate_next_state(state, t + t0);
    double eta = observation_matrix(t + t0).dot(state)
        + observation_model_->predict(forecast_predictors.row(t));
    double probability = plogis(eta);
    ans[t] = rbinom(lround(trials[t]), probability);
  }
  return ans;
}
//----------------------------------------------------------------------
double BinomialLogitLogPostChunk::operator()(
    const Vector &beta_chunk, Vector &grad, Matrix &hess, int nd) const {
  Vector nonzero_beta = m_->included_coefficients();
  VectorView nonzero_beta_chunk(nonzero_beta, start_, chunk_size_);
  nonzero_beta_chunk = beta_chunk;

  const std::vector<Ptr<BinomialRegressionData> > &data(m_->dat());
  const Selector &inc(m_->coef().inc());
  const SpdMatrix siginv(inc.select(pri_->siginv()));
  const Vector mu(inc.select(pri_->mu()));
  double ans = dmvn(nonzero_beta, mu, siginv, 0.0, true);
  if (nd > 0) {
    Selector chunk_selector(nonzero_beta.size(), false);
    for (int i = start_; i < start_ + chunk_size_; ++i) {
      chunk_selector.add(i);
    }
    grad = -1 * chunk_selector.select(siginv * (nonzero_beta - mu));
    if (nd > 1) {
      hess = chunk_selector.select(siginv);
      hess *= -1;
    }
  }

  int nobs = data.size();
  for (int i = 0; i < nobs; ++i) {
    double yi = data[i]->y();
    double ni = data[i]->n();
    Vector x = inc.select(data[i]->x());
    double eta = nonzero_beta.dot(x);
    double prob = plogis(eta);
    ans += dbinom(yi, ni, prob, true);
    if (nd > 0) {
      const ConstVectorView x_chunk(x, start_, chunk_size_);
      grad.axpy(x_chunk, yi - ni * prob);
      if (nd > 1) {
        hess.add_outer(x_chunk, x_chunk, -ni * prob * (1 - prob));
      }
    }
  }
  return ans;
}
static double gam0_fun(double gam, void *info) {
  Cluster *clust;
  double dg;
  int i;
  double x;
  double location = 0.0;
  double scale = 1.0;
  int give_log = 0;

  clust = info;
  dg = clust->ytot;
  /* egam = exp(gam); */
  for (i = 0; i < clust->n; i++) {
    x = gam + clust->lin[i];
    dg -= clust->weight[i] * plogis(x, location, scale, 1, give_log);
    /* egscore = egam * exp(ex->lin[i]);
       dg -= ex->weights[i] * egscore / (1.0 + egscore); */
  }
  return (dg);
}
double LS::draw_z(bool y, double eta) const {
  double trun_prob = plogis(0, eta);
  double u = y ? runif(trun_prob, 1) : runif(0, trun_prob);
  return qlogis(u, eta);
}
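//----------------------------------------------------------------------
// draw_z uses the inverse-CDF trick: to draw a logistic(eta) variable
// truncated to (0, inf) (y true) or (-inf, 0) (y false), draw U
// uniformly on the matching slice of CDF values and map it back through
// the quantile function.  OrdinalLogitImputer::impute below applies the
// same identity to a finite interval.  A self-contained sketch under
// that reading (hypothetical helpers, not the library's plogis/qlogis):
#include <cmath>
#include <random>

inline double logistic_cdf(double x, double mu) {
  return 1.0 / (1.0 + std::exp(-(x - mu)));
}
inline double logistic_quantile(double p, double mu) {
  return mu + std::log(p / (1.0 - p));
}

double draw_truncated_logistic(std::mt19937 &rng, bool y, double eta) {
  double cut = logistic_cdf(0.0, eta);  // P(Z < 0)
  std::uniform_real_distribution<double> unif(y ? cut : 0.0, y ? 1.0 : cut);
  return logistic_quantile(unif(rng), eta);  // draw restricted to one side of 0
}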
Vector StateSpaceLogitModel::one_step_holdout_prediction_errors(
    RNG &rng,
    BinomialLogitDataImputer &data_imputer,
    const Vector &successes,
    const Vector &trials,
    const Matrix &predictors,
    const Vector &final_state) {
  if (nrow(predictors) != successes.size()
      || trials.size() != successes.size()) {
    report_error("Size mismatch in arguments provided to "
                 "one_step_holdout_prediction_errors.");
  }
  Vector ans(successes.size());
  int t0 = dat().size();
  ScalarKalmanStorage ks(state_dimension());
  ks.a = *state_transition_matrix(t0 - 1) * final_state;
  ks.P = SpdMatrix(state_variance_matrix(t0 - 1)->dense());

  // This function differs from the Gaussian case because the response is
  // on the binomial scale, and the state model is on the logit scale.
  // Because of the nonlinearity, we need to incorporate the uncertainty
  // about the forecast in the prediction for the observation.  We do
  // this by imputing the latent logit and its mixture indicator for each
  // observation.  The strategy is (for each observation):
  //   1) simulate the next state,
  //   2) simulate w_t given the state,
  //   3) Kalman update the state given w_t.
  for (int t = 0; t < ans.size(); ++t) {
    bool missing = false;
    Vector state = rmvn(ks.a, ks.P);
    double state_contribution = observation_matrix(t + t0).dot(state);
    double regression_contribution =
        observation_model_->predict(predictors.row(t));
    double mu = state_contribution + regression_contribution;
    double prediction = trials[t] * plogis(mu);
    ans[t] = successes[t] - prediction;

    // ans[t] is a random draw of the one step ahead prediction error at
    // time t0+t given observed data to time t0+t-1.  We now proceed with
    // the steps needed to update the Kalman filter so we can compute
    // ans[t+1].
    double precision_weighted_sum, total_precision;
    std::tie(precision_weighted_sum, total_precision) = data_imputer.impute(
        rng, trials[t], successes[t], mu);
    double latent_observation = precision_weighted_sum / total_precision;
    double latent_variance = 1.0 / total_precision;

    // The latent state was drawn from its predictive distribution given
    // Y[t0 + t - 1] and used to impute the latent data for y[t0 + t].
    // That latent data is now used to update the Kalman filter for the
    // next time period.  It is important that we discard the imputed
    // state at this point.
    sparse_scalar_kalman_update(latent_observation - regression_contribution,
                                ks.a,
                                ks.P,
                                ks.K,
                                ks.F,
                                ks.v,
                                missing,
                                observation_matrix(t + t0),
                                latent_variance,
                                *state_transition_matrix(t + t0),
                                *state_variance_matrix(t + t0));
  }
  return ans;
}
double F77_SUB(cdflogis)(double *x, double *location, double *scale,
                         int *lower_tail, int *give_log) {
  return plogis(*x, *location, *scale, *lower_tail, *give_log);
}
double OrdinalLogitImputer::impute(RNG &rng,
                                   double eta,
                                   double lower_cutpoint,
                                   double upper_cutpoint) {
  return eta + qlogis(runif_mt(rng,
                               plogis(lower_cutpoint - eta),
                               plogis(upper_cutpoint - eta)));
}
// Convert from the logit (log odds) scale to the probability scale.
inline double logit_inv(double logit) { return plogis(logit); }
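//----------------------------------------------------------------------
// With the default location 0 and scale 1, plogis(logit) is the standard
// logistic CDF, 1 / (1 + exp(-logit)).  If a standalone equivalent is
// ever needed, a numerically careful sketch (hypothetical helper, not
// the library's plogis) avoids overflow in exp() by branching on sign:
#include <cmath>

inline double logistic(double x) {
  if (x >= 0) {
    return 1.0 / (1.0 + std::exp(-x));  // exp(-x) <= 1, safe
  }
  double e = std::exp(x);               // x < 0, so e <= 1
  return e / (1.0 + e);                 // same value, no overflow
}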