//---------------------------------------------------------------------- void ProbitBartPosteriorSampler::impute_latent_data_point(DataType *data) { double eta = data->prediction(); int n = data->n(); int number_positive = data->y(); int number_negative = n - number_positive; double sum_of_probits = 0; if (number_positive > 5) { double mean = 0; double variance = 1; trun_norm_moments(eta, 1, 0, true, &mean, &variance); sum_of_probits += rnorm_mt(rng(), number_positive * mean, sqrt(number_positive * variance)); } else { for (int i = 0; i < number_positive; ++i) { sum_of_probits += rtrun_norm_mt(rng(), eta, 1, 0, true); } } if (number_negative > 5) { double mean = 0; double variance = 1; trun_norm_moments(eta, 1, 0, false, &mean, &variance); sum_of_probits += rnorm_mt(rng(), number_negative * mean, sqrt(number_negative * variance)); } else { for (int i = 0; i < number_negative; ++i) { sum_of_probits += rtrun_norm_mt(rng(), eta, 1, 0, false); } } data->set_sum_of_residuals(sum_of_probits - (n * eta)); }
//---------------------------------------------------------------------- std::pair<double, double> BinomialLogitCltDataImputer::impute_large_sample( RNG &rng, double number_of_trials, double number_of_successes, double linear_predictor) const { double information = 0.0; const Vector &mixing_weights(mixture_approximation.weights()); const Vector &sigma(mixture_approximation.sigma()); double negative_logit_support = plogis(0, linear_predictor, 1, true); double positive_logit_support = plogis(0, linear_predictor, 1, false); Vector p0 = mixing_weights / negative_logit_support; Vector p1 = mixing_weights / positive_logit_support; for (int m = 0; m < mixture_approximation.dim(); ++m) { p0[m] *= pnorm(0, linear_predictor, sigma[m], true); p1[m] *= pnorm(0, linear_predictor, sigma[m], false); } // p0 is the probability distribution over the mixture component // indicators for the failures. N0 is the count of the number of // failures belonging to each mixture component. std::vector<int> N0 = rmultinom_mt(rng, number_of_trials - number_of_successes, p0 / sum(p0)); // p1 is the probability distribution over the mixture component // indicators for the successes. N1 is the count of the number // of successes in each mixture component. std::vector<int> N1 = rmultinom_mt(rng, number_of_successes, p1 / sum(p1)); double simulation_mean = 0; double simulation_variance = 0; for (int m = 0; m < N0.size(); ++m) { int total_obs = N0[m] + N1[m]; if (total_obs == 0) { continue; } double sigsq = square(sigma[m]); double sig4 = square(sigsq); information += total_obs / sigsq; double truncated_normal_mean; double truncated_normal_variance; double cutpoint = 0; if (N0[m] > 0) { trun_norm_moments(linear_predictor, sigma[m], cutpoint, false, &truncated_normal_mean, &truncated_normal_variance); simulation_mean += N0[m] * truncated_normal_mean / sigsq; simulation_variance += N0[m] * truncated_normal_variance / sig4; } if (N1[m] > 0) { trun_norm_moments(linear_predictor, sigma[m], cutpoint, true, &truncated_normal_mean, &truncated_normal_variance); simulation_mean += N1[m] * truncated_normal_mean / sigsq; simulation_variance += N1[m] * truncated_normal_variance / sig4; } } double information_weighted_sum = rnorm_mt(rng, simulation_mean, sqrt(simulation_variance)); return std::make_pair(information_weighted_sum, information); }
double BinomialProbitDataImputer::impute(RNG &rng, double number_of_trials, double number_of_successes, double eta) const { int64_t n = lround(number_of_trials); int64_t y = lround(number_of_successes); if (y < 0 || n < 0) { report_error( "Negative values not allowed in " "BinomialProbitDataImputer::impute()."); } if (y > n) { report_error( "Success count exceeds trial count in " "BinomialProbitDataImputer::impute."); } double mean, variance; double ans = 0; if (y > clt_threshold_) { trun_norm_moments(eta, 1, 0, true, &mean, &variance); // If we draw y deviates from the same truncated normal and add // them up we'll have a normal with mean (y * mean) and variance // (y * variance). ans += rnorm_mt(rng, y * mean, sqrt(y * variance)); } else { for (int i = 0; i < y; ++i) { // TODO: If y is large-ish but not quite // clt_threshold_ then we might waste some time here // constantly rebuilding the same TnSampler object. ans += rtrun_norm_mt(rng, eta, 1, 0, true); } } if (n - y > clt_threshold_) { trun_norm_moments(eta, 1, 0, false, &mean, &variance); ans += rnorm_mt(rng, (n - y) * mean, sqrt((n - y) * variance)); } else { for (int i = 0; i < n - y; ++i) { ans += rtrun_norm_mt(rng, eta, 1, 0, false); } } return ans; }