void betabinom(){ apop_model *beta = apop_model_set_parameters(apop_beta, 10, 5); apop_model *drawfrom = apop_model_copy(apop_multinomial); drawfrom->parameters = apop_data_falloc((2), 30, .4); drawfrom->dsize = 2; int draw_ct = 80; apop_data *draws = apop_model_draws(drawfrom, draw_ct); apop_model *betaup = apop_update(draws, beta, apop_binomial); apop_model_show(betaup); beta->more = apop_beta; beta->log_likelihood = fake_ll; apop_model *bi = apop_model_fix_params(apop_model_set_parameters(apop_binomial, 30, NAN)); apop_model *upd = apop_update(draws, beta, bi); apop_model *betaed = apop_estimate(upd->data, apop_beta); deciles(betaed, betaup, 1); beta->log_likelihood = NULL; apop_model *upd_r = apop_update(draws, beta, bi); betaed = apop_estimate(apop_data_pmf_expand(upd_r->data, 2000), apop_beta); deciles(betaed, betaup, 1); apop_data *d2 = apop_model_draws(upd, draw_ct*2); apop_model *d2m = apop_estimate(d2, apop_beta); deciles(d2m, betaup, 1); }
int main(){ gsl_rng *r = apop_rng_alloc(2468); double binom_start = 0.6; double beta_start_a = 0.3; double beta_start_b = 0.5; int i, draws = 1500; double n = 4000; //First, the easy estimation using the conjugate distribution table. apop_model *bin = apop_model_set_parameters(apop_binomial, n, binom_start); apop_model *beta = apop_model_set_parameters(apop_beta, beta_start_a, beta_start_b); apop_model *updated = apop_update(.prior= beta, .likelihood=bin,.rng=r); //Now estimate via MCMC. //Requires a one-parameter binomial, with n fixed, //and a data set of n data points with the right p. apop_model *bcopy = apop_model_set_parameters(apop_binomial, n, GSL_NAN); apop_data *bin_draws = apop_data_falloc((1,2), n*(1-binom_start), n*binom_start); bin = apop_model_fix_params(bcopy); Apop_settings_add_group(beta, apop_mcmc, .burnin=.1, .periods=1e4); apop_model *out_h = apop_update(bin_draws, beta, bin, NULL); //We now have a histogram of values for p. What's the closest beta //distribution? apop_data *d = apop_data_alloc(draws, 1); for(i=0; i < draws; i ++) apop_draw(apop_data_ptr(d, i, 0), r, out_h); apop_model *out_beta = apop_estimate(d, apop_beta); //Finally, we can compare the conjugate and Gibbs results: apop_vector_normalize(updated->parameters->vector); apop_vector_normalize(out_beta->parameters->vector); double error = apop_vector_distance(updated->parameters->vector, out_beta->parameters->vector, .metric='m'); double updated_size = apop_vector_sum(updated->parameters->vector); Apop_assert(error/updated_size < 0.01, "The error is %g, which is too big.", error/updated_size); }
void gammafish(){ printf("gamma/poisson\n"); apop_model *gamma = apop_model_set_parameters(apop_gamma, 1.5, 2.2); apop_model *drawfrom = apop_model_set_parameters(apop_poisson, 3.1); int draw_ct = 90; apop_data *draws = apop_model_draws(drawfrom, draw_ct); apop_model *gammaup = apop_update(draws, gamma, apop_poisson); apop_model_show(gammaup); gamma->more = apop_gamma; gamma->log_likelihood = fake_ll; apop_model *proposal = apop_model_fix_params(apop_model_set_parameters(apop_normal, NAN, 1)); proposal->parameters = apop_data_falloc((1), .9); //apop_data_set(apop_settings_get(gamma, apop_mcmc, proposal)->parameters, .val=.9); Apop_settings_add_group(gamma, apop_mcmc, .burnin=.1, .periods=1e4, .proposal=proposal); apop_model *upd = apop_update(draws, gamma, apop_poisson); apop_model *gammafied = apop_estimate(upd->data, apop_gamma); deciles(gammafied, gammaup, 5); //Apop_settings_add_group(beta, apop_mcmc, .burnin=.4, .periods=1e4); gamma->log_likelihood = NULL; apop_model *upd_r = apop_update(draws, gamma, apop_poisson); apop_model *gammafied2 = apop_estimate(apop_data_pmf_expand(upd_r->data, 2000), apop_gamma); deciles(gammafied2, gammaup, 5); deciles(gammafied, gammafied2, 5); }
/* Regression check for apop_test_fisher_exact on a 2x3 table: the value at
   row 1 of the result must be within 1e-6 of 0.0001761.

   This test is thanks to Nick Eriksson, who sent it to me in the form of a bug report. */
int main() {
    apop_data *table = apop_data_falloc((2, 3),
                                        30, 50, 45,
                                        34, 12, 17);
    apop_data *fisher = apop_test_fisher_exact(table);
    double observed = apop_data_get(fisher, 1);
    assert(fabs(observed - 0.0001761) < 1e-6);
}
/*
 * Assert that two models have similar CDFs.
 *
 * apop_cdf is evaluated for both models at 0, 1/30, 2/30, ... for as long as
 * the point is below max; the assertion fails if the two values ever differ
 * by 0.18 or more.
 *
 * m1, m2: the models to compare.
 * max:    exclusive upper end of the evaluation grid.
 */
void deciles(apop_model *m1, apop_model *m2, double max){
    double width = 30;
    for (double i=0; i< max; i+=1/width){
        apop_data *x = apop_data_falloc((1), i);
        double L = apop_cdf(x, m1);
        double R = apop_cdf(x, m2);
        apop_data_free(x);  //the point is only needed for this one comparison.
        assert(fabs(L-R) < 0.18); //wide, I know.
    }
}
/* For a 2x2 table: print the statistic returned by calc_chi_squared together
   with its upper-tail chi-squared probability at (rows-1)*(cols-1) degrees of
   freedom, then show the output of apop_test_anova_independence and of
   apop_test_fisher_exact for the same table. */
int main(){
    apop_data *table = apop_data_falloc((2, 2),
                                        30, 86,
                                        24, 38);

    double chi2 = calc_chi_squared(table);
    double p_value = gsl_cdf_chisq_Q(chi2,
            (table->matrix->size1 - 1) * (table->matrix->size2 - 1));
    printf("chi squared statistic: %g; p, Chi-squared: %g\n", chi2, p_value);

    apop_data *anova_result = apop_test_anova_independence(table);
    apop_data_show(anova_result);

    apop_data *fisher_result = apop_test_fisher_exact(table);
    apop_data_show(fisher_result);
}
/* Estimate the min_distance model on a 5x2 data set, after attaching an
   apop_mle settings group that selects the "NM simplex" method with a
   tolerance of 1e-5, and show the estimated model. */
int main(){
    apop_data *points = apop_data_falloc((5, 2),
                                          1.1, 2.2,
                                          4.8, 7.4,
                                          2.9, 8.6,
                                         -1.3, 3.7,
                                          2.9, 1.1);
    Apop_model_add_group(min_distance, apop_mle, .method= "NM simplex", .tolerance=1e-5);
    apop_model *fitted = apop_estimate(points, min_distance);
    apop_model_show(fitted);
}
/* Fill an 8x3 data set, name its three columns, and hand it to
   apop_plot_triangle with "out.gnup" as the output name. */
int main(){
    apop_data *shares = apop_data_falloc((8,3),
                                          1,  0,  0,
                                         .8, .1,  0,
                                         .9,  0, .1,
                                         12,  4,  1,
                                          0,  1,  0,
                                          1,  2,  2,
                                          2,  1,  2,
                                          2,  2,  1);

    char *column_labels[] = {"first", "second", "third"};
    for (int c = 0; c < 3; c++)
        apop_name_add(shares->names, column_labels[c], 'c');

    apop_plot_triangle(shares, "out.gnup");
}
int main(){ apop_model *uniform_20 = apop_model_set_parameters(apop_uniform, 0, 20); apop_data *d = apop_model_draws(uniform_20, 10); //Estimate a Normal distribution from the data: apop_model *N = apop_estimate(d, apop_normal); print_draws(N); //estimate a one-dimensional multivariate Normal from the data: apop_model *mvN = apop_estimate(d, apop_multivariate_normal); print_draws(mvN); //fixed parameter list: apop_model *std_normal = apop_model_set_parameters(apop_normal, 0, 1); print_draws(std_normal); //variable-size parameter list: apop_model *std_multinormal = apop_model_copy(apop_multivariate_normal); std_multinormal->msize1 = std_multinormal->msize2 = std_multinormal->vsize = std_multinormal->dsize = 3; std_multinormal->parameters = apop_data_falloc((3, 3, 3), 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1); print_draws(std_multinormal); //estimate a KDE using the defaults: apop_model *k = apop_estimate(d, apop_kernel_density); print_draws(k); /*the documentation tells us that a KDE estimation consists of filling an apop_kernel_density_settings group, so we can set it to use a Normal(μ, 2) kernel via: */ apop_model *k2 = apop_model_copy_set(apop_kernel_density, apop_kernel_density, .base_data=d, .kernel = apop_model_set_parameters(apop_normal, 0, 2)); print_draws(k2); }
void make_draws(){ apop_model *multinom = apop_model_copy(apop_multivariate_normal); multinom->parameters = apop_data_falloc((2, 2, 2), 1, 1, .1, 8, .1, 1); multinom->dsize = 2; apop_model *d1 = apop_estimate(apop_model_draws(multinom), apop_multivariate_normal); for (int i=0; i< 2; i++) for (int j=-1; j< 2; j++) assert(fabs(apop_data_get(multinom->parameters, i, j) - apop_data_get(d1->parameters, i, j)) < .25); multinom->draw = NULL; //so draw via MCMC apop_model *d2 = apop_estimate(apop_model_draws(multinom, 10000), apop_multivariate_normal); for (int i=0; i< 2; i++) for (int j=-1; j< 2; j++) assert(fabs(apop_data_get(multinom->parameters, i, j) - apop_data_get(d2->parameters, i, j)) < .25); }
int main(){ size_t ct = 5e4; //set up the model & params apop_data *params = apop_data_falloc((2,2,2), 8, 1, 0.5, 2, 0.5, 1); apop_model *pvm = apop_model_copy(apop_multivariate_normal); pvm->parameters = apop_data_copy(params); pvm->dsize = 2; apop_data *d = apop_model_draws(pvm, ct); //set up and estimate a model with fixed covariance matrix but free means gsl_vector_set_all(pvm->parameters->vector, GSL_NAN); apop_model *mep1 = apop_model_fix_params(pvm); apop_model *e1 = apop_estimate(d, mep1); //compare results printf("original params: "); apop_vector_print(params->vector); printf("estimated params: "); apop_vector_print(e1->parameters->vector); assert(apop_vector_distance(params->vector, e1->parameters->vector)<1e-2); }
int main(){ //Set up an apop_data set with only one number. //Most of these functions will only look at the first data point encountered. apop_data *onept = apop_data_falloc((1), 23); apop_model *norm = apop_model_set_parameters(apop_normal, 23, 138.8); double val = apop_cdf(onept, norm); assert(fabs(val - 0.5) < 1e-4); double tolerance = 1e-8; //Macroizing the sample routine above: #define model_val_cdf(model, value, cdf_result) { \ apop_data_set(onept, .val=(value)); \ assert(fabs((apop_cdf(onept, model))-(cdf_result))< tolerance); \ } apop_model *uni = apop_model_set_parameters(apop_uniform, 20, 26); model_val_cdf(uni, 0, 0); model_val_cdf(uni, 20, 0); model_val_cdf(uni, 21, 1./6); model_val_cdf(uni, 23, 0.5); model_val_cdf(uni, 25, 5./6); model_val_cdf(uni, 26, 1); model_val_cdf(uni, 260, 1); //Improper uniform always returns 1/2. model_val_cdf(apop_improper_uniform, 0, 0.5); model_val_cdf(apop_improper_uniform, 228, 0.5); model_val_cdf(apop_improper_uniform, INFINITY, 0.5); apop_model *binom = apop_model_set_parameters(apop_binomial, 2001, 0.5); model_val_cdf(binom, 0, 0); model_val_cdf(binom, 1000, .5); model_val_cdf(binom, 2000, 1); apop_model *bernie = apop_model_set_parameters(apop_bernoulli, 0.75); //p(0)=.25; p(1)=.75; that determines the CDF. //Notice that the CDF's integral is over a closed interval. 
model_val_cdf(bernie, -1, 0); model_val_cdf(bernie, 0, 0.25); model_val_cdf(bernie, 0.1, 0.25); model_val_cdf(bernie, .99, 0.25); model_val_cdf(bernie, 1, 1); model_val_cdf(bernie, INFINITY, 1); //alpha=beta -> symmetry apop_model *beta = apop_model_set_parameters(apop_beta, 2, 2); model_val_cdf(beta, -INFINITY, 0); model_val_cdf(beta, 0.5, 0.5); model_val_cdf(beta, INFINITY, 1); //This beta distribution -> uniform apop_model *beta_uni = apop_model_set_parameters(apop_beta, 1, 1); model_val_cdf(beta_uni, 0, 0); model_val_cdf(beta_uni, 1./6, 1./6); model_val_cdf(beta_uni, 0.5, 0.5); model_val_cdf(beta_uni, 1, 1); beta_uni->cdf = NULL; //With no closed-form CDF; make random draws to estimate the CDF. Apop_model_add_group(beta_uni, apop_cdf, .draws=1e6); //extra draws to improve accuracy, but we have to lower our tolerance anyway. tolerance=1e-3; model_val_cdf(beta_uni, 0, 0); model_val_cdf(beta_uni, 1./6, 1./6); model_val_cdf(beta_uni, 0.5, 0.5); model_val_cdf(beta_uni, 1, 1); //sum of three symmetric distributions: still symmetric. apop_model *sum_of_three = apop_model_mixture(beta, apop_improper_uniform, beta_uni); model_val_cdf(sum_of_three, 0.5, 0.5); apop_data *threepts = apop_data_falloc((3,1), -1, 0, 1); apop_model *kernels = apop_estimate(threepts, apop_kernel_density); model_val_cdf(kernels, -5, 0); model_val_cdf(kernels, 0, 0.5); model_val_cdf(kernels, 10, 1); }