int main(){ gsl_rng *r = apop_rng_alloc(2468); double binom_start = 0.6; double beta_start_a = 0.3; double beta_start_b = 0.5; int i, draws = 1500; double n = 4000; //First, the easy estimation using the conjugate distribution table. apop_model *bin = apop_model_set_parameters(apop_binomial, n, binom_start); apop_model *beta = apop_model_set_parameters(apop_beta, beta_start_a, beta_start_b); apop_model *updated = apop_update(.prior= beta, .likelihood=bin,.rng=r); //Now estimate via Gibbs sampling. //Requires a one-parameter binomial, with n fixed, //and a data set of n data points with the right p. apop_model *bcopy = apop_model_set_parameters(apop_binomial, n, GSL_NAN); apop_data *bin_draws = apop_data_fill(apop_data_alloc(1,2), n*(1-binom_start), n*binom_start); bin = apop_model_fix_params(bcopy); apop_model_add_group(beta, apop_update, .burnin=.1, .periods=1e4); apop_model *out_h = apop_update(bin_draws, beta, bin, NULL); //We now have a histogram of values for p. What's the closest beta //distribution? apop_data *d = apop_data_alloc(0, draws, 1); for(i=0; i < draws; i ++) apop_draw(apop_data_ptr(d, i, 0), r, out_h); apop_model *out_beta = apop_estimate(d, apop_beta); //Finally, we can compare the conjugate and Gibbs results: apop_vector_normalize(updated->parameters->vector); apop_vector_normalize(out_beta->parameters->vector); double error = apop_vector_distance(updated->parameters->vector, out_beta->parameters->vector, .metric='m'); double updated_size = apop_vector_sum(updated->parameters->vector); Apop_assert(error/updated_size < 0.01, "The error is %g, which is too big.", error/updated_size); }
int main(){ apop_data *d = apop_text_alloc(apop_data_alloc(6), 6, 1); apop_data_fill(d, 1, 2, 3, 3, 1, 2); apop_text_fill(d, "A", "A", "A", "A", "A", "B"); asprintf(&d->names->title, "Original data set"); printdata(d); //binned, where bin ends are equidistant but not necessarily in the data apop_data *binned = apop_data_to_bins(d, NULL); asprintf(&binned->names->title, "Post binning"); printdata(binned); assert(apop_sum(binned->weights)==6); assert(fabs(//equal distance between bins (apop_data_get(binned, 1, -1) - apop_data_get(binned, 0, -1)) - (apop_data_get(binned, 2, -1) - apop_data_get(binned, 1, -1))) < 1e-5); //compressed, where the data is as in the original, but weights //are redome to accommodate repeated observations. apop_data_pmf_compress(d); asprintf(&d->names->title, "Post compression"); printdata(d); assert(apop_sum(d->weights)==6); apop_model *d_as_pmf = apop_estimate(d, apop_pmf); Apop_row(d, 0, firstrow); //1A assert(fabs(apop_p(firstrow, d_as_pmf) - 2./6 < 1e-5)); }
int main(){ gsl_rng *r = apop_rng_alloc(10); size_t i, ct = 5e4; //set up the model & params apop_data *d = apop_data_alloc(ct,2); apop_data *params = apop_data_alloc(2,2,2); apop_data_fill(params, 8, 1, 0.5, 2, 0.5, 1); apop_model *pvm = apop_model_copy(apop_multivariate_normal); pvm->parameters = apop_data_copy(params); //make random draws from the multivar. normal //this `pull a row view, fill its data element' form works for rows but not cols. for(i=0; i< ct; i++){ Apop_row(d, i, onerow); apop_draw(onerow->data, r, pvm); } //set up and estimate a model with fixed covariance matrix but free means gsl_vector_set_all(pvm->parameters->vector, GSL_NAN); apop_model *mep1 = apop_model_fix_params(pvm); apop_model *e1 = apop_estimate(d, *mep1); //compare results printf("original params: "); apop_vector_show(params->vector); printf("estimated params: "); apop_vector_show(e1->parameters->vector); }