#include <apop.h>

int main(){
    gsl_rng *r = apop_rng_alloc(10);
    size_t i, ct = 5e4;

    //set up the model & params
    apop_data *d = apop_data_alloc(ct, 2);
    apop_data *params = apop_data_alloc(2, 2, 2);
    apop_data_fill(params, 8,   1,   0.5,    //vector: the means; matrix: the covariance,
                           2,   0.5, 1);     //filled one row at a time.
    apop_model *pvm = apop_model_copy(apop_multivariate_normal);
    pvm->parameters = apop_data_copy(params);

    //make random draws from the multivar. normal
    //this `pull a row view, fill its data element' form works for rows but not cols.
    for(i=0; i< ct; i++){
        Apop_row(d, i, onerow);
        apop_draw(onerow->data, r, pvm);
    }

    //set up and estimate a model with fixed covariance matrix but free means
    gsl_vector_set_all(pvm->parameters->vector, GSL_NAN);
    apop_model *mep1 = apop_model_fix_params(pvm);
    apop_model *e1 = apop_estimate(d, *mep1);

    //compare results
    printf("original params: ");
    apop_vector_show(params->vector);
    printf("estimated params: ");
    apop_vector_show(e1->parameters->vector);
}
#include <apop.h>

int main(){
    gsl_rng *r = apop_rng_alloc(2468);
    double binom_start = 0.6;
    double beta_start_a = 0.3;
    double beta_start_b = 0.5;
    int i, draws = 1500;
    double n = 4000;

    //First, the easy estimation using the conjugate distribution table.
    apop_model *bin = apop_model_set_parameters(apop_binomial, n, binom_start);
    apop_model *beta = apop_model_set_parameters(apop_beta, beta_start_a, beta_start_b);
    apop_model *updated = apop_update(.prior=beta, .likelihood=bin, .rng=r);

    //Now estimate via Gibbs sampling.
    //Requires a one-parameter binomial, with n fixed,
    //and a data set of n data points with the right p.
    apop_model *bcopy = apop_model_set_parameters(apop_binomial, n, GSL_NAN);
    apop_data *bin_draws = apop_data_fill(apop_data_alloc(1,2), n*(1-binom_start), n*binom_start);
    bin = apop_model_fix_params(bcopy);
    apop_model_add_group(beta, apop_update, .burnin=.1, .periods=1e4);
    apop_model *out_h = apop_update(bin_draws, beta, bin, NULL);

    //We now have a histogram of values for p. What's the closest beta distribution?
    apop_data *d = apop_data_alloc(0, draws, 1);
    for(i=0; i < draws; i++)
        apop_draw(apop_data_ptr(d, i, 0), r, out_h);
    apop_model *out_beta = apop_estimate(d, apop_beta);

    //Finally, we can compare the conjugate and Gibbs results:
    apop_vector_normalize(updated->parameters->vector);
    apop_vector_normalize(out_beta->parameters->vector);
    double error = apop_vector_distance(updated->parameters->vector,
                                        out_beta->parameters->vector, .metric='m');
    double updated_size = apop_vector_sum(updated->parameters->vector);
    Apop_assert(error/updated_size < 0.01, "The error is %g, which is too big.", error/updated_size);
}
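/* Background on the first branch above, for reference: the Beta distribution is the conjugate
   prior of the Binomial, so a Beta(a, b) prior combined with Binomial data showing k successes
   out of n trials gives a Beta(a + k, b + n - k) posterior; apop_update's conjugate table uses
   that closed form rather than sampling. The Gibbs-sampled histogram built in the second branch
   approximates the same posterior, which is what the normalized comparison at the end checks. */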
//Use this function to produce test data below.
apop_data *draw_exponentiated_normal(double mu, double sigma, double draws){
    apop_model *n01 = apop_model_set_parameters(apop_normal, mu, sigma);
    apop_data *d = apop_data_alloc(draws);
    gsl_rng *r = apop_rng_alloc(13);
    for (int i=0; i< draws; i++)
        apop_draw(gsl_vector_ptr(d->vector, i), r, n01);
    apop_vector_exp(d->vector);
    return d;
}
//add Normal(0, size) noise to each element of the input vector, in place
void add_noise(gsl_vector *in, gsl_rng *r, double size){
    apop_model *nnoise = apop_model_set_parameters(apop_normal, 0, size);
    for (int i=0; i< in->size; i++){
        double noise;
        apop_draw(&noise, r, nnoise);
        apop_vector_increment(in, i, noise);
    }
    apop_model_free(nnoise);
}
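/* A hypothetical driver for the two helpers above, a minimal sketch that is not part of the
   original listing: draw 1,000 lognormal data points, then perturb them in place with
   Normal(0, 0.1) noise. Seed and values are illustrative. */
int main(){
    gsl_rng *r = apop_rng_alloc(23);
    apop_data *d = draw_exponentiated_normal(0, 1, 1e3);
    add_noise(d->vector, r, 0.1);
}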
#include <apop.h>
#include <assert.h>

int main(){
    gsl_rng *r = apop_rng_alloc(2312311);
    int empirical_size = 5e3;
    apop_model *expo = apop_model_set_parameters(apop_exponential, 1.7);
    assert (apop_kl_divergence(expo, expo) < 1e-4);

    apop_data *empirical = apop_data_alloc(empirical_size, 1);
    for (int i=0; i<empirical_size; i++)
        apop_draw(apop_data_ptr(empirical, i, 0), r, expo);
    apop_model *pmf = apop_estimate(empirical, apop_pmf);
    assert(apop_kl_divergence(pmf, expo) < 1e-4);
    apop_data_free(empirical);
}
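/* What the assertions check: the Kullback-Leibler divergence KL(P||Q) = sum_x p(x) log(p(x)/q(x))
   is zero when the two distributions agree, so the Exponential(1.7) model's divergence from itself
   should vanish, and a PMF built from 5,000 draws of that model should also lie within tolerance. */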
//Assumes a file-scope apop_model *weibull, defined elsewhere in this program.
apop_model *fuzz(apop_model sim){
    int draws = 100;
    gsl_rng *r = apop_rng_alloc(1);
    apop_model *prior = apop_model_cross(
                            apop_model_set_parameters(apop_normal, 10, 2),
                            apop_model_set_parameters(apop_normal, 10, 2));
    apop_data *outdata = apop_data_alloc(draws, weibull->vsize);
    double *params = sim.parameters->vector->data;
    for (int i=0; i< draws; i++){
        do {
            apop_draw(params, r, prior);
        } while (params[1]*2 > pow(params[0], 2));
        sim.dsize = params[1];
        apop_model *est = apop_estimate(apop_model_draws(&sim, 1000), weibull);
        Apop_row_v(outdata, i, onerow);
        gsl_vector_memcpy(onerow, est->parameters->vector);
        apop_model_free(est);
    }
    return apop_estimate(outdata, apop_pmf);
}
#include <apop.h>
#include <assert.h>

int main(){
    //bind together a Poisson and a Normal;
    //make a draw producing a 2-element vector
    apop_model *m1 = apop_model_set_parameters(apop_poisson, 3);
    apop_model *m2 = apop_model_set_parameters(apop_normal, -5, 1);
    apop_model *mm = apop_model_stack(m1, m2);
    int len = 1e5;
    gsl_rng *r = apop_rng_alloc(1);
    apop_data *draws = apop_data_alloc(len, 2);
    for (int i=0; i< len; i++){
        Apop_row(draws, i, onev);
        apop_draw(onev->data, r, mm);
        assert((int)onev->data[0] == onev->data[0]);
        assert(onev->data[1] < 0);
    }

    //The rest of the test script recovers the parameters.
    //First, set up a two-page data set: poisson data on p1, Normal on p2:
    apop_data *comeback = apop_data_alloc();
    Apop_col(draws, 0, fishdraws)
    comeback->vector = apop_vector_copy(fishdraws);
    apop_data_add_page(comeback, apop_data_alloc(), "p2");
    Apop_col(draws, 1, meandraws)
    comeback->more->vector = apop_vector_copy(meandraws);

    //set up the un-parameterized stacked model, including
    //the name at which to split the data set
    apop_model *estme = apop_model_stack(apop_model_copy(apop_poisson), apop_model_copy(apop_normal));
    Apop_settings_add(estme, apop_stack, splitpage, "p2");
    apop_model *ested = apop_estimate(comeback, *estme);

    //test that the parameters are as promised.
    apop_model *m1back = apop_settings_get(ested, apop_stack, model1);
    apop_model *m2back = apop_settings_get(ested, apop_stack, model2);
    assert(fabs(apop_data_get(m1back->parameters, .col=-1) - 3) < 1e-2);
    assert(fabs(apop_data_get(m2back->parameters, .col=-1) - -5) < 1e-2);
    assert(fabs(apop_data_get(m2back->parameters, .col=-1, .row=1) - 1) < 1e-2);
}
/** Make random draws from an \ref apop_model, and bin them using a binspec in the style of
 \ref apop_data_to_bins. If you have a data set that used the same binspec, you now have synced
 histograms, which you can plot or sensibly test hypotheses about.

The output is normalized to integrate to one.

\param binspec A description of the bins in which to place the draws; see \ref apop_data_to_bins. (default: as in \ref apop_data_to_bins.)
\param model The model to be drawn from. Because this function works via random draws, the model needs to have a \c draw method. (No default)
\param draws The number of random draws to make. (arbitrary default = 10,000)
\param bin_count If no binspec, the number of bins to use (default: as per \ref apop_data_to_bins, \f$\sqrt{N}\f$)
\param rng The \c gsl_rng used to make random draws. (default: see note on \ref autorng)
\return An \ref apop_pmf model.

\li This function uses the \ref designated syntax for inputs.
\ingroup histograms
*/
APOP_VAR_HEAD apop_model *apop_model_to_pmf(apop_model *model, apop_data *binspec, long int draws, int bin_count, gsl_rng *rng){
    apop_model* apop_varad_var(model, NULL);
    Apop_assert(model && model->draw, "The model argument needs to be an apop_model with a 'draw' function "
                                      "that I can use to make random draws.");
    apop_data* apop_varad_var(binspec, NULL);
    int apop_varad_var(bin_count, 0);
    long int apop_varad_var(draws, 1e4);
    gsl_rng *apop_varad_var(rng, NULL)
    static gsl_rng *spare = NULL;
    if (!rng && !spare)
        spare = apop_rng_alloc(++apop_opts.rng_seed);
    if (!rng) rng = spare;
APOP_VAR_ENDHEAD
    Get_vmsizes(binspec);
    apop_data *outd = apop_data_alloc(draws, model->dsize);
    for (long int i=0; i< draws; i++){
        Apop_row(outd, i, ach);
        apop_draw(ach->data, rng, model);
    }
    apop_data *outbinned = apop_data_to_bins(outd, binspec, .bin_count=bin_count);
    apop_data_free(outd);
    apop_vector_normalize(outbinned->weights);
    return apop_estimate(outbinned, apop_pmf);
}
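/* A minimal usage sketch of apop_model_to_pmf; the model, draw count, and seed here are
   illustrative assumptions, not taken from the documentation above. It bins 2,000 draws
   from a standard Normal into a PMF, letting the binspec and bin count take their defaults. */
apop_model *std_normal = apop_model_set_parameters(apop_normal, 0, 1);
apop_model *binned = apop_model_to_pmf(.model=std_normal, .draws=2000, .rng=apop_rng_alloc(42));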
static long double wishart_ll(apop_data *in, apop_model *m){
    Nullcheck_mpd(in, m, GSL_NAN);
    wishartstruct_t ws = {
        .paraminv = apop_matrix_inverse(m->parameters->matrix),
        .len = sqrt(in->matrix->size2),
        .df = m->parameters->vector->data[0]
    };
    double paramdet = apop_matrix_determinant(m->parameters->matrix);
    if (paramdet < 1e-3) return GSL_NEGINF;
    double ll = apop_map_sum(in, .fn_vp = one_wishart_row, .param=&ws, .part='r');
    double k = log(ws.df)*ws.df/2.;
    k -= M_LN2 * ws.len* ws.df/2.;
    k -= log(paramdet) * ws.df/2.;
    k -= apop_multivariate_lngamma(ws.df/2., ws.len);
    return ll + k*in->matrix->size1;
}

static int apop_wishart_draw(double *out, gsl_rng *r, apop_model *m){
    /* Translated from the Fortran by BK. Fortran comments:

C          SUBROUTINE DWSHRT(D, N, NP, NNP, SB, SA)
C
C       ALGORITHM AS 53  APPL. STATIST. (1972) VOL.21, NO.3
C
C     Wishart variate generator. On output, SA is an upper-triangular
C       matrix of size NP * NP [...]
C       whose elements have a Wishart(N, SIGMA) distribution.
    */
    Nullcheck_mp(m, );
    int np = m->parameters->matrix->size1;
    int n = m->parameters->vector->data[0];
    if (!m->more) {
        gsl_matrix *ccc = apop_matrix_copy(m->parameters->matrix);
        gsl_linalg_cholesky_decomp(ccc);
        for (int i=0; i < ccc->size1; i++) //zero out the upper diagonal
            for (int j=i+1; j < ccc->size2; j++)
                gsl_matrix_set(ccc, i, j, 0);
        m->more = apop_matrix_to_data(ccc);
        m->more_size = sizeof(apop_data);
    }
    apop_data *Chol = m->more;
    apop_data *rmatrix = apop_data_calloc(np, np);
    Staticdef(apop_model *, std_normal, apop_model_set_parameters(apop_normal, 0, 1));

    //C     Load diagonal elements with square root of chi-square variates
    for(int i = 0; i< np; i++){
        int DF = n - i;
        apop_data_set(rmatrix, i, i, sqrt(gsl_ran_chisq(r, DF)));
    }
    for(int i = 1; i< np; i++) //off-diagonal triangles: Normals.
        for(int j = 0; j< i; j++){
            double ndraw;
            apop_draw(&ndraw, r, std_normal);
            assert (!gsl_isnan(ndraw));
            apop_data_set(rmatrix, i, j, ndraw);
        }
    //Now find C * rand * rand' * C'
    apop_data *cr = apop_dot(Chol, rmatrix);
    apop_data *crr = apop_dot(cr, rmatrix, .form2='t');
    apop_data *crrc = apop_dot(crr, Chol, .form2='t');
    memmove(out, crrc->matrix->data, sizeof(double)*np*np);

    apop_data_free(rmatrix);
    apop_data_free(cr);
    apop_data_free(crrc);
    apop_data_free(crr);
    return 0;
}
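/* A usage sketch, under the assumption that the draw method above is reached through the public
   apop_draw interface on the exported apop_wishart model; the degrees of freedom, scale matrix,
   and seed are illustrative. Parameters are set by hand, in the same style as the multivariate
   normal example earlier. */
apop_model *w = apop_model_copy(apop_wishart);
w->parameters = apop_data_alloc(1, 2, 2);
gsl_vector_set(w->parameters->vector, 0, 10);       //degrees of freedom
gsl_matrix_set_identity(w->parameters->matrix);     //2x2 scale matrix Sigma
double draw[4];                                     //room for one 2x2 draw, row-major
gsl_rng *r = apop_rng_alloc(8);
apop_draw(draw, r, w);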