Ejemplo n.º 1
0
int main(){
    gsl_rng *r = apop_rng_alloc(2468);
    double binom_start = 0.6;
    double beta_start_a = 0.3;
    double beta_start_b = 0.5;
    int i, draws = 1500;
    double n = 4000;
    //First, the easy estimation using the conjugate distribution table.
    apop_model *bin = apop_model_set_parameters(apop_binomial, n, binom_start);
    apop_model *beta = apop_model_set_parameters(apop_beta, beta_start_a, beta_start_b);
    apop_model *updated = apop_update(.prior= beta, .likelihood=bin,.rng=r);

    //Now estimate via Gibbs sampling. 
    //Requires a one-parameter binomial, with n fixed,
    //and a data set of n data points with the right p.
    apop_model *bcopy = apop_model_set_parameters(apop_binomial, n, GSL_NAN);
    apop_data *bin_draws = apop_data_fill(apop_data_alloc(1,2), n*(1-binom_start), n*binom_start);
    bin = apop_model_fix_params(bcopy);
    apop_model_add_group(beta, apop_update, .burnin=.1, .periods=1e4);
    apop_model *out_h = apop_update(bin_draws, beta, bin, NULL);

    //We now have a histogram of values for p. What's the closest beta
    //distribution?
    apop_data *d = apop_data_alloc(0, draws, 1);
    for(i=0; i < draws; i ++)
        apop_draw(apop_data_ptr(d, i, 0), r, out_h);
    apop_model *out_beta = apop_estimate(d, apop_beta);
    //Finally, we can compare the conjugate and Gibbs results:
    apop_vector_normalize(updated->parameters->vector);
    apop_vector_normalize(out_beta->parameters->vector);
    double error = apop_vector_distance(updated->parameters->vector, out_beta->parameters->vector, .metric='m');
    double updated_size = apop_vector_sum(updated->parameters->vector);
    Apop_assert(error/updated_size < 0.01, "The error is %g, which is too big.", error/updated_size);
}
Ejemplo n.º 2
0
void plot(apop_model *k, char *outname){
    double max = 4.3, increment = 0.01;
    apop_data *out = apop_data_alloc(max/increment+1);
    out->weights =  gsl_vector_alloc(max/increment+1);
    for (int c=0; c<max/increment; c++){
        Apop_row(out, c, a_point);
        gsl_vector_set(a_point->vector, 0, c* increment);
        gsl_vector_set(a_point->weights, 0, apop_p(a_point, k));
    }
    apop_vector_normalize(out->weights); //Q: why is this always necessary?
    apop_data_print(out, .output_name=outname);
}
Ejemplo n.º 3
0
apop_data *apop_data_pmf_expand(apop_data *in, int factor){
    apop_data *expanded = apop_data_alloc();
    apop_vector_normalize(in->weights);
    for (int i=0; i< in->weights->size;i++){
        int wt = gsl_vector_get(in->weights, i)* factor;
        if (wt){
            apop_data *next = apop_data_alloc(wt);
            gsl_vector_set_all(next->vector, apop_data_get(in, i));
            apop_data_stack(expanded, next, .inplace='y');
        }
    }
    if (expanded->vector) return expanded;
    else return NULL;
}
Ejemplo n.º 4
0
int main(){
    apop_db_open("data-climate.db");

    apop_data *data = apop_query_to_data("select pcp from precip");
    apop_data_pmf_compress(data); //creates a weights vector
    apop_vector_normalize(data->weights);
    apop_data_sort(data);
    apop_model *pmf = apop_estimate(data, apop_pmf);
    FILE *outfile = fopen("out.h", "w");
    apop_model_print(pmf, outfile);
    apop_model *kernel = apop_model_set_parameters(apop_normal, 0., 0.1);
    apop_model *k = apop_model_copy(apop_kernel_density);
    Apop_settings_add_group(k, apop_kernel_density, .base_pmf=pmf, .kernel=kernel);
    plot(k, "out.k");
    printf("plot 'out.h' with lines title 'data', 'out.k' with lines title 'smoothed'\n");
}
Ejemplo n.º 5
0
/** Make random draws from an \ref apop_model, and bin them using a binspec in the style
 of \ref apop_data_to_bins. If you have a data set that used the same binspec, you now have synced histograms, which you can plot or sensibly test hypotheses about.

The output is normalized to integrate to one.

\param binspec A description of the bins in which to place the draws; see \ref apop_data_to_bins. (default: as in \ref apop_data_to_bins.)
\param model The model to be drawn from. Because this function works via random draws, the model needs to have a 
\c draw method. (No default)
\param draws The number of random draws to make. (arbitrary default = 10,000)
\param bin_count If no bin spec, the number of bins to use (default: as per \ref apop_data_to_bins, \f$\sqrt(N)\f$)
\param rng The \c gsl_rng used to make random draws. (default: see note on \ref autorng)

\return An \ref apop_pmf model.

\li This function uses the \ref designated syntax for inputs.

\ingroup histograms
*/
APOP_VAR_HEAD apop_model *apop_model_to_pmf(apop_model *model, apop_data *binspec, long int draws, int bin_count, gsl_rng *rng){
    apop_model* apop_varad_var(model, NULL);
    Apop_assert(model && model->draw, "The second argument needs to be an apop_model with a 'draw' function "
                              "that I can use to make random draws.");
    apop_data* apop_varad_var(binspec, NULL);
    int apop_varad_var(bin_count, 0);
    long int apop_varad_var(draws, 1e4);
    gsl_rng *apop_varad_var(rng, NULL)
    static gsl_rng *spare = NULL;
    if (!rng && !spare) 
        spare = apop_rng_alloc(++apop_opts.rng_seed);
    if (!rng) rng = spare;
APOP_VAR_ENDHEAD
    Get_vmsizes(binspec);
    apop_data *outd = apop_data_alloc(draws, model->dsize); 
    for (long int i=0; i< draws; i++){
        Apop_row(outd, i, ach);
        apop_draw(ach->data, rng, model);
    }
    apop_data *outbinned = apop_data_to_bins(outd, binspec, .bin_count=bin_count);
    apop_data_free(outd);
    apop_vector_normalize(outbinned->weights);
    return apop_estimate(outbinned, apop_pmf);
}