示例#1
0
/* Beta prior with a Binomial likelihood: build a reference posterior with
   apop_update(), then produce three alternative Beta fits and assert, through
   deciles() with max 1, that each one's CDF stays close to the reference. */
void betabinom(){
    apop_model *prior = apop_model_set_parameters(apop_beta, 10, 5);

    //Data source: a copy of apop_multinomial with parameter vector (30, .4).
    apop_model *source = apop_model_copy(apop_multinomial);
    source->parameters = apop_data_falloc((2), 30, .4);
    source->dsize = 2;
    int n_draws = 80;
    apop_data *sample = apop_model_draws(source, n_draws);

    //The reference posterior that every later fit is compared against.
    apop_model *reference = apop_update(sample, prior, apop_binomial);
    apop_model_show(reference);

    //Fit 1: install fake_ll as the prior's log likelihood, update against a
    //Binomial built with parameters (30, NAN) and passed through
    //apop_model_fix_params, then fit a Beta to the data attached to the result.
    //NOTE(review): presumably the swap keeps apop_update from treating the
    //prior as a plain Beta -- confirm against fake_ll and the apop_update docs.
    prior->more = apop_beta;
    prior->log_likelihood = fake_ll;
    apop_model *free_p_binomial = apop_model_fix_params(
                            apop_model_set_parameters(apop_binomial, 30, NAN));
    apop_model *posterior_fake = apop_update(sample, prior, free_p_binomial);
    apop_model *beta_fit = apop_estimate(posterior_fake->data, apop_beta);
    deciles(beta_fit, reference, 1);

    //Fit 2: the same update with the prior's log likelihood cleared; the
    //result's data goes through apop_data_pmf_expand before the Beta fit.
    prior->log_likelihood = NULL;
    apop_model *posterior_null = apop_update(sample, prior, free_p_binomial);
    apop_data *expanded = apop_data_pmf_expand(posterior_null->data, 2000);
    beta_fit = apop_estimate(expanded, apop_beta);
    deciles(beta_fit, reference, 1);

    //Fit 3: fit a Beta to twice as many draws taken from the first posterior.
    apop_data *redraws = apop_model_draws(posterior_fake, n_draws*2);
    apop_model *redraw_fit = apop_estimate(redraws, apop_beta);
    deciles(redraw_fit, reference, 1);
}
示例#2
0
int main(){
    gsl_rng *r = apop_rng_alloc(2468);
    double binom_start = 0.6;
    double beta_start_a = 0.3;
    double beta_start_b = 0.5;
    int i, draws = 1500;
    double n = 4000;
    //First, the easy estimation using the conjugate distribution table.
    apop_model *bin = apop_model_set_parameters(apop_binomial, n, binom_start);
    apop_model *beta = apop_model_set_parameters(apop_beta, beta_start_a, beta_start_b);
    apop_model *updated = apop_update(.prior= beta, .likelihood=bin,.rng=r);

    //Now estimate via MCMC. 
    //Requires a one-parameter binomial, with n fixed,
    //and a data set of n data points with the right p.
    apop_model *bcopy = apop_model_set_parameters(apop_binomial, n, GSL_NAN);
    apop_data *bin_draws = apop_data_falloc((1,2), n*(1-binom_start), n*binom_start);
    bin = apop_model_fix_params(bcopy);
    Apop_settings_add_group(beta, apop_mcmc, .burnin=.1, .periods=1e4);
    apop_model *out_h = apop_update(bin_draws, beta, bin, NULL);

    //We now have a histogram of values for p. What's the closest beta
    //distribution?
    apop_data *d = apop_data_alloc(draws, 1);
    for(i=0; i < draws; i ++)
        apop_draw(apop_data_ptr(d, i, 0), r, out_h);
    apop_model *out_beta = apop_estimate(d, apop_beta);
    //Finally, we can compare the conjugate and Gibbs results:
    apop_vector_normalize(updated->parameters->vector);
    apop_vector_normalize(out_beta->parameters->vector);
    double error = apop_vector_distance(updated->parameters->vector, out_beta->parameters->vector, .metric='m');
    double updated_size = apop_vector_sum(updated->parameters->vector);
    Apop_assert(error/updated_size < 0.01, "The error is %g, which is too big.", error/updated_size);
}
示例#3
0
/* Gamma prior with a Poisson likelihood: build a reference posterior with
   apop_update(), then fit a Gamma to the output of two further updates and
   assert, through deciles() with max 5, that all three CDFs stay close. */
void gammafish(){
    printf("gamma/poisson\n");
    apop_model *prior = apop_model_set_parameters(apop_gamma, 1.5, 2.2);

    //Data: 90 draws from a Poisson with parameter 3.1.
    apop_model *source = apop_model_set_parameters(apop_poisson, 3.1);
    int n_draws = 90;
    apop_data *sample = apop_model_draws(source, n_draws);

    //The reference posterior that the later fits are compared against.
    apop_model *reference = apop_update(sample, prior, apop_poisson);
    apop_model_show(reference);

    //First alternative: install fake_ll as the prior's log likelihood and
    //attach apop_mcmc settings whose proposal is a Normal built with
    //parameters (NAN, 1), passed through apop_model_fix_params, and then
    //given the one-element parameter set (.9).
    //NOTE(review): presumably the fake_ll swap keeps apop_update from treating
    //the prior as a plain Gamma -- confirm against fake_ll.
    prior->more = apop_gamma;
    prior->log_likelihood = fake_ll;
    apop_model *step = apop_model_fix_params(
                            apop_model_set_parameters(apop_normal, NAN, 1));
    step->parameters = apop_data_falloc((1), .9);
    Apop_settings_add_group(prior, apop_mcmc, .burnin=.1, .periods=1e4, .proposal=step);
    apop_model *posterior_fake = apop_update(sample, prior, apop_poisson);
    apop_model *gamma_fit_fake = apop_estimate(posterior_fake->data, apop_gamma);
    deciles(gamma_fit_fake, reference, 5);

    //Second alternative: the same update with the log likelihood cleared; the
    //result's data goes through apop_data_pmf_expand before the Gamma fit.
    prior->log_likelihood = NULL;
    apop_model *posterior_null = apop_update(sample, prior, apop_poisson);
    apop_data *expanded = apop_data_pmf_expand(posterior_null->data, 2000);
    apop_model *gamma_fit_null = apop_estimate(expanded, apop_gamma);
    deciles(gamma_fit_null, reference, 5);

    //The two alternatives should also agree with each other.
    deciles(gamma_fit_fake, gamma_fit_null, 5);
}
示例#4
0
/* Regression test for apop_test_fisher_exact() on a 2x3 table.
   This test is thanks to Nick Eriksson, who sent it in the form of a bug report. */
int main() {
    apop_data *table = apop_data_falloc((2, 3),
                              30, 50, 45,
                              34, 12, 17 );
    apop_data *result = apop_test_fisher_exact(table);

    //Element 1 of the returned data set must match the known value.
    const double expected = 0.0001761;
    assert(fabs(apop_data_get(result,1) - expected) < 1e-6);
}
示例#5
0
/* Assert that two models' CDFs agree on an evenly spaced grid over [0, max).
 *
 * m1, m2: the models to compare; each is evaluated with apop_cdf().
 * max:    exclusive upper end of the grid. Grid points are 0, 1/30, 2/30, ...
 *
 * Aborts through assert() if the two CDFs differ by 0.18 or more at any point.
 */
void deciles(apop_model *m1, apop_model *m2, double max){
    double width = 30;
    //Count steps with an integer and derive each grid point from it. Adding
    //1/width to a double on every pass accumulates rounding error, which can
    //change how many grid points get visited.
    for (int step=0; step/width < max; step++){
        apop_data *x = apop_data_falloc((1), step/width);
        double L = apop_cdf(x, m1);
        double R = apop_cdf(x, m2);
        apop_data_free(x); //one allocation per grid point, so release it here
        assert(fabs(L-R) < 0.18); //wide, I know.
    }
}
示例#6
0
/* Run three tests on one 2x2 table: a chi-squared statistic from
   calc_chi_squared() with its upper-tail probability, then
   apop_test_anova_independence() and apop_test_fisher_exact(). */
int main(){
    apop_data *table = apop_data_falloc((2, 2),
                           30, 86,
                           24, 38 );

    //Degrees of freedom: (rows - 1) * (columns - 1).
    size_t df = (table->matrix->size1 - 1)* (table->matrix->size2 - 1);
    double stat = calc_chi_squared(table);
    double chisq = gsl_cdf_chisq_Q(stat, df);
    printf("chi squared statistic: %g; p, Chi-squared: %g\n", stat,chisq);

    apop_data_show(apop_test_anova_independence(table));
    apop_data_show(apop_test_fisher_exact(table));
}
/* Estimate the min_distance model on five (x, y) locations, using the
   "NM simplex" method with tolerance 1e-5, and show the result. */
int main(){
    //Five rows, two columns: one location per row.
    apop_data *points = apop_data_falloc((5, 2),
                             1.1, 2.2,
                             4.8, 7.4,
                             2.9, 8.6,
                            -1.3, 3.7,
                             2.9, 1.1);

    //Attach the apop_mle settings group to the model before estimating.
    Apop_model_add_group(min_distance, apop_mle, .method= "NM simplex",
                                                  .tolerance=1e-5);
    apop_model *fit = apop_estimate(points, min_distance);
    apop_model_show(fit);
}
示例#8
0
/* Build an 8x3 data set, name its three columns, and pass it to
   apop_plot_triangle() with the output name "out.gnup". */
int main(){
    apop_data *shares = apop_data_falloc((8,3),
            1,  0,  0,
           .8, .1,  0,
           .9,  0, .1,
           12,  4,  1,
            0,  1,  0,
            1,  2,  2,
            2,  1,  2,
            2,  2,  1);

    //Add the column names ('c') in order.
    char *labels[] = {"first", "second", "third"};
    for (int k = 0; k < 3; k++)
        apop_name_add(shares->names, labels[k], 'c');

    apop_plot_triangle(shares, "out.gnup");
}
示例#9
0
/* Set up models in several different ways -- estimated from data, with
   parameters set directly, and via a settings group -- and hand each one to
   print_draws(). */
int main(){
    //Source data: ten draws from a Uniform with parameters (0, 20).
    apop_model *uniform_source = apop_model_set_parameters(apop_uniform, 0, 20);
    apop_data *sample = apop_model_draws(uniform_source, 10);

    //Estimate a Normal distribution from the data:
    apop_model *normal_fit = apop_estimate(sample, apop_normal);
    print_draws(normal_fit);

    //Estimate a one-dimensional multivariate Normal from the same data:
    apop_model *mvn_fit = apop_estimate(sample, apop_multivariate_normal);
    print_draws(mvn_fit);

    //A model with a fixed parameter list, set directly:
    apop_model *unit_normal = apop_model_set_parameters(apop_normal, 0, 1);
    print_draws(unit_normal);

    //A model with a variable-size parameter list: copy the base model, set
    //every size field to 3 by hand, then attach a (3, 3, 3)-sized parameter set.
    apop_model *unit_mvn = apop_model_copy(apop_multivariate_normal);
    unit_mvn->dsize = 3;
    unit_mvn->vsize = 3;
    unit_mvn->msize2 = 3;
    unit_mvn->msize1 = 3;
    unit_mvn->parameters = apop_data_falloc((3, 3, 3),
                                1,  1, 0, 0,
                                1,  0, 1, 0,
                                1,  0, 0, 1);
    print_draws(unit_mvn);

    //Estimate a kernel density using the defaults:
    apop_model *kde_default = apop_estimate(sample, apop_kernel_density);
    print_draws(kde_default);

    /* The documentation says that a KDE estimation consists of filling an
       apop_kernel_density_settings group, so the same data can be given a
       Normal(mu, 2) kernel by setting that group while copying the model: */
    apop_model *normal_kernel = apop_model_set_parameters(apop_normal, 0, 2);
    apop_model *kde_normal = apop_model_copy_set(apop_kernel_density, apop_kernel_density,
                         .base_data=sample,
                         .kernel = normal_kernel);
    print_draws(kde_normal);
}
示例#10
0
void make_draws(){
    apop_model *multinom = apop_model_copy(apop_multivariate_normal);
    multinom->parameters = apop_data_falloc((2, 2, 2), 
                                        1,  1, .1,
                                        8, .1,  1);
    multinom->dsize = 2;

    apop_model *d1 = apop_estimate(apop_model_draws(multinom), apop_multivariate_normal);
    for (int i=0; i< 2; i++)
        for (int j=-1; j< 2; j++)
            assert(fabs(apop_data_get(multinom->parameters, i, j)
                    - apop_data_get(d1->parameters, i, j)) < .25);
    multinom->draw = NULL; //so draw via MCMC
    apop_model *d2 = apop_estimate(apop_model_draws(multinom, 10000), apop_multivariate_normal);
    for (int i=0; i< 2; i++)
        for (int j=-1; j< 2; j++)
            assert(fabs(apop_data_get(multinom->parameters, i, j)
                    - apop_data_get(d2->parameters, i, j)) < .25);
}
示例#11
0
/* Draw from a multivariate Normal with known parameters, then re-estimate
   only the means (the covariance matrix stays fixed) and assert that the
   estimated mean vector is within 1e-2 of the original. */
int main(){
    size_t n_draws = 5e4;

    //Set up the model and its parameters, and make the draws.
    apop_data *true_params = apop_data_falloc((2,2,2), 8,  1, 0.5,
                                                       2,  0.5, 1);
    apop_model *mvn = apop_model_copy(apop_multivariate_normal);
    mvn->parameters = apop_data_copy(true_params);
    mvn->dsize = 2;
    apop_data *sample = apop_model_draws(mvn, n_draws);

    //Set every element of the parameter vector to NaN, then build and
    //estimate a model with fixed covariance matrix but free means.
    gsl_vector_set_all(mvn->parameters->vector, GSL_NAN);
    apop_model *free_means = apop_model_fix_params(mvn);
    apop_model *fit = apop_estimate(sample, free_means);

    //Print both vectors, then compare them.
    printf("original params: ");
    apop_vector_print(true_params->vector);
    printf("estimated params: ");
    apop_vector_print(fit->parameters->vector);
    double miss = apop_vector_distance(true_params->vector, fit->parameters->vector);
    assert(miss<1e-2);
}
示例#12
0
文件: some_cdfs.c 项目: b-k/apophenia
int main(){
    //Set up an apop_data set with only one number.
    //Most of these functions will only look at the first data point encountered.
    apop_data *onept = apop_data_falloc((1), 23);

    apop_model *norm = apop_model_set_parameters(apop_normal, 23, 138.8);
    double val = apop_cdf(onept, norm);
    assert(fabs(val - 0.5) < 1e-4);

    double tolerance = 1e-8;
    //Macroizing the sample routine above:
    #define model_val_cdf(model, value, cdf_result) {   \
        apop_data_set(onept, .val=(value));             \
        assert(fabs((apop_cdf(onept, model))-(cdf_result))< tolerance);   \
    }

    apop_model *uni = apop_model_set_parameters(apop_uniform, 20, 26);
    model_val_cdf(uni, 0, 0);
    model_val_cdf(uni, 20, 0);
    model_val_cdf(uni, 21, 1./6);
    model_val_cdf(uni, 23, 0.5);
    model_val_cdf(uni, 25, 5./6);
    model_val_cdf(uni, 26, 1);
    model_val_cdf(uni, 260, 1);

    //Improper uniform always returns 1/2.
    model_val_cdf(apop_improper_uniform, 0, 0.5);
    model_val_cdf(apop_improper_uniform, 228, 0.5);
    model_val_cdf(apop_improper_uniform, INFINITY, 0.5);

    apop_model *binom = apop_model_set_parameters(apop_binomial, 2001, 0.5);
    model_val_cdf(binom, 0, 0);
    model_val_cdf(binom, 1000, .5);
    model_val_cdf(binom, 2000, 1);

    apop_model *bernie = apop_model_set_parameters(apop_bernoulli, 0.75);
    //p(0)=.25; p(1)=.75; that determines the CDF.
    //Notice that the CDF's integral is over a closed interval.
    model_val_cdf(bernie, -1, 0);
    model_val_cdf(bernie, 0, 0.25);
    model_val_cdf(bernie, 0.1, 0.25);
    model_val_cdf(bernie, .99, 0.25);
    model_val_cdf(bernie, 1, 1);
    model_val_cdf(bernie, INFINITY, 1);

    //alpha=beta -> symmetry
    apop_model *beta = apop_model_set_parameters(apop_beta, 2, 2);
    model_val_cdf(beta, -INFINITY, 0);
    model_val_cdf(beta, 0.5, 0.5);
    model_val_cdf(beta, INFINITY, 1);

    //This beta distribution -> uniform
    apop_model *beta_uni = apop_model_set_parameters(apop_beta, 1, 1);
    model_val_cdf(beta_uni, 0, 0);
    model_val_cdf(beta_uni, 1./6, 1./6);
    model_val_cdf(beta_uni, 0.5, 0.5);
    model_val_cdf(beta_uni, 1, 1);


    beta_uni->cdf = NULL; //With no closed-form CDF; make random draws to estimate the CDF.
    Apop_model_add_group(beta_uni, apop_cdf, .draws=1e6); //extra draws to improve accuracy, but we have to lower our tolerance anyway.
    tolerance=1e-3;
    model_val_cdf(beta_uni, 0, 0);
    model_val_cdf(beta_uni, 1./6, 1./6);
    model_val_cdf(beta_uni, 0.5, 0.5);
    model_val_cdf(beta_uni, 1, 1);


    //sum of three symmetric distributions: still symmetric.
    apop_model *sum_of_three = apop_model_mixture(beta, apop_improper_uniform, beta_uni);
    model_val_cdf(sum_of_three, 0.5, 0.5);


    apop_data *threepts = apop_data_falloc((3,1), -1, 0, 1);
    apop_model *kernels = apop_estimate(threepts, apop_kernel_density);
    model_val_cdf(kernels, -5, 0);
    model_val_cdf(kernels, 0, 0.5);
    model_val_cdf(kernels, 10, 1);
}