コード例 #1
0
ファイル: update_via_rng.c プロジェクト: ClinImmune/Apophenia
/* Beta prior with Binomial-style data: run apop_update several ways and
   compare each result against the first update via deciles(). */
void betabinom(){
    apop_model *prior = apop_model_set_parameters(apop_beta, 10, 5);

    // Data source: a copy of the multinomial with a two-element parameter set (30, .4).
    apop_model *source = apop_model_copy(apop_multinomial);
    source->parameters = apop_data_falloc((2), 30, .4);
    source->dsize = 2;
    int n_draws = 80;
    apop_data *sample = apop_model_draws(source, n_draws);

    // Reference result: update against the stock binomial.
    apop_model *reference = apop_update(sample, prior, apop_binomial);
    apop_model_show(reference);

    // Swap in fake_ll as the prior's log likelihood.
    // NOTE(review): presumably this keeps apop_update from recognizing the
    // stock beta/binomial pair -- confirm against apop_update.
    prior->more = apop_beta;
    prior->log_likelihood = fake_ll;
    apop_model *open_binomial = apop_model_set_parameters(apop_binomial, 30, NAN);
    apop_model *likelihood = apop_model_fix_params(open_binomial);
    apop_model *via_ll = apop_update(sample, prior, likelihood);
    apop_model *refit = apop_estimate(via_ll->data, apop_beta);
    deciles(refit, reference, 1);

    // Same update, now with the prior's log likelihood removed entirely.
    prior->log_likelihood = NULL;
    apop_model *via_null = apop_update(sample, prior, likelihood);
    refit = apop_estimate(apop_data_pmf_expand(via_null->data, 2000), apop_beta);
    deciles(refit, reference, 1);

    // Finally, draw from the first alternative update and fit a beta to the draws.
    apop_data *redraws = apop_model_draws(via_ll, n_draws*2);
    apop_model *redraw_fit = apop_estimate(redraws, apop_beta);
    deciles(redraw_fit, reference, 1);
}
コード例 #2
0
ファイル: update_via_rng.c プロジェクト: ClinImmune/Apophenia
/* Gamma prior with Exponential data: run apop_update several ways and compare
   each refitted gamma against the first update via deciles(). */
void gammaexpo(){
    printf("gamma/exponential\n");
    apop_model *prior = apop_model_set_parameters(apop_gamma, 1, 0.4);

    apop_model *source = apop_model_set_parameters(apop_exponential, 0.4);
    int n_draws = 120;
    apop_data *sample = apop_model_draws(source, n_draws);

    // Reference result: update against the stock exponential.
    apop_model *reference = apop_update(sample, prior, apop_exponential);
    apop_model_show(reference);

    // Replace the prior's log likelihood with fake_ll and attach MCMC
    // settings with a Normal(1, .001) proposal.
    prior->more = apop_gamma;
    prior->log_likelihood = fake_ll;
    apop_model *tight_proposal = apop_model_set_parameters(apop_normal, 1, .001);
    Apop_settings_add_group(prior, apop_mcmc, .burnin=.1, .periods=1e5,
            .proposal=tight_proposal);
    apop_model *via_ll = apop_update(sample, prior, apop_exponential);
    apop_model *refit = apop_estimate(via_ll->data, apop_gamma);
    apop_model_show(refit);
    deciles(refit, reference, 3);

    // Second pass: Normal(1, .01) proposal and no log likelihood on the prior.
    apop_model *loose_proposal = apop_model_set_parameters(apop_normal, 1, .01);
    Apop_settings_add_group(prior, apop_mcmc, .burnin=.1, .periods=1e5,
            .proposal=loose_proposal);
    prior->log_likelihood = NULL;
    apop_model *via_null = apop_update(sample, prior, apop_exponential);
    apop_data *expanded = apop_data_pmf_expand(via_null->data, 2000);
    apop_model *refit_null = apop_estimate(expanded, apop_gamma);
    deciles(refit_null, reference, 5);
}
コード例 #3
0
ファイル: test_updating.c プロジェクト: RayRacine/Apophenia
int main(){
    gsl_rng *r = apop_rng_alloc(2468);
    double binom_start = 0.6;
    double beta_start_a = 0.3;
    double beta_start_b = 0.5;
    int i, draws = 1500;
    double n = 4000;
    //First, the easy estimation using the conjugate distribution table.
    apop_model *bin = apop_model_set_parameters(apop_binomial, n, binom_start);
    apop_model *beta = apop_model_set_parameters(apop_beta, beta_start_a, beta_start_b);
    apop_model *updated = apop_update(.prior= beta, .likelihood=bin,.rng=r);

    //Now estimate via Gibbs sampling. 
    //Requires a one-parameter binomial, with n fixed,
    //and a data set of n data points with the right p.
    apop_model *bcopy = apop_model_set_parameters(apop_binomial, n, GSL_NAN);
    apop_data *bin_draws = apop_data_fill(apop_data_alloc(1,2), n*(1-binom_start), n*binom_start);
    bin = apop_model_fix_params(bcopy);
    apop_model_add_group(beta, apop_update, .burnin=.1, .periods=1e4);
    apop_model *out_h = apop_update(bin_draws, beta, bin, NULL);

    //We now have a histogram of values for p. What's the closest beta
    //distribution?
    apop_data *d = apop_data_alloc(0, draws, 1);
    for(i=0; i < draws; i ++)
        apop_draw(apop_data_ptr(d, i, 0), r, out_h);
    apop_model *out_beta = apop_estimate(d, apop_beta);
    //Finally, we can compare the conjugate and Gibbs results:
    apop_vector_normalize(updated->parameters->vector);
    apop_vector_normalize(out_beta->parameters->vector);
    double error = apop_vector_distance(updated->parameters->vector, out_beta->parameters->vector, .metric='m');
    double updated_size = apop_vector_sum(updated->parameters->vector);
    Apop_assert(error/updated_size < 0.01, "The error is %g, which is too big.", error/updated_size);
}
コード例 #4
0
ファイル: update_via_rng.c プロジェクト: ClinImmune/Apophenia
void gammafish(){
    printf("gamma/poisson\n");
    apop_model *gamma = apop_model_set_parameters(apop_gamma, 1.5, 2.2);

    apop_model *drawfrom = apop_model_set_parameters(apop_poisson, 3.1);
    int draw_ct = 90;
    apop_data *draws = apop_model_draws(drawfrom, draw_ct);

    apop_model *gammaup = apop_update(draws, gamma, apop_poisson);
    apop_model_show(gammaup);

    gamma->more = apop_gamma;
    gamma->log_likelihood = fake_ll;
    apop_model *proposal = apop_model_fix_params(apop_model_set_parameters(apop_normal, NAN, 1));
    proposal->parameters = apop_data_falloc((1), .9);
    //apop_data_set(apop_settings_get(gamma, apop_mcmc, proposal)->parameters, .val=.9);
    Apop_settings_add_group(gamma, apop_mcmc, .burnin=.1, .periods=1e4, .proposal=proposal);
    apop_model *upd = apop_update(draws, gamma, apop_poisson);
    apop_model *gammafied = apop_estimate(upd->data, apop_gamma);
    deciles(gammafied, gammaup, 5);
    //Apop_settings_add_group(beta, apop_mcmc, .burnin=.4, .periods=1e4);
    gamma->log_likelihood = NULL;
    apop_model *upd_r = apop_update(draws, gamma, apop_poisson);
    apop_model *gammafied2 = apop_estimate(apop_data_pmf_expand(upd_r->data, 2000), apop_gamma);
    deciles(gammafied2, gammaup, 5);
    deciles(gammafied, gammafied2, 5);
}
コード例 #5
0
ファイル: jacobian.c プロジェクト: b-k/apophenia
/* Produce test data: `draws` values drawn from a Normal(mu, sigma) model and
   then exponentiated in place.

   mu, sigma: parameters handed to apop_normal.
   draws:     number of values to generate (used as the vector length).

   Returns a newly allocated apop_data whose vector holds the exponentiated
   draws; the caller owns it. The temporary model and the random number
   generator (seeded with 13 on every call) are released before returning. */
apop_data *draw_exponentiated_normal(double mu, double sigma, double draws){
    apop_model *n01 = apop_model_set_parameters(apop_normal, mu, sigma);
    apop_data *d = apop_data_alloc(draws);
    gsl_rng *r = apop_rng_alloc(13);
    for (int i=0; i< draws; i++) apop_draw(gsl_vector_ptr(d->vector,i), r, n01);
    apop_vector_exp(d->vector);
    //Neither the model nor the RNG is needed once the draws are made.
    gsl_rng_free(r);
    apop_model_free(n01);
    return d;
}
コード例 #6
0
ファイル: apop_asst.c プロジェクト: RayRacine/Apophenia
/** The Beta distribution is useful for modeling because it is bounded between zero and one, and can be either unimodal (if the variance is low) or bimodal (if the variance is high), and can have either a slant toward the bottom or top of the range (depending on the mean).

The distribution has two parameters, typically named \f$\alpha\f$ and \f$\beta\f$, which can be difficult to interpret. However, there is a one-to-one mapping between (alpha, beta) pairs and (mean, variance) pairs. Since we have good intuition about the meaning of means and variances, this function takes in a mean and variance, calculates alpha and beta behind the scenes, and returns a Beta model with those parameters.

The mapping used is \f$k = m(1-m)/v - 1\f$, \f$\alpha = mk\f$, \f$\beta = (1-m)k\f$, so \f$k = \alpha+\beta\f$.

\param m
The mean the Beta distribution should have. It must be strictly between zero and one.

\param v
The variance which the Beta distribution should have. It must be strictly positive and strictly less than \f$m(1-m)\f$; otherwise \f$k\f$, and therefore \f$\alpha\f$ and \f$\beta\f$, would not be positive. For reference, 1/12 is the variance of a Uniform(0,1) distribution (\f$\alpha=\beta=1\f$); larger variances give a U-shaped density.

\return
Returns an \c apop_beta model with its parameters appropriately set. Both input conditions are checked with \c Apop_assert.

*/
apop_model *apop_beta_from_mean_var(double m, double v){
    Apop_assert(m<1&&m > 0, "You asked for a beta distribution "
                        "with mean %g, but the mean of the beta will always "
                        "be strictly between zero and one.", m);
    //Written so that a NaN variance also fails the check.
    Apop_assert(v > 0 && v < m*(1-m), "You asked for a beta distribution "
                        "with mean %g and variance %g, but the variance must be "
                        "strictly between zero and mean*(1-mean) = %g.", m, v, m*(1-m));
    double k     = (m * (1- m)/ v) -1;  //k = alpha + beta
    double alpha = m*k;
    double beta  = k * (1-m);
    return apop_model_set_parameters(apop_beta, alpha, beta);
}
コード例 #7
0
ファイル: iv.c プロジェクト: RayRacine/Apophenia
/* Perturb every element of `in`, in place, by an independent draw from a
   Normal model with parameters (0, size), using the generator `r`. */
void add_noise(gsl_vector *in, gsl_rng *r, double size){
    apop_model *noise_model = apop_model_set_parameters(apop_normal, 0, size);
    size_t idx = 0;
    while (idx < in->size){
        double shock;
        apop_draw(&shock, r, noise_model);
        apop_vector_increment(in, idx, shock);
        idx++;
    }
    apop_model_free(noise_model);
}
コード例 #8
0
ファイル: parameterization.c プロジェクト: juroland/apophenia
/* Tour of the ways a model can be given parameters: estimated from data, set
   from a fixed-length list, filled in by hand, or configured through a
   settings group. Each resulting model is handed to print_draws(). */
int main(){
    //Source data: ten draws from a uniform model with parameters (0, 20).
    apop_model *u_0_20 = apop_model_set_parameters(apop_uniform, 0, 20);
    apop_data *sample = apop_model_draws(u_0_20, 10);

    //A Normal estimated from the data.
    apop_model *normal_fit = apop_estimate(sample, apop_normal);
    print_draws(normal_fit);

    //A multivariate Normal estimated from the same one-column data.
    apop_model *mvn_fit = apop_estimate(sample, apop_multivariate_normal);
    print_draws(mvn_fit);

    //Parameters given as a fixed-length list.
    apop_model *n_0_1 = apop_model_set_parameters(apop_normal, 0, 1);
    print_draws(n_0_1);

    //Parameters whose size depends on the dimension: set the sizes by hand,
    //then fill the parameter set directly.
    apop_model *mvn_3d = apop_model_copy(apop_multivariate_normal);
    mvn_3d->dsize = 3;
    mvn_3d->vsize = 3;
    mvn_3d->msize2 = 3;
    mvn_3d->msize1 = 3;
    mvn_3d->parameters = apop_data_falloc((3, 3, 3),
                                1,  1, 0, 0,
                                1,  0, 1, 0,
                                1,  0, 0, 1);
    print_draws(mvn_3d);

    //A kernel density estimate with default settings.
    apop_model *kde_default = apop_estimate(sample, apop_kernel_density);
    print_draws(kde_default);

    /*Alternatively, build the KDE by filling its settings group directly,
      here with a Normal kernel whose parameters are set to (0, 2): */
    apop_model *wide_kernel = apop_model_set_parameters(apop_normal, 0, 2);
    apop_model *kde_custom = apop_model_copy_set(apop_kernel_density, apop_kernel_density,
                         .base_data=sample,
                         .kernel = wide_kernel);
    print_draws(kde_custom);
}
コード例 #9
0
ファイル: 156-find.c プロジェクト: b-k/modeling_examples
/* Repeatedly draw a parameter pair for `sim` from a prior (the cross of two
   Normal(10, 2) models), simulate 1000 observations, estimate the `weibull`
   model on them, and collect the estimated parameter vectors.

   Returns a PMF model estimated over the 100 collected parameter rows. */
apop_model *fuzz(apop_model sim){
    int n_rounds = 100;
    gsl_rng *rng = apop_rng_alloc(1);
    apop_model *first_normal = apop_model_set_parameters(apop_normal, 10, 2);
    apop_model *second_normal = apop_model_set_parameters(apop_normal, 10, 2);
    apop_model *prior = apop_model_cross(first_normal, second_normal);
    apop_data *collected = apop_data_alloc(n_rounds, weibull->vsize);
    //Draws are written straight into the simulation's parameter vector.
    double *params = sim.parameters->vector->data;
    for (int round=0; round< n_rounds; round++){
        //Redraw until the pair no longer satisfies params[1]*2 > params[0]^2.
        for (;;){
            apop_draw(params, rng, prior);
            if (!(params[1]*2 > pow(params[0], 2))) break;
        }
        sim.dsize=params[1];
        apop_data *simulated = apop_model_draws(&sim, 1000);
        apop_model *fit = apop_estimate(simulated, weibull);
        Apop_row_v(collected, round, target_row);
        gsl_vector_memcpy(target_row, fit->parameters->vector);
        apop_model_free(fit);
    }
    return apop_estimate(collected, apop_pmf);
}
コード例 #10
0
int main(){
    gsl_rng *r = apop_rng_alloc(2312311);
    int empirical_size = 5e3;
    apop_model *expo = apop_model_set_parameters(apop_exponential, 1.7);
    assert (apop_kl_divergence(expo, expo) < 1e-4);
    apop_data *empirical = apop_data_alloc(empirical_size, 1);
    for (int i=0; i<empirical_size; i++)
        apop_draw(apop_data_ptr(empirical, i, 0), r, expo);
    apop_model *pmf = apop_estimate(empirical, apop_pmf);
    assert(apop_kl_divergence(pmf,expo) < 1e-4);
    apop_data_free(empirical);
}
コード例 #11
0
ファイル: 150-update.c プロジェクト: b-k/modeling_examples
int main(){
    apop_model_print (
        apop_estimate(
             apop_update(
                apop_model_draws(
                    apop_model_mixture(
                        apop_model_set_parameters(apop_poisson, 2.8),
                        apop_model_set_parameters(apop_poisson, 2.0),
                        apop_model_set_parameters(apop_poisson, 1.3)
                    ), 
                    1e4
                ),
                truncate_model(
                    apop_model_set_parameters(apop_normal, 2, 1), 
                    0
                ), 
                apop_poisson
            )->data,
            apop_normal
        )
    , NULL);
}
コード例 #12
0
ファイル: stack_models.c プロジェクト: RayRacine/Apophenia
int main(){
    //bind together a Poisson and a Normal;
    //make a draw producing a 2-element vector
    apop_model *m1 = apop_model_set_parameters(apop_poisson, 3);
    apop_model *m2 = apop_model_set_parameters(apop_normal, -5, 1);
    apop_model *mm = apop_model_stack(m1, m2);
    int len = 1e5;
    gsl_rng *r = apop_rng_alloc(1);
    apop_data *draws = apop_data_alloc(len, 2);
    for (int i=0; i< len; i++){
        Apop_row (draws, i, onev);
        apop_draw(onev->data, r, mm);
        assert((int)onev->data[0] == onev->data[0]);
        assert(onev->data[1]<0);
    }

    //The rest of the test script recovers the parameters.
    //First, set up a two-page data set: poisson data on p1, Normal on p2:
    apop_data *comeback = apop_data_alloc();
    Apop_col(draws, 0,fishdraws)
    comeback->vector = apop_vector_copy(fishdraws);
    apop_data_add_page(comeback, apop_data_alloc(), "p2");
    Apop_col(draws, 1, meandraws)
    comeback->more->vector = apop_vector_copy(meandraws);

    //set up the un-parameterized stacked model, including
    //the name at which to split the data set
    apop_model *estme = apop_model_stack(apop_model_copy(apop_poisson), apop_model_copy(apop_normal));
    Apop_settings_add(estme, apop_stack, splitpage, "p2");
    apop_model *ested = apop_estimate(comeback, *estme);

    //test that the parameters are as promised.
    apop_model *m1back = apop_settings_get(ested, apop_stack, model1);
    apop_model *m2back = apop_settings_get(ested, apop_stack, model2);
    assert(fabs(apop_data_get(m1back->parameters, .col=-1) - 3) < 1e-2);
    assert(fabs(apop_data_get(m2back->parameters, .col=-1) - -5) < 1e-2);
    assert(fabs(apop_data_get(m2back->parameters, .col=-1, .row=1) - 1) < 1e-2);
}
コード例 #13
0
ファイル: smoothing.c プロジェクト: choct155/work_scratch
/* Read the pcp column of the precip table from data-climate.db, write its
   PMF to "out.h", set up a kernel density model over that PMF with a Normal
   kernel (parameters 0, 0.1), hand it to plot() with the name "out.k", and
   print a plotting command naming both files.

   Returns 0 on success, 1 if the query yields no data or "out.h" cannot be
   opened or closed. */
int main(){
    apop_db_open("data-climate.db");

    apop_data *data = apop_query_to_data("select pcp from precip");
    if (!data){
        //Without this check, data->weights below dereferences NULL.
        fprintf(stderr, "The query on the precip table returned no data.\n");
        return 1;
    }
    apop_data_pmf_compress(data); //creates a weights vector
    apop_vector_normalize(data->weights);
    apop_data_sort(data);
    apop_model *pmf = apop_estimate(data, apop_pmf);

    FILE *outfile = fopen("out.h", "w");
    if (!outfile){
        perror("out.h");
        return 1;
    }
    apop_model_print(pmf, outfile);
    //Close (and so flush) the file; a failure here means the write was lost.
    if (fclose(outfile) != 0){
        perror("out.h");
        return 1;
    }

    apop_model *kernel = apop_model_set_parameters(apop_normal, 0., 0.1);
    apop_model *k = apop_model_copy(apop_kernel_density);
    Apop_settings_add_group(k, apop_kernel_density, .base_pmf=pmf, .kernel=kernel);
    plot(k, "out.k");
    printf("plot 'out.h' with lines title 'data', 'out.k' with lines title 'smoothed'\n");
    return 0;
}
コード例 #14
0
ファイル: apop_histogram.c プロジェクト: biosmooth/Apophenia
\adoc Examples
This example sets up and uses KDEs based on a Normal and a Uniform distribution.

\include kernel.c
*/

/* Copy the first value of `in` into the first element of m's parameter
   vector: element 0 of in's vector when it has one, otherwise element (0,0)
   of in's matrix. */
static void apop_set_first_param(apop_data *in, apop_model *m){
    double first_value;
    if (in->vector)
        first_value = in->vector->data[0];
    else
        first_value = gsl_matrix_get(in->matrix, 0, 0);
    m->parameters->vector->data[0] = first_value;
}

/* Initializer for the apop_kernel_density settings group. Here `in` is the
   settings as supplied by the caller and `out` is the group being built.
   NOTE(review): Apop_varad_set presumably installs the given value only when
   the caller left that element unset -- confirm against the macro. */
Apop_settings_init(apop_kernel_density, 
    //If there's a PMF associated with the model, run with it.
    //else, generate one from the data.
    Apop_varad_set(base_pmf, apop_estimate(in.base_data, apop_pmf));
    //Kernel fallback: a Normal with parameters (0, 1).
    Apop_varad_set(kernel, apop_model_set_parameters(apop_normal, 0, 1));
    //set_fn fallback: copy the first data value into the kernel's first parameter.
    Apop_varad_set(set_fn, apop_set_first_param);
    //Ownership flags are true exactly when the caller supplied no PMF/kernel.
    out->own_pmf = !in.base_pmf;
    out->own_kernel = !in.kernel;
    //A kernel that arrives without a parameter set is prepped against the base data.
    if (!out->kernel->parameters) apop_prep(out->base_data, out->kernel);
)

/* Copy hook for the apop_kernel_density settings group: the copy (`out`) has
   both ownership flags cleared, so the free hook will not release base_pmf
   or kernel through the copy. */
Apop_settings_copy(apop_kernel_density,
    out->own_pmf    =
    out->own_kernel = 0;
)

/* Free hook for the apop_kernel_density settings group: release the base PMF
   and the kernel only when the matching ownership flag is set on `in`. */
Apop_settings_free(apop_kernel_density,
    if (in->own_pmf)    apop_model_free(in->base_pmf);
    if (in->own_kernel) apop_model_free(in->kernel);
)
コード例 #15
0
ファイル: jacobian.c プロジェクト: b-k/apophenia
/* The transformed-to-base function: apply log to the elements of `in`
   selected by .part='a' and return the result of apop_map. */
apop_data *rev(apop_data *in){
    apop_data *logged = apop_map(in, .fn_d=log, .part='a');
    return logged;
}

/* Derivative of the transformed-to-base function (log): the reciprocal of the input. */
double inv(double in){
    return 1.0 / in;
}
/* The Jacobian-to-base function: the absolute value of the sum of inv()
   applied to the elements of `in` selected by .part='a'. */
double rev_j(apop_data *in){
    double reciprocal_sum = apop_map_sum(in, .fn_d=inv, .part='a');
    return fabs(reciprocal_sum);
}

/* Build a Lognormal by hand as a coordinate transform of the Normal (using
   rev and rev_j), estimate it on exponentiated Normal draws, and check with
   Diff() that it agrees with the stock apop_lognormal. */
int main(){
    apop_model *transformed = apop_model_coordinate_transform(
                        .transformed_to_base= rev, .jacobian_to_base=rev_j,
                        .base_model=apop_normal);
    //Apop_model_add_group(ct, apop_parts_wanted);//Speed up the MLE.

    //Fake data: 2e5 exponentiated draws from a Normal(mu, sigma).
    double mu=2, sigma=1;
    apop_data *sample = draw_exponentiated_normal(mu, sigma, 2e5);

    //A correct replica of the Lognormal recovers mu and sigma.
    apop_model *fit = apop_estimate(sample, transformed);
    apop_model_free(transformed);
    Diff(apop_data_get(fit->parameters, 0), mu);
    Diff(apop_data_get(fit->parameters, 1), sigma);

    /*The K-L divergence from the stock Lognormal should be near zero, both
      for a stock Lognormal given the estimated parameters and for the
      estimated transformed model itself. */
    apop_model *stock_true = apop_model_set_parameters(apop_lognormal, mu, sigma);
    apop_model *stock_estimated = apop_model_copy(apop_lognormal);
    stock_estimated->parameters = fit->parameters;   //shared with fit, not copied
    Diff(apop_kl_divergence(stock_true, stock_estimated,.draw_ct=1000), 0);
    Diff(apop_kl_divergence(stock_true, fit,.draw_ct=1000), 0);
}
コード例 #16
0
ファイル: kernel.c プロジェクト: ClinImmune/Apophenia
/* Make ten draws from a uniform model with parameters (0, 20), pass the data
   through apop_data_sort, print the result to stderr, and return the data set. */
apop_data *draw_some_data(){
    apop_model *source = apop_model_set_parameters(apop_uniform, 0, 20);
    apop_data *sample = apop_model_draws(source, 10);
    apop_data *sorted = apop_data_sort(sample);
    apop_data_print(sorted, .output_pipe=stderr);
    return sample;
}
コード例 #17
0
ファイル: some_cdfs.c プロジェクト: b-k/apophenia
int main(){
    //Set up an apop_data set with only one number.
    //Most of these functions will only look at the first data point encountered.
    apop_data *onept = apop_data_falloc((1), 23);

    apop_model *norm = apop_model_set_parameters(apop_normal, 23, 138.8);
    double val = apop_cdf(onept, norm);
    assert(fabs(val - 0.5) < 1e-4);

    double tolerance = 1e-8;
    //Macroizing the sample routine above:
    #define model_val_cdf(model, value, cdf_result) {   \
        apop_data_set(onept, .val=(value));             \
        assert(fabs((apop_cdf(onept, model))-(cdf_result))< tolerance);   \
    }

    apop_model *uni = apop_model_set_parameters(apop_uniform, 20, 26);
    model_val_cdf(uni, 0, 0);
    model_val_cdf(uni, 20, 0);
    model_val_cdf(uni, 21, 1./6);
    model_val_cdf(uni, 23, 0.5);
    model_val_cdf(uni, 25, 5./6);
    model_val_cdf(uni, 26, 1);
    model_val_cdf(uni, 260, 1);

    //Improper uniform always returns 1/2.
    model_val_cdf(apop_improper_uniform, 0, 0.5);
    model_val_cdf(apop_improper_uniform, 228, 0.5);
    model_val_cdf(apop_improper_uniform, INFINITY, 0.5);

    apop_model *binom = apop_model_set_parameters(apop_binomial, 2001, 0.5);
    model_val_cdf(binom, 0, 0);
    model_val_cdf(binom, 1000, .5);
    model_val_cdf(binom, 2000, 1);

    apop_model *bernie = apop_model_set_parameters(apop_bernoulli, 0.75);
    //p(0)=.25; p(1)=.75; that determines the CDF.
    //Notice that the CDF's integral is over a closed interval.
    model_val_cdf(bernie, -1, 0);
    model_val_cdf(bernie, 0, 0.25);
    model_val_cdf(bernie, 0.1, 0.25);
    model_val_cdf(bernie, .99, 0.25);
    model_val_cdf(bernie, 1, 1);
    model_val_cdf(bernie, INFINITY, 1);

    //alpha=beta -> symmetry
    apop_model *beta = apop_model_set_parameters(apop_beta, 2, 2);
    model_val_cdf(beta, -INFINITY, 0);
    model_val_cdf(beta, 0.5, 0.5);
    model_val_cdf(beta, INFINITY, 1);

    //This beta distribution -> uniform
    apop_model *beta_uni = apop_model_set_parameters(apop_beta, 1, 1);
    model_val_cdf(beta_uni, 0, 0);
    model_val_cdf(beta_uni, 1./6, 1./6);
    model_val_cdf(beta_uni, 0.5, 0.5);
    model_val_cdf(beta_uni, 1, 1);


    beta_uni->cdf = NULL; //With no closed-form CDF; make random draws to estimate the CDF.
    Apop_model_add_group(beta_uni, apop_cdf, .draws=1e6); //extra draws to improve accuracy, but we have to lower our tolerance anyway.
    tolerance=1e-3;
    model_val_cdf(beta_uni, 0, 0);
    model_val_cdf(beta_uni, 1./6, 1./6);
    model_val_cdf(beta_uni, 0.5, 0.5);
    model_val_cdf(beta_uni, 1, 1);


    //sum of three symmetric distributions: still symmetric.
    apop_model *sum_of_three = apop_model_mixture(beta, apop_improper_uniform, beta_uni);
    model_val_cdf(sum_of_three, 0.5, 0.5);


    apop_data *threepts = apop_data_falloc((3,1), -1, 0, 1);
    apop_model *kernels = apop_estimate(threepts, apop_kernel_density);
    model_val_cdf(kernels, -5, 0);
    model_val_cdf(kernels, 0, 0.5);
    model_val_cdf(kernels, 10, 1);
}