Пример #1
0
int main(){
    gsl_rng *r = apop_rng_alloc(10);
    size_t i, ct = 5e4;

    //set up the model & params
    apop_data *d  = apop_data_alloc(ct,2);
    apop_data *params = apop_data_alloc(2,2,2);
    apop_data_fill(params, 8,  1, 0.5,
                           2,  0.5, 1);
    apop_model *pvm = apop_model_copy(apop_multivariate_normal);
    pvm->parameters = apop_data_copy(params);

    //make random draws from the multivar. normal
    //this `pull a row view, fill its data element' form works for rows but not cols.
    for(i=0; i< ct; i++){
        Apop_row(d, i, onerow);
        apop_draw(onerow->data, r, pvm);
    }

    //set up and estimate a model with fixed covariance matrix but free means
    gsl_vector_set_all(pvm->parameters->vector, GSL_NAN);
    apop_model *mep1   = apop_model_fix_params(pvm);
    apop_model *e1  = apop_estimate(d, *mep1);
    
    //compare results
    printf("original params: ");
    apop_vector_show(params->vector);
    printf("estimated params: ");
    apop_vector_show(e1->parameters->vector);
}
Пример #2
0
int main(){
    gsl_rng *r = apop_rng_alloc(2468);
    double binom_start = 0.6;
    double beta_start_a = 0.3;
    double beta_start_b = 0.5;
    int i, draws = 1500;
    double n = 4000;
    //First, the easy estimation using the conjugate distribution table.
    apop_model *bin = apop_model_set_parameters(apop_binomial, n, binom_start);
    apop_model *beta = apop_model_set_parameters(apop_beta, beta_start_a, beta_start_b);
    apop_model *updated = apop_update(.prior= beta, .likelihood=bin,.rng=r);

    //Now estimate via Gibbs sampling. 
    //Requires a one-parameter binomial, with n fixed,
    //and a data set of n data points with the right p.
    apop_model *bcopy = apop_model_set_parameters(apop_binomial, n, GSL_NAN);
    apop_data *bin_draws = apop_data_fill(apop_data_alloc(1,2), n*(1-binom_start), n*binom_start);
    bin = apop_model_fix_params(bcopy);
    apop_model_add_group(beta, apop_update, .burnin=.1, .periods=1e4);
    apop_model *out_h = apop_update(bin_draws, beta, bin, NULL);

    //We now have a histogram of values for p. What's the closest beta
    //distribution?
    apop_data *d = apop_data_alloc(0, draws, 1);
    for(i=0; i < draws; i ++)
        apop_draw(apop_data_ptr(d, i, 0), r, out_h);
    apop_model *out_beta = apop_estimate(d, apop_beta);
    //Finally, we can compare the conjugate and Gibbs results:
    apop_vector_normalize(updated->parameters->vector);
    apop_vector_normalize(out_beta->parameters->vector);
    double error = apop_vector_distance(updated->parameters->vector, out_beta->parameters->vector, .metric='m');
    double updated_size = apop_vector_sum(updated->parameters->vector);
    Apop_assert(error/updated_size < 0.01, "The error is %g, which is too big.", error/updated_size);
}
Пример #3
0
//Use this function to produce test data below.
apop_data *draw_exponentiated_normal(double mu, double sigma, double draws){
    apop_model *n01 = apop_model_set_parameters(apop_normal, mu, sigma);
    apop_data *d = apop_data_alloc(draws);
    gsl_rng *r = apop_rng_alloc(13);
    for (int i=0; i< draws; i++) apop_draw(gsl_vector_ptr(d->vector,i), r, n01);
    apop_vector_exp(d->vector);
    return d;
}
Пример #4
0
//generate a vector that is the original vector + noise
void add_noise(gsl_vector *in, gsl_rng *r, double size){
    apop_model *nnoise = apop_model_set_parameters(apop_normal, 0, size);
    for (int i=0; i< in->size; i++){
        double noise;
        apop_draw(&noise, r, nnoise);
        apop_vector_increment(in, i, noise);
    }
    apop_model_free(nnoise);
}
Пример #5
0
int main(){
    gsl_rng *r = apop_rng_alloc(2312311);
    int empirical_size = 5e3;
    apop_model *expo = apop_model_set_parameters(apop_exponential, 1.7);
    assert (apop_kl_divergence(expo, expo) < 1e-4);
    apop_data *empirical = apop_data_alloc(empirical_size, 1);
    for (int i=0; i<empirical_size; i++)
        apop_draw(apop_data_ptr(empirical, i, 0), r, expo);
    apop_model *pmf = apop_estimate(empirical, apop_pmf);
    assert(apop_kl_divergence(pmf,expo) < 1e-4);
    apop_data_free(empirical);
}
Пример #6
0
apop_model *fuzz(apop_model sim){
    int draws = 100;
    gsl_rng *r = apop_rng_alloc(1);
    apop_model *prior = apop_model_cross(
                            apop_model_set_parameters(apop_normal, 10, 2),
                            apop_model_set_parameters(apop_normal, 10, 2));
    apop_data *outdata = apop_data_alloc(draws, weibull->vsize);
    double *params = sim.parameters->vector->data;
    for (int i=0; i< draws; i++){
        do {
            apop_draw(params, r, prior);
        } while (params[1]*2 > pow(params[0], 2));
        sim.dsize=params[1];
        apop_model *est = apop_estimate(apop_model_draws(&sim, 1000), weibull);
        Apop_row_v(outdata, i, onerow);
        gsl_vector_memcpy(onerow, est->parameters->vector);
        apop_model_free(est);
    }
    return apop_estimate(outdata, apop_pmf);
}
Пример #7
0
int main(){
    //bind together a Poisson and a Normal;
    //make a draw producing a 2-element vector
    apop_model *m1 = apop_model_set_parameters(apop_poisson, 3);
    apop_model *m2 = apop_model_set_parameters(apop_normal, -5, 1);
    apop_model *mm = apop_model_stack(m1, m2);
    int len = 1e5;
    gsl_rng *r = apop_rng_alloc(1);
    apop_data *draws = apop_data_alloc(len, 2);
    for (int i=0; i< len; i++){
        Apop_row (draws, i, onev);
        apop_draw(onev->data, r, mm);
        assert((int)onev->data[0] == onev->data[0]);
        assert(onev->data[1]<0);
    }

    //The rest of the test script recovers the parameters.
    //First, set up a two-page data set: poisson data on p1, Normal on p2:
    apop_data *comeback = apop_data_alloc();
    Apop_col(draws, 0,fishdraws)
    comeback->vector = apop_vector_copy(fishdraws);
    apop_data_add_page(comeback, apop_data_alloc(), "p2");
    Apop_col(draws, 1, meandraws)
    comeback->more->vector = apop_vector_copy(meandraws);

    //set up the un-parameterized stacked model, including
    //the name at which to split the data set
    apop_model *estme = apop_model_stack(apop_model_copy(apop_poisson), apop_model_copy(apop_normal));
    Apop_settings_add(estme, apop_stack, splitpage, "p2");
    apop_model *ested = apop_estimate(comeback, *estme);

    //test that the parameters are as promised.
    apop_model *m1back = apop_settings_get(ested, apop_stack, model1);
    apop_model *m2back = apop_settings_get(ested, apop_stack, model2);
    assert(fabs(apop_data_get(m1back->parameters, .col=-1) - 3) < 1e-2);
    assert(fabs(apop_data_get(m2back->parameters, .col=-1) - -5) < 1e-2);
    assert(fabs(apop_data_get(m2back->parameters, .col=-1, .row=1) - 1) < 1e-2);
}
Пример #8
0
/** Make random draws from an \ref apop_model, and bin them using a binspec in the style
 of \ref apop_data_to_bins. If you have a data set that used the same binspec, you now have synced histograms, which you can plot or sensibly test hypotheses about.

The output is normalized to integrate to one.

\param binspec A description of the bins in which to place the draws; see \ref apop_data_to_bins. (default: as in \ref apop_data_to_bins.)
\param model The model to be drawn from. Because this function works via random draws, the model needs to have a 
\c draw method. (No default)
\param draws The number of random draws to make. (arbitrary default = 10,000)
\param bin_count If no bin spec, the number of bins to use (default: as per \ref apop_data_to_bins, \f$\sqrt(N)\f$)
\param rng The \c gsl_rng used to make random draws. (default: see note on \ref autorng)

\return An \ref apop_pmf model.

\li This function uses the \ref designated syntax for inputs.

\ingroup histograms
*/
APOP_VAR_HEAD apop_model *apop_model_to_pmf(apop_model *model, apop_data *binspec, long int draws, int bin_count, gsl_rng *rng){
    apop_model* apop_varad_var(model, NULL);
    Apop_assert(model && model->draw, "The second argument needs to be an apop_model with a 'draw' function "
                              "that I can use to make random draws.");
    apop_data* apop_varad_var(binspec, NULL);
    int apop_varad_var(bin_count, 0);
    long int apop_varad_var(draws, 1e4);
    gsl_rng *apop_varad_var(rng, NULL)
    static gsl_rng *spare = NULL;
    if (!rng && !spare) 
        spare = apop_rng_alloc(++apop_opts.rng_seed);
    if (!rng) rng = spare;
APOP_VAR_ENDHEAD
    Get_vmsizes(binspec);
    apop_data *outd = apop_data_alloc(draws, model->dsize); 
    for (long int i=0; i< draws; i++){
        Apop_row(outd, i, ach);
        apop_draw(ach->data, rng, model);
    }
    apop_data *outbinned = apop_data_to_bins(outd, binspec, .bin_count=bin_count);
    apop_data_free(outd);
    apop_vector_normalize(outbinned->weights);
    return apop_estimate(outbinned, apop_pmf);
} 
Пример #9
0
static long double wishart_ll(apop_data *in, apop_model *m){
    Nullcheck_mpd(in, m, GSL_NAN);
    wishartstruct_t ws = {
            .paraminv = apop_matrix_inverse(m->parameters->matrix),
            .len = sqrt(in->matrix->size2),
            .df = m->parameters->vector->data[0]
        };
    double paramdet = apop_matrix_determinant(m->parameters->matrix);
    if (paramdet < 1e-3) return GSL_NEGINF;
    double ll =  apop_map_sum(in, .fn_vp = one_wishart_row, .param=&ws, .part='r');
    double k = log(ws.df)*ws.df/2.;
    k -= M_LN2 * ws.len* ws.df/2.;
    k -= log(paramdet) * ws.df/2.;
    k -= apop_multivariate_lngamma(ws.df/2., ws.len);
    return ll + k*in->matrix->size1;
}

static int apop_wishart_draw(double *out, gsl_rng *r, apop_model *m){
    /*
Translated from the Fortran by BK. Fortran comments:

C          SUBROUTINE DWSHRT(D, N, NP, NNP, SB, SA)
C
C       ALGORITHM AS 53  APPL. STATIST. (1972) VOL.21, NO.3
C
C     Wishart variate generator.  On output, SA is an upper-triangular
C     matrix of size NP * NP [...]
C     whose elements have a Wishart(N, SIGMA) distribution.
*/
    Nullcheck_mp(m, );
    int np = m->parameters->matrix->size1;
    int n = m->parameters->vector->data[0];
    if (!m->more) { 
        gsl_matrix *ccc = apop_matrix_copy(m->parameters->matrix);
        gsl_linalg_cholesky_decomp(ccc);
        for (int i=0; i < ccc->size1; i++) //zero out the upper diagonal
            for (int j=i+1; j < ccc->size2; j++)
                gsl_matrix_set(ccc, i, j, 0);
        m->more = apop_matrix_to_data(ccc);
        m->more_size = sizeof(apop_data);
    }
    apop_data *Chol = m->more;
    apop_data *rmatrix = apop_data_calloc(np, np);
    Staticdef(apop_model *, std_normal, apop_model_set_parameters(apop_normal, 0, 1));

//C     Load diagonal elements with square root of chi-square variates
    for(int i = 0; i< np; i++){
        int DF = n - i;
        apop_data_set(rmatrix, i, i, sqrt(gsl_ran_chisq(r, DF)));
    }
    
    for(int i = 1; i< np; i++) //off-diagonal triangles: Normals.
          for(int j = 0; j< i; j++){
            double ndraw;
            apop_draw(&ndraw, r, std_normal);
            assert (!gsl_isnan(ndraw));
            apop_data_set(rmatrix, i, j, ndraw);
          }
    //Now find C * rand * rand' * C'
    apop_data *cr = apop_dot(Chol, rmatrix);
    apop_data *crr = apop_dot(cr, rmatrix, .form2='t');
    apop_data *crrc = apop_dot(crr, Chol, .form2='t');
    memmove(out, crrc->matrix->data, sizeof(double)*np*np);
    apop_data_free(rmatrix); apop_data_free(cr);
    apop_data_free(crrc);    apop_data_free(crr);
    return 0;
}