Пример #1
0
/* Run apop_update on a Beta model against binomial-style draws in several
   configurations, handing each refit to deciles() next to the first result.
   fake_ll and deciles are defined elsewhere in this file. */
void betabinom(){
    apop_model *prior = apop_model_set_parameters(apop_beta, 10, 5);

    //Data source: a copy of apop_multinomial with parameters (30, .4) and dsize 2.
    apop_model *source = apop_model_copy(apop_multinomial);
    source->parameters = apop_data_falloc((2), 30, .4);
    source->dsize = 2;
    int n_draws = 80;
    apop_data *sample = apop_model_draws(source, n_draws);

    //Reference result: update against the stock apop_binomial and display it.
    apop_model *reference = apop_update(sample, prior, apop_binomial);
    apop_model_show(reference);

    //Second pass: install fake_ll as the Beta model's log likelihood and use a
    //binomial with a NaN second parameter, run through apop_model_fix_params.
    prior->more = apop_beta;
    prior->log_likelihood = fake_ll;
    apop_model *binom = apop_model_fix_params(
                            apop_model_set_parameters(apop_binomial, 30, NAN));
    apop_model *via_ll = apop_update(sample, prior, binom);
    apop_model *refit = apop_estimate(via_ll->data, apop_beta);
    deciles(refit, reference, 1);

    //Third pass: same update with the log likelihood cleared; the resulting
    //data is expanded via apop_data_pmf_expand(…, 2000) before refitting.
    prior->log_likelihood = NULL;
    apop_model *via_null = apop_update(sample, prior, binom);
    refit = apop_estimate(apop_data_pmf_expand(via_null->data, 2000), apop_beta);
    deciles(refit, reference, 1);

    //Last: draw twice as many points from the second pass's output and refit.
    apop_data *redraw = apop_model_draws(via_ll, n_draws*2);
    apop_model *redraw_fit = apop_estimate(redraw, apop_beta);
    deciles(redraw_fit, reference, 1);
}
Пример #2
0
/* Draw from a two-dimensional multivariate Normal, re-estimate it after
   blanking the parameter vector with NaNs, and print both parameter vectors. */
int main(){
    gsl_rng *rng = apop_rng_alloc(10);
    size_t n_draws = 5e4;

    //The reference parameters, and a model copy that carries them.
    apop_data *truth = apop_data_alloc(2,2,2);
    apop_data_fill(truth, 8,  1, 0.5,
                          2,  0.5, 1);
    apop_model *mvn = apop_model_copy(apop_multivariate_normal);
    mvn->parameters = apop_data_copy(truth);

    //Fill the sample one row at a time: take a view of the row, then draw
    //straight into the view's data. (This trick works for rows, not columns.)
    apop_data *sample = apop_data_alloc(n_draws,2);
    for (size_t row=0; row < n_draws; row++){
        Apop_row(sample, row, onerow);
        apop_draw(onerow->data, rng, mvn);
    }

    //NaN out the parameter vector (the means) so that only it is left free,
    //keeping the covariance matrix fixed, then estimate.
    gsl_vector_set_all(mvn->parameters->vector, GSL_NAN);
    apop_model *free_means = apop_model_fix_params(mvn);
    apop_model *fitted = apop_estimate(sample, *free_means);

    //Show the reference and recovered vectors for comparison.
    printf("original params: ");
    apop_vector_show(truth->vector);
    printf("estimated params: ");
    apop_vector_show(fitted->parameters->vector);
}
Пример #3
0
/* Default estimation routine: copy the input model, record "df" as the
   data's element count (tsize, from Get_vmsizes) minus one, and store the
   copy's log likelihood at the data in its info page.

   d: the data set; must not be NULL (asserted).
   m: the model to copy; its log_likelihood method is called on the copy.
   Returns the newly made copy. */
apop_model* apop_chi_estimate(apop_data *d, apop_model *m){
    Apop_assert(d, "No data with which to count df. (the default estimation method)");
    Get_vmsizes(d); //vsize, msize1, msize2; tsize is the one used here
    apop_model *estimate = apop_model_copy(*m);
    apop_data_add_named_elmt(estimate->parameters, "df", tsize - 1);

    //Evaluate the likelihood only after df is in place on the copy.
    double ll = estimate->log_likelihood(d, estimate);
    apop_data_add_named_elmt(estimate->info, "log likelihood", ll);
    return estimate;
}
Пример #4
0
int main(){
    apop_data *d = draw_some_data();    
    apop_model *k = apop_estimate(d, apop_kernel_density);
    apop_model *k2 = apop_model_copy(apop_kernel_density);
    apop_model_add_group(k2, apop_kernel_density, .base_data=d,
                                    .set_fn = set_uniform_edges,
                                    .kernel = &apop_uniform);
    plot(k, k2);
}
Пример #5
0
 apop_data * apop_bootstrap_cov_base(apop_data * data, apop_model *model, gsl_rng *rng, int iterations, char keep_boots, char ignore_nans, apop_data **boot_store){
#endif
    /* Bootstrap covariance of a model's parameter estimates.

       Each iteration resamples rows of data with replacement into a scratch
       copy, re-estimates a copy of model on it, and stores the packed
       parameters as one row of a results matrix; the return value is
       apop_data_covariance() of that matrix.

       data         The data set to resample.
       model        The model to estimate; a copy is used and then freed.
       rng          Source of the random row indices.
       iterations   Number of bootstrap rows to collect.
       keep_boots   Not referenced in this function body.
       ignore_nans  If 'y', an estimate containing NaN is discarded and the
                    draw redone; after `iterations` such discards the loop
                    stops. Any other value records the draw, NaNs included.
       boot_store   If non-NULL, receives the matrix of bootstrap estimates
                    (the caller then owns it); otherwise that matrix is freed.

       Returns the covariance data set. Its error element is set to 'N' when
       the NaN limit was reached and only a partial result is returned; if no
       row at all could be recorded, an error data set is returned via
       apop_return_data_error(N). */
    Get_vmsizes(data); //vsize, msize1, msize2
    apop_model *e = apop_model_copy(model);
    apop_data *subset = apop_data_copy(data);
    apop_data *array_of_boots = NULL,
              *summary;
    //prevent an infinite regression of covariance calculation.
    Apop_model_add_group(e, apop_parts_wanted); //default wants for nothing.
    size_t i, nan_draws=0;
    apop_name *tmpnames = (data && data->names) ? data->names : NULL; //save on some copying below.
    if (data && data->names) data->names = NULL;

    int height = GSL_MAX(msize1, GSL_MAX(vsize, (data?(*data->textsize):0)));
    for (i=0; i<iterations && nan_draws < iterations; i++){
        for (size_t j=0; j< height; j++){       //create the data set
            size_t randrow = gsl_rng_uniform_int(rng, height);
            apop_data_memcpy(Apop_r(subset, j), Apop_r(data, randrow));
        }
        //get the parameter estimates.
        apop_model *est = apop_estimate(subset, e);
        gsl_vector *estp = apop_data_pack(est->parameters);
        if (ignore_nans=='y' && gsl_isnan(apop_sum(estp))){
            //Throw this draw away and redo it: cancel the loop's coming i++.
            //(i is unsigned; a wrap at zero is undone by that same i++.)
            i--;
            nan_draws++;
        } else {
            //Allocate on the first row actually recorded, whichever iteration
            //that is, so the matrix can never be NULL when written to.
            if (!array_of_boots){
                array_of_boots = apop_data_alloc(iterations, estp->size);
                apop_name_stack(array_of_boots->names, est->parameters->names, 'c', 'v');
                apop_name_stack(array_of_boots->names, est->parameters->names, 'c', 'c');
                apop_name_stack(array_of_boots->names, est->parameters->names, 'c', 'r');
            }
            //Every counted iteration writes its row (NaNs and all when they
            //are not being ignored), so no row is left uninitialized.
            gsl_matrix_set_row(array_of_boots->matrix, i, estp);
        }
        apop_model_free(est);
        gsl_vector_free(estp);
    }
    if(data) data->names = tmpnames;
    apop_data_free(subset);
    apop_model_free(e);
    int set_error=0;
    //No row was ever recorded: nothing to take a covariance of.
    Apop_stopif(!array_of_boots, apop_return_data_error(N),
                1, "I ran into %i NaNs and no not-NaN estimations, and so stopped. "
                       , iterations);
    //Hit the NaN limit part-way: trim to the i rows collected and flag the result.
    Apop_stopif(nan_draws == iterations,  set_error++;
            apop_matrix_realloc(array_of_boots->matrix, i, array_of_boots->matrix->size2),
                1, "I ran into %i NaNs, and so stopped. Returning results based "
                       "on %zu bootstrap iterations.", iterations, i);
    summary = apop_data_covariance(array_of_boots);
    if (boot_store) *boot_store = array_of_boots;
    else            apop_data_free(array_of_boots);
    if (set_error) summary->error = 'N';
    return summary;
}
Пример #6
0
int main(){
    //bind together a Poisson and a Normal;
    //make a draw producing a 2-element vector
    apop_model *m1 = apop_model_set_parameters(apop_poisson, 3);
    apop_model *m2 = apop_model_set_parameters(apop_normal, -5, 1);
    apop_model *mm = apop_model_stack(m1, m2);
    int len = 1e5;
    gsl_rng *r = apop_rng_alloc(1);
    apop_data *draws = apop_data_alloc(len, 2);
    for (int i=0; i< len; i++){
        Apop_row (draws, i, onev);
        apop_draw(onev->data, r, mm);
        assert((int)onev->data[0] == onev->data[0]);
        assert(onev->data[1]<0);
    }

    //The rest of the test script recovers the parameters.
    //First, set up a two-page data set: poisson data on p1, Normal on p2:
    apop_data *comeback = apop_data_alloc();
    Apop_col(draws, 0,fishdraws)
    comeback->vector = apop_vector_copy(fishdraws);
    apop_data_add_page(comeback, apop_data_alloc(), "p2");
    Apop_col(draws, 1, meandraws)
    comeback->more->vector = apop_vector_copy(meandraws);

    //set up the un-parameterized stacked model, including
    //the name at which to split the data set
    apop_model *estme = apop_model_stack(apop_model_copy(apop_poisson), apop_model_copy(apop_normal));
    Apop_settings_add(estme, apop_stack, splitpage, "p2");
    apop_model *ested = apop_estimate(comeback, *estme);

    //test that the parameters are as promised.
    apop_model *m1back = apop_settings_get(ested, apop_stack, model1);
    apop_model *m2back = apop_settings_get(ested, apop_stack, model2);
    assert(fabs(apop_data_get(m1back->parameters, .col=-1) - 3) < 1e-2);
    assert(fabs(apop_data_get(m2back->parameters, .col=-1) - -5) < 1e-2);
    assert(fabs(apop_data_get(m2back->parameters, .col=-1, .row=1) - 1) < 1e-2);
}
Пример #7
0
/* Read precipitation data from data-climate.db, write its PMF to "out.h",
   set up a kernel density model over that PMF with a Normal(0, 0.1) kernel,
   hand it to plot() with the name "out.k", and print a plotting command for
   the two files. plot() is defined elsewhere.

   Returns 1 if "out.h" cannot be opened for writing. */
int main(){
    apop_db_open("data-climate.db");

    apop_data *data = apop_query_to_data("select pcp from precip");
    apop_data_pmf_compress(data); //creates a weights vector
    apop_vector_normalize(data->weights);
    apop_data_sort(data);
    apop_model *pmf = apop_estimate(data, apop_pmf);

    //Write the PMF, failing loudly if the file can't be opened and closing
    //the stream afterwards so the output is flushed and not leaked.
    FILE *outfile = fopen("out.h", "w");
    if (!outfile){
        perror("out.h");
        return 1;
    }
    apop_model_print(pmf, outfile);
    fclose(outfile);

    apop_model *kernel = apop_model_set_parameters(apop_normal, 0., 0.1);
    apop_model *k = apop_model_copy(apop_kernel_density);
    Apop_settings_add_group(k, apop_kernel_density, .base_pmf=pmf, .kernel=kernel);
    plot(k, "out.k");
    printf("plot 'out.h' with lines title 'data', 'out.k' with lines title 'smoothed'\n");
}
Пример #8
0
/** Build a smoothed copy of a histogram model by taking a moving average of its bins.

 The output has the same number of bins as the input. Bins closer than
 bandwidth/2 to the low end, and bins past the end of the averaged series
 (offset by bandwidth/2), are set to zero; the rest take the averaged values.

 \param m A histogram, in \c apop_model form (its name must be "Histogram").
 \param bandwidth The number of elements to be smoothed; must be nonzero.
 \return A new model, copied from \c m, whose bins hold the moving average.
 */
apop_model *apop_histogram_moving_average(apop_model *m, size_t bandwidth) {
    apop_assert_c(m && !strcmp(m->name, "Histogram"), NULL, 0, "The first argument needs to be an apop_histogram model.");
    apop_assert_s(bandwidth, "bandwidth must be an integer >=1.");

    apop_model *out = apop_model_copy(*m);
    gsl_histogram *src  = Apop_settings_get(m, apop_histogram, pdf);
    gsl_histogram *dest = Apop_settings_get(out, apop_histogram, pdf);

    gsl_vector *raw      = apop_array_to_vector(src->bin, src->n);
    gsl_vector *averaged = apop_vector_moving_average(raw, bandwidth);

    //The averaged series is written into the output bins starting at this offset.
    size_t lead = bandwidth/2;
    for (int i=0; i< src->n; i++){
        int outside = i < lead || i >= averaged->size + lead;
        dest->bin[i] = outside ? 0 : gsl_vector_get(averaged, i-lead);
    }

    gsl_vector_free(raw);
    gsl_vector_free(averaged);
    return out;
}
Пример #9
0
/* Demonstrate several ways of getting a model ready for drawing: estimating
   from data, setting a fixed parameter list, filling a parameter set by hand,
   and filling a settings group. Each model goes to print_draws(), which is
   defined elsewhere. */
int main(){
    //Ten draws from a Uniform[0, 20] serve as the data throughout.
    apop_model *unif = apop_model_set_parameters(apop_uniform, 0, 20);
    apop_data *sample = apop_model_draws(unif, 10);

    //A Normal distribution estimated from the data:
    apop_model *fit_normal = apop_estimate(sample, apop_normal);
    print_draws(fit_normal);

    //A one-dimensional multivariate Normal estimated from the data:
    apop_model *fit_mvn = apop_estimate(sample, apop_multivariate_normal);
    print_draws(fit_mvn);

    //Fixed-length parameter list, set directly:
    apop_model *unit_normal = apop_model_set_parameters(apop_normal, 0, 1);
    print_draws(unit_normal);

    //Variable-size parameter list: set the four size fields to 3 and fill
    //the parameter set by hand.
    apop_model *unit_mvn = apop_model_copy(apop_multivariate_normal);
    unit_mvn->dsize  = 3;
    unit_mvn->vsize  = 3;
    unit_mvn->msize2 = 3;
    unit_mvn->msize1 = 3;
    unit_mvn->parameters = apop_data_falloc((3, 3, 3),
                                1,  1, 0, 0, 
                                1,  0, 1, 0,
                                1,  0, 0, 1);
    print_draws(unit_mvn);

    //A kernel density estimate using the defaults:
    apop_model *kde = apop_estimate(sample, apop_kernel_density);
    print_draws(kde);

    /*Estimating a KDE amounts to filling an apop_kernel_density settings
      group, so the group can also be filled directly; here with a
      Normal(mu, 2) kernel: */
    apop_model *kde_wide = apop_model_copy_set(apop_kernel_density, apop_kernel_density, 
                               .base_data = sample,
                               .kernel = apop_model_set_parameters(apop_normal, 0, 2));
    print_draws(kde_wide);
}
Пример #10
0
/* Draw from a two-dimensional multivariate Normal twice (once with the
   model's own draw method, once after clearing it), re-estimate from each
   set of draws, and assert every parameter is within .25 of the original. */
void make_draws(){
    apop_model *mvn = apop_model_copy(apop_multivariate_normal);
    mvn->parameters = apop_data_falloc((2, 2, 2), 
                                        1,  1, .1,
                                        8, .1,  1);
    mvn->dsize = 2;

    //Pass one: the model's own draw method, default number of draws.
    apop_model *fit_direct = apop_estimate(apop_model_draws(mvn), apop_multivariate_normal);
    for (int r=0; r < 2; r++){
        for (int c=-1; c < 2; c++){   //column -1 is the vector element
            assert(fabs(apop_data_get(mvn->parameters, r, c)
                        - apop_data_get(fit_direct->parameters, r, c)) < .25);
        }
    }

    //Pass two: with the draw method removed, so draw via MCMC; 10000 draws.
    mvn->draw = NULL;
    apop_model *fit_mcmc = apop_estimate(apop_model_draws(mvn, 10000), apop_multivariate_normal);
    for (int r=0; r < 2; r++){
        for (int c=-1; c < 2; c++){
            assert(fabs(apop_data_get(mvn->parameters, r, c)
                        - apop_data_get(fit_mcmc->parameters, r, c)) < .25);
        }
    }
}
Пример #11
0
/* Default estimation routine for the t model: copy the input model and fill
   its three parameters from descriptive statistics of every element in the
   data's vector and matrix taken together.

   d: the data set; must not be NULL (asserted).
   m: the model to copy; its log_likelihood method is called on the copy.
   Returns the new copy, with parameter rows named "mean",
   "standard deviation", and "df", and "log likelihood" added to its info.

   Parameters set:
     mean                the mean over all tsize elements;
     standard deviation  the sample standard deviation over all tsize
                         elements (denominator tsize-1);
     df                  tsize-1. */
apop_model* apop_t_estimate(apop_data *d, apop_model *m){
    Apop_assert(d, "No data with which to count df. (the default estimation method)");
    Get_vmsizes(d); //vsize, msize1, msize2, tsize
    apop_model *out = apop_model_copy(*m);
    double mcount = msize1*msize2;  //number of matrix elements

    //Per-part means and sums of squared deviations about those means.
    //A single element has zero squared deviation, so skip the variance call there.
    double vmu = vsize ? apop_mean(d->vector) : 0;
    double v_sum_sq = vsize > 1 ? apop_var(d->vector)*(vsize-1) : 0;
    double mmu = 0;
    double m_sum_sq = 0;
    if (msize1) {
        apop_matrix_mean_and_var(d->matrix, &mmu, &m_sum_sq);
        m_sum_sq = mcount > 1 ? m_sum_sq*(mcount-1) : 0;
    }

    //Combine the two parts. The overall sum of squares is the within-part
    //sums plus each part's count times its squared distance from the overall
    //mean. (Weighting the within-part sums by the counts again, as was done
    //before, inflates the result by roughly sqrt(n).)
    double mu = (vmu*vsize + mmu*mcount)/tsize;
    double total_sum_sq = v_sum_sq + m_sum_sq
                        + vsize *(vmu-mu)*(vmu-mu)
                        + mcount*(mmu-mu)*(mmu-mu);

    apop_data_add_names(out->parameters, 'r', "mean", "standard deviation", "df");
    apop_data_set(out->parameters, 0, -1, mu);
    apop_data_set(out->parameters, 1, -1, sqrt(total_sum_sq/(tsize-1)));
    apop_data_set(out->parameters, 2, -1, tsize-1);
    apop_data_add_named_elmt(out->info, "log likelihood", out->log_likelihood(d, out));
    return out;
}
Пример #12
0
/** Jackknifed covariance matrix of a model's parameters, given a data set and the model.

In outline, the jackknife builds a sequence of data sets, each missing exactly
one observation, re-estimates the parameters on each shortened set, and then
takes the covariance matrix of the values so derived.

\li Jackknife or bootstrap? A rough rule of thumb: the jackknife does best on models
    that are close to linear. The worse a linear approximation is (at the given data),
    the worse the jackknife's approximation of the variance.

\param in	    The data set. An \ref apop_data set where each row is a single data point.
\param model    An \ref apop_model, that will be used internally by \ref apop_estimate.
            
\exception out->error=='n'   \c NULL input data.
\return         An \c apop_data set whose matrix element is the estimated covariance matrix of the parameters.
\see apop_bootstrap_cov

For example:
\include jack.c
*/
apop_data * apop_jackknife_cov(apop_data *in, apop_model *model){
    Apop_stopif(!in, apop_return_data_error(n), 0, "The data input can't be NULL.");
    Get_vmsizes(in); //msize1, msize2, vsize
    apop_model *working = apop_model_copy(model);
    int obs_ct = GSL_MAX(msize1, GSL_MAX(vsize, in->textsize[0]));

    //Full-sample parameters: use the model's own if it has them, else estimate.
    apop_model *full_fit = working->parameters ? working : apop_estimate(in, working);
    gsl_vector *full_params = apop_data_pack(full_fit->parameters);
    gsl_vector_scale(full_params, obs_ct); //scaled by n once, reused every pass
    gsl_vector *pseudo = gsl_vector_alloc(full_params->size);

    //Start from a copy of the input without its first row.
    apop_data *short_set = apop_data_copy(Apop_rs(in, 1, obs_ct-1));
    apop_name *saved_names = in->names; 
    in->names = NULL;  //spares copying names on every row copy below

    apop_data *pseudo_rows = apop_data_alloc(obs_ct, full_params->size);

    for (int omitted = 0; omitted < obs_ct; omitted++){
        //After the first pass, put input row (omitted-1) back at the same
        //position in the short set; the short set then lacks row `omitted`.
        int restore = omitted - 1;
        if (restore >= 0) apop_data_memcpy(Apop_r(short_set, restore), Apop_r(in, restore));

        apop_model *fit = apop_estimate(short_set, working);
        gsl_vector *fit_params = apop_data_pack(fit->parameters);

        //pseudovalue = n*(full-sample params) - (n-1)*(leave-one-out params)
        gsl_vector_memcpy(pseudo, full_params);
        gsl_vector_scale(fit_params, obs_ct-1);
        gsl_vector_sub(pseudo, fit_params);
        gsl_matrix_set_row(pseudo_rows->matrix, omitted, pseudo);

        apop_model_free(fit);
        gsl_vector_free(fit_params);
    }
    in->names = saved_names;

    apop_data *cov = apop_data_covariance(pseudo_rows);
    gsl_matrix_scale(cov->matrix, 1./(obs_ct-1.));

    apop_data_free(short_set);
    gsl_vector_free(pseudo);
    apop_data_free(pseudo_rows);
    if (working != full_fit)
        apop_model_free(full_fit);
    apop_model_free(working);
    gsl_vector_free(full_params);
    return cov;
}
Пример #13
0
/* Draw from a two-dimensional multivariate Normal, re-estimate it after
   blanking the parameter vector with NaNs, print both parameter vectors,
   and assert that they are within 1e-2 of each other. */
int main(){
    size_t n_draws = 5e4;

    //The reference parameters, and a model copy that carries them.
    apop_data *truth = apop_data_falloc((2,2,2), 8,  1, 0.5,
                                                 2,  0.5, 1);
    apop_model *mvn = apop_model_copy(apop_multivariate_normal);
    mvn->parameters = apop_data_copy(truth);
    mvn->dsize = 2;
    apop_data *sample = apop_model_draws(mvn, n_draws);

    //NaN out the parameter vector (the means) so that only it is left free,
    //keeping the covariance matrix fixed, then estimate.
    gsl_vector_set_all(mvn->parameters->vector, GSL_NAN);
    apop_model *free_means = apop_model_fix_params(mvn);
    apop_model *fitted = apop_estimate(sample, free_means);
    
    //Show and check the reference against the recovered vector.
    printf("original params: ");
    apop_vector_print(truth->vector);
    printf("estimated params: ");
    apop_vector_print(fitted->parameters->vector);
    assert(apop_vector_distance(truth->vector, fitted->parameters->vector)<1e-2); 
}
Пример #14
0
}

/* Estimate the parameters of the Wishart model m from the data d, in place.

   The matrix part of m->parameters is first filled from the "mean" column of
   apop_data_summarize(d). The first vector element (the df) is then found by
   estimating a copy of the model in which the df is left as NaN and the
   log likelihood is replaced with fixed_wishart_ll (defined elsewhere in this
   file). Finally the matrix is divided by the estimated df.

   d: the data set to summarize and estimate from.
   m: the model whose parameters are overwritten; checked by Nullcheck_m.
   Returns nothing; results are written into m->parameters. */
static void wishart_estimate(apop_data *d, apop_model *m){
    Nullcheck_m(m, );
    //apop_data_set(m->parameters, 0, -1, d->matrix->size1);
    //Start with cov matrix via mean of inputs; df=NaN
    apop_data_set(m->parameters, 0, -1, GSL_NAN);
    apop_data *summ=apop_data_summarize(d);
    Apop_col_t(summ, "mean", means);
    //Temporarily detach the parameter vector so that unpacking the means
    //writes only into the matrix; the vector is reattached right after.
    gsl_vector *t = m->parameters->vector; //mask this while unpacking
    m->parameters->vector=NULL;
    apop_data_unpack(means, m->parameters);
    m->parameters->vector=t;

    //Estimate a model with fixed cov matrix and blank (NaN) df.
    apop_model *modified_wish = apop_model_copy(m);
    modified_wish->log_likelihood = fixed_wishart_ll;
    apop_model *fixed_wish = apop_model_fix_params(modified_wish);
    apop_model *est_via_fix = apop_estimate(d, fixed_wish);

    //copy df from fixed version to the real thing; clean up.
    t->data[0] = apop_data_get(est_via_fix->parameters, 0, -1);
    //Rescale the matrix of means by the estimated df.
    gsl_matrix_scale(m->parameters->matrix, 1./t->data[0]);
    apop_data_free(summ);
    apop_model_free(modified_wish);
    apop_model_free(fixed_wish);
    //NOTE(review): est_via_fix is not freed here. Confirm whether it shares
    //storage with fixed_wish before adding a free; it may be a leak.
}

/* amodel apop_wishart The Wishart distribution, which is currently somewhat untested. 

Here's the likelihood function. \f$p\f$ is the dimension of the data and covariance
Пример #15
0
/*The transformed-to-base function: apply log to every part ('a') of the input via apop_map. */
apop_data *rev(apop_data *in){
    return apop_map(in,
                    .fn_d = log,
                    .part = 'a');
}

/*Derivative of the transformed-to-base function (log): the reciprocal of the input. */
double inv(double in){
    const double reciprocal = 1. / in;
    return reciprocal;
}
/*The Jacobian term: absolute value of the sum of inv() over every part ('a') of the input. */
double rev_j(apop_data *in){
    double inv_total = apop_map_sum(in, .fn_d = inv, .part = 'a');
    return fabs(inv_total);
}

/* Rebuild a Lognormal as a coordinate transform of a Normal (using rev and
   rev_j), estimate it on exponentiated-Normal data, and compare it with the
   stock apop_lognormal. Diff and draw_exponentiated_normal are defined
   elsewhere. */
int main(){
    apop_model *transformed = apop_model_coordinate_transform(
                        .transformed_to_base = rev,
                        .jacobian_to_base    = rev_j,
                        .base_model          = apop_normal);
    //Apop_model_add_group(transformed, apop_parts_wanted);//Speed up the MLE.

    //Fake data with known parameters.
    double mu=2, sigma=1;
    apop_data *sample = draw_exponentiated_normal(mu, sigma, 2e5);

    //If the transform correctly replicates a Lognormal, the estimate
    //recovers mu and sigma.
    apop_model *fitted = apop_estimate(sample, transformed);
    apop_model_free(transformed);
    Diff(apop_data_get(fitted->parameters, 0), mu);
    Diff(apop_data_get(fitted->parameters, 1), sigma);

    /*The K-L divergence between the stock Lognormal at the original
      parameters and (a) a stock Lognormal carrying the estimated parameters,
      (b) the estimated transformed model itself, should be small. */
    apop_model *stock_true = apop_model_set_parameters(apop_lognormal, mu, sigma);
    apop_model *stock_fit = apop_model_copy(apop_lognormal);
    stock_fit->parameters = fitted->parameters;
    Diff(apop_kl_divergence(stock_true, stock_fit,.draw_ct=1000), 0);
    Diff(apop_kl_divergence(stock_true, fitted,.draw_ct=1000), 0);
}