Exemple #1
0
/** Tabulate the densities of two models over [0, 20) and print Gnuplot commands.

  Writes one line per grid point to the file \c kerneldata in the current
  directory, holding the grid value followed by \c apop_p under \c k and
  under \c k2. Then prints to stdout the Gnuplot commands that plot the two
  columns against the grid.

  \param k   The model whose density goes in column 2.
  \param k2  The model whose density goes in column 3.

  If the output file cannot be opened, an error is reported on stderr and
  nothing else is done.
*/
void plot(apop_model *k, apop_model *k2){
    FILE *outtab = fopen("kerneldata", "w");
    if (!outtab){
        perror("kerneldata");
        return;
    }
    apop_data *onept = apop_data_alloc(0,1,1);
    //Step by integer index: accumulating i+=0.01 drifts, so the number of
    //rows written would depend on rounding error.
    for (int step=0; step<2000; step++){
        double x = step/100.0;
        apop_data_set(onept,0,0, x);
        fprintf(outtab, "%g %g %g\n", x, apop_p(onept, k), apop_p(onept, k2));
    }
    apop_data_free(onept);
    if (fclose(outtab) != 0){
        perror("kerneldata");
        return;
    }
    printf("plot 'kerneldata' using 1:2\n"
           "replot 'kerneldata' using 1:3\n");
}
Exemple #2
0
/** Run a \f$\chi^2\f$ goodness-of-fit test of one \ref apop_pmf model against another.

The two histograms are assumed to be synced: either both were produced by
\ref apop_data_to_bins with the same binspec, or both went through
\ref apop_data_pmf_compress so that each observation value appears exactly once
in each data set.

Every value in the \c observed set should appear in the \c expected set with
nonzero weight. If one does not, the \f$\chi^2\f$ statistic is set to \c GSL_POSINF
and the scan stops, indicating that the \c observed data could not have been drawn
from the \c expected distribution.

\li Observation rows with weight zero are skipped and remove one degree of freedom
each; with <tt>apop_opts.verbose >=1 </tt> a warning is shown.

\param observed The model whose data rows and weights are tested.
\param expected The model that gives the expected probability of each observed row.
\return An \ref apop_data set with the named elements "Chi squared statistic",
"df", "p value", and "confidence".

  \ingroup histograms
*/
apop_data *apop_histograms_test_goodness_of_fit(apop_model *observed, apop_model *expected){
    gsl_vector *obs_weights = observed->data->weights;
    int df = obs_weights->size;
    double chisq = 0;
    for (int row=0; row< obs_weights->size; row++){
        Apop_data_row(observed->data, row, one_obs);
        double obs_val = gsl_vector_get(obs_weights, row);
        double exp_val = apop_p(one_obs, expected);
        if (exp_val == 0){
            //The expected model rules this row out entirely.
            chisq = GSL_POSINF;
            break;
        }
        if (obs_val != 0){
            chisq += gsl_pow_2(obs_val - exp_val)/exp_val;
            continue;
        }
        Apop_notify(1, "element %i of the observed data has weight zero. Skipping it.", row);
        df --;
    }
    //Statistic accumulated; package the results.
    apop_data   *out      = apop_data_alloc();
    int         test_df   = df-1;
    double      toptail   = gsl_cdf_chisq_Q(chisq, test_df);
    sprintf(out->names->title, "Goodness-of-fit test via Chi-squared statistic");
    apop_data_add_named_elmt(out, "Chi squared statistic", chisq);
    apop_data_add_named_elmt(out, "df", test_df);
    apop_data_add_named_elmt(out, "p value",  toptail); 
    apop_data_add_named_elmt(out, "confidence", 1 - toptail);
    return out;
}
Exemple #3
0
/* Demonstrate binning versus PMF compression on a small text-plus-number data set.
   Each stage is printed via printdata, and assertions check that the total weight
   stays at 6 and that the estimated PMF gives the first row probability 2/6. */
int main(){
    apop_data *d = apop_text_alloc(apop_data_alloc(6), 6, 1);
    apop_data_fill(d,   1,   2,   3,   3,   1,   2);
    apop_text_fill(d,  "A", "A", "A", "A", "A", "B");

    asprintf(&d->names->title, "Original data set");
    printdata(d);

        //binned, where bin ends are equidistant but not necessarily in the data
    apop_data *binned = apop_data_to_bins(d, NULL);
    asprintf(&binned->names->title, "Post binning");
    printdata(binned);
    assert(apop_sum(binned->weights)==6);
    assert(fabs(//equal distance between bins
              (apop_data_get(binned, 1, -1) - apop_data_get(binned, 0, -1))
            - (apop_data_get(binned, 2, -1) - apop_data_get(binned, 1, -1))) < 1e-5);

        //compressed, where the data is as in the original, but weights 
        //are redone to accommodate repeated observations.
    apop_data_pmf_compress(d);
    asprintf(&d->names->title, "Post compression");
    printdata(d);
    assert(apop_sum(d->weights)==6);

    apop_model *d_as_pmf = apop_estimate(d, apop_pmf);
    Apop_row(d, 0, firstrow); //1A
    //(1, "A") occurs in two of the six original rows, so its probability is 2/6.
    //The comparison must sit outside fabs(); inside, fabs() is applied to a
    //boolean and a probability that is too small is never caught.
    assert(fabs(apop_p(firstrow, d_as_pmf) - 2./6) < 1e-5);
}
Exemple #4
0
//Simulation-based probability: take random draws from m, smooth them into a
//kernel density (uniform kernel, centered via set_midpoint), and evaluate that
//density at d. The draws and the smoothed model are freed before returning.
long double p(apop_data *d, apop_model *m){
    int n_draws = 100;
    apop_data *sample = apop_model_draws(m, n_draws);
    apop_model *kde = apop_model_copy_set(apop_kernel_density, apop_kernel_density,
            .base_data =sample, .kernel=apop_uniform, .set_fn=set_midpoint);
    double density = apop_p(d, kde);
    apop_model_free(kde);
    apop_data_free(sample);
    return density;
}
Exemple #5
0
/** Evaluate a model's density on an evenly spaced grid and print the result.

  The grid runs from 0 to \c max (4.3) in steps of \c increment (0.01), both
  endpoints included. Each grid value is stored in the data set's vector and
  its \c apop_p under \c k in the matching weight; the weights are then
  normalized and the data set is printed via \c apop_data_print.

  \param k        The model whose density is evaluated.
  \param outname  Passed to \c apop_data_print as \c .output_name.
*/
void plot(apop_model *k, char *outname){
    double max = 4.3, increment = 0.01;
    //Compute the point count once, so the allocations and the loop agree.
    //Previously the loop stopped one row short of the allocated size, leaving
    //the last vector element and weight unset but still normalized and printed.
    int point_ct = max/increment+1;
    apop_data *out = apop_data_alloc(point_ct);
    out->weights =  gsl_vector_alloc(point_ct);
    for (int c=0; c<point_ct; c++){
        Apop_row(out, c, a_point);
        gsl_vector_set(a_point->vector, 0, c* increment);
        gsl_vector_set(a_point->weights, 0, apop_p(a_point, k));
    }
    apop_vector_normalize(out->weights); //Q: why is this always necessary?
    apop_data_print(out, .output_name=outname);
    apop_data_free(out);
}