Example #1
0
/* Demonstrates two ways of collapsing a small data set of (number, letter)
   observations into weighted form: binning via apop_data_to_bins, and
   compressing repeated observations via apop_data_pmf_compress.
   Each step is printed and then checked with asserts. */
int main(){
    apop_data *d = apop_text_alloc(apop_data_alloc(6), 6, 1);
    apop_data_fill(d,   1,   2,   3,   3,   1,   2);
    apop_text_fill(d,  "A", "A", "A", "A", "A", "B");

    asprintf(&d->names->title, "Original data set");
    printdata(d);

        //binned, where bin ends are equidistant but not necessarily in the data
    apop_data *binned = apop_data_to_bins(d, NULL);
    asprintf(&binned->names->title, "Post binning");
    printdata(binned);
    assert(apop_sum(binned->weights)==6);
    assert(fabs(//equal distance between bins
              (apop_data_get(binned, 1, -1) - apop_data_get(binned, 0, -1))
            - (apop_data_get(binned, 2, -1) - apop_data_get(binned, 1, -1))) < 1e-5);

        //compressed, where the data is as in the original, but weights
        //are redone to accommodate repeated observations.
    apop_data_pmf_compress(d);
    asprintf(&d->names->title, "Post compression");
    printdata(d);
    assert(apop_sum(d->weights)==6);

    apop_model *d_as_pmf = apop_estimate(d, apop_pmf);
    Apop_row(d, 0, firstrow); //1A
        //The observation (1, "A") appears twice among the six originals.
        //The tolerance comparison must sit outside fabs(); inside, fabs()
        //would be applied to the 0/1 result of the comparison.
    assert(fabs(apop_p(firstrow, d_as_pmf) - 2./6) < 1e-5);
}
Example #2
0
int main(){
    gsl_rng *r = apop_rng_alloc(10);
    size_t i, ct = 5e4;

    //set up the model & params
    apop_data *d  = apop_data_alloc(ct,2);
    apop_data *params = apop_data_alloc(2,2,2);
    apop_data_fill(params, 8,  1, 0.5,
                           2,  0.5, 1);
    apop_model *pvm = apop_model_copy(apop_multivariate_normal);
    pvm->parameters = apop_data_copy(params);

    //make random draws from the multivar. normal
    //this `pull a row view, fill its data element' form works for rows but not cols.
    for(i=0; i< ct; i++){
        Apop_row(d, i, onerow);
        apop_draw(onerow->data, r, pvm);
    }

    //set up and estimate a model with fixed covariance matrix but free means
    gsl_vector_set_all(pvm->parameters->vector, GSL_NAN);
    apop_model *mep1   = apop_model_fix_params(pvm);
    apop_model *e1  = apop_estimate(d, *mep1);
    
    //compare results
    printf("original params: ");
    apop_vector_show(params->vector);
    printf("estimated params: ");
    apop_vector_show(e1->parameters->vector);
}
Example #3
0
/* Evaluate the model k on an evenly spaced grid from 0 to max (inclusive),
   store each grid point in the vector and apop_p's value at that point in the
   weights, normalize the weights, and print the result via apop_data_print.

   k        The model to evaluate with apop_p.
   outname  Passed to apop_data_print as .output_name.
*/
void plot(apop_model *k, char *outname){
    double max = 4.3, increment = 0.01;
    //Use one point count for both allocations and the loop, so that every
    //allocated row is filled before normalizing and printing. Rounding guards
    //against max/increment landing just above or below a whole number.
    size_t point_ct = (size_t)(max/increment + 0.5) + 1;
    apop_data *out = apop_data_alloc(point_ct);
    out->weights =  gsl_vector_alloc(point_ct);
    for (size_t c=0; c<point_ct; c++){
        Apop_row(out, c, a_point);
        gsl_vector_set(a_point->vector, 0, c* increment);
        gsl_vector_set(a_point->weights, 0, apop_p(a_point, k));
    }
    apop_vector_normalize(out->weights); //Q: why is this always necessary?
    apop_data_print(out, .output_name=outname);
    apop_data_free(out); //the grid is only needed for printing
}
Example #4
0
/* Reorder the rows of data in place according to perm, where perm[i] is the
   index of the row that should end up at position i.

   The permutation is applied one cycle at a time: the first row of a cycle is
   set aside, each position in the cycle is overwritten with the row that
   belongs there, and the saved row closes the cycle.

   data    The data set whose rows are rearranged.
   height  The number of rows (and the length of perm).
   perm    The permutation to apply.
*/
static void rearrange(apop_data *data, size_t height, size_t *perm){
    //Nothing to move; also, a zero-length variable-length array is undefined behavior.
    if (height == 0) return;
    size_t i, start=0;
    size_t sorted[height]; //nonzero once a row has been put in (or copied out to) its final place
    memset(sorted, 0, sizeof(size_t)*height);
    while (1){
        i     =
        start = find_min_unsorted(sorted, height, start);
        //find_min_unsorted's no-more-rows result compares equal to -1 once converted to size_t.
        if (i==(size_t)-1) break;
        //The cycle's first row is about to be overwritten, so keep a copy.
        Apop_row(data, start, firstrow);
        apop_data *first_row_storage = apop_data_copy(firstrow);
        sorted[start]++;
        while (perm[i]!=start){
            //copy from perm[i] to i
            Apop_row(data, perm[i], onerow);
            apop_data_set_row(data, onerow, i);
            sorted[perm[i]]++;
            i = perm[i];
        }
        //i is now the position whose source is the saved first row.
        apop_data_set_row(data, first_row_storage, i);
        apop_data_free(first_row_storage);
    }
}
Example #5
0
File: em_weight.c Project: b-k/tea
//This is a substitute for apop_pmf_compress, because
//we can use knowledge of our special case to work more efficiently.
//
//For each row of right with nonzero weight, look for an equal row in left
//(as judged by are_equal). On a match, the right row's weight is added to the
//left row's weight and the right row's weight is zeroed. Weightless rows are
//then removed from right, and what remains is stacked onto left in place.
void merge_two_sets(apop_data *left, apop_data *right){
    for  (int i=0; i< right->matrix->size1; i++) {
        Apop_row(right, i, Rrow);
        double *r = gsl_vector_ptr(Rrow->weights, 0);
        if (!*r) continue;
        bool done = false;
        //The comparisons run in parallel; the loop index is left alone, as
        //OpenMP requires, and the weight transfer is serialized below.
        #pragma omp parallel for shared(done)
        for (int j=0; j< left->matrix->size1; j++){
            Apop_row(left, j, Lrow);
            if (!are_equal(Rrow, Lrow)) continue;
            //Only one matching row may take the weight, and *r must not be
            //read by one thread while another is zeroing it.
            #pragma omp critical (merge_two_sets_transfer)
            {
                if (!done){
                    *gsl_vector_ptr(Lrow->weights, 0) += *r;
                    *r = 0;
                    done = true;
                }
            }
        }
    }
    apop_data_rm_rows(right, .do_drop=weightless);
    apop_data_stack(left, right, .inplace='y');
}
Example #6
0
File: em_weight.c Project: b-k/tea
// Zero out the weights of those rows that don't match.
// Rescale the weights of those rows that are near-misses.
//
// onerow    The reference observation; its matrix is read as a single row,
//           and NaN fields in it are ignored. If onerow->more is present, a
//           text entry beginning with 'r' for field i marks that field as one
//           where near-misses are acceptable.
// cullback  The weighted candidate rows; only its weights are modified.
//
// Always returns 0.
static double cull2(apop_data const *onerow, apop_data *cullback){
    //Nothing to cull without a candidate matrix.
    if (!cullback || !cullback->matrix) return 0;
    #pragma omp parallel for
    for (int row=0; row<cullback->matrix->size1; row++){
        Apop_row(cullback, row, cull_row);
        if (!*cull_row->weights->data) continue; //already weightless
        for (int i=0; i< cull_row->matrix->size2; i++){
            double ref_val= onerow->matrix->data[i];
            if (isnan(ref_val)) continue;
            double cull_val= cull_row->matrix->data[i];
            if (onerow->more && onerow->more->text[i][0][0]=='r'){//near-misses OK.
                //The further from the reference, the more the weight shrinks.
                double dist = fabs(ref_val - cull_val);
                *cull_row->weights->data *= 1/(1+dist);
            }
            else if (cull_val != ref_val) {
                //An exact-match field differs, so the row is ruled out.
                *cull_row->weights->data= 0;
                break;
            }
        }
    }
    return 0;
}
Example #7
0
File: em_weight.c Project: b-k/tea
/* In this version, both the reference row and the weight set to be culled
   may have NaNs. We still require compatibility in those fields where both
   have data, but where one has a NaN and the other doesn't, we write down 
   the nonmissing value for that field, regardless of which side it came from.
   Therefore, the resultant data has fewer NaN fields than either source, and 
   repeating this over several iterations can eventually produce a NaN-free set.

   Not all data sets can complete like this.

   If a row has NaNs, but there is no additional fill-in, then give it zero weight.

   The rules:
--If a row has any NaN data, skip self in the cullback.
--If there is another row with unambiguously more data, skip this row.
--As an elaboration, if there is complete data anywhere in the candidate set, ignore
any incomplete rows (even if they are complete after fillin).

So, I need two passes:
(1) mark whether the row has NaNs.
(2) mark whether any admissible row has no NaNs.
(3) Check whether the row has any fill-ins. If has NaNs but no fill-ins, then it is either self or has even less data.
2nd pass:
(4) If any admissible rows have no NaNs, zero out all previously admissible 
rows with NaNs.
*/
// Compare every row of cullback against the reference row onerow, field by
// field: zero the weight of rows that conflict with the reference, rescale the
// weight on a near-miss field, and write the reference's value into fields
// where the candidate row has a NaN and the reference does not.
//
// onerow    The reference observation; read only. If onerow->more is present,
//           a text entry beginning with 'r' for field i marks that field as
//           one where near-misses are acceptable.
// cullback  The weighted candidate rows; both its weights and its NaN fields
//           may be modified.
//
// Always returns 0.
static double cull_w_nans(apop_data const *onerow, apop_data *cullback){
    if (!cullback || !cullback->matrix) return 0;
    //One flag per candidate row: did it hold a NaN in any field examined?
    bool has_nans[cullback->matrix->size1];
    //Set once some row with no NaNs is still carrying weight.
    bool complete_admissable_row = false;
    for (int row=0; row<cullback->matrix->size1; row++){
        Apop_row(cullback, row, cull_row);
        has_nans[row] = false;
        bool this_row_has_fillins = false;
        //Presumably the same element as *cull_row->weights->data used below,
        //reached via the full data set rather than the row view; TODO confirm.
        double *weight = gsl_vector_ptr(cullback->weights, row);
        for (int i=0; i< cull_row->matrix->size2; i++){
            double ref_field = apop_data_get(onerow, .col=i);
            double *cull_field = apop_data_ptr(cull_row, .col=i);
            //Recorded before any fill-in below, so the flag reflects the field as it arrived.
            has_nans[row] = has_nans[row] || isnan(*cull_field);  //step (1)
            if (!isnan(*cull_field) && !isnan(ref_field)){
                if (onerow->more && onerow->more->text[i][0][0]=='r'){//near-misses OK.
                    double dist = fabs(ref_field - *cull_field);
                    *cull_row->weights->data *= 1/(1+dist);
                    //NOTE(review): this break leaves the field loop, so the remaining
                    //fields of this row get neither a mismatch check nor a fill-in.
                    //Confirm that is intended.
                    break;
                } else if ((*cull_field != ref_field) //mismatch
                        || (has_nans[row] && complete_admissable_row)) { //step (4)
                    *weight = 0;
                    break;
                }
            }
            //The reference has data where this row does not: adopt the reference's value.
            if (isnan(*cull_field) && !isnan(ref_field)){
                *cull_field = ref_field;
                this_row_has_fillins = true;
            }
        }
        if (!has_nans[row] && *weight != 0)         //step (2)
            complete_admissable_row = true;
        if (has_nans[row] && !this_row_has_fillins) //step (3)
            *weight = 0;
    }
    //Second pass: rows seen before the first complete row was found could not
    //be ruled out by the step (4) test inside the loop, so do it now.
    if (complete_admissable_row)                    //step (4)
        for (int row=0; row<cullback->matrix->size1; row++)
            if (has_nans[row]) gsl_vector_set(cullback->weights, row, 0);
    return 0;
}
Example #8
0
int main(){
    //bind together a Poisson and a Normal;
    //make a draw producing a 2-element vector
    apop_model *m1 = apop_model_set_parameters(apop_poisson, 3);
    apop_model *m2 = apop_model_set_parameters(apop_normal, -5, 1);
    apop_model *mm = apop_model_stack(m1, m2);
    int len = 1e5;
    gsl_rng *r = apop_rng_alloc(1);
    apop_data *draws = apop_data_alloc(len, 2);
    for (int i=0; i< len; i++){
        Apop_row (draws, i, onev);
        apop_draw(onev->data, r, mm);
        assert((int)onev->data[0] == onev->data[0]);
        assert(onev->data[1]<0);
    }

    //The rest of the test script recovers the parameters.
    //First, set up a two-page data set: poisson data on p1, Normal on p2:
    apop_data *comeback = apop_data_alloc();
    Apop_col(draws, 0,fishdraws)
    comeback->vector = apop_vector_copy(fishdraws);
    apop_data_add_page(comeback, apop_data_alloc(), "p2");
    Apop_col(draws, 1, meandraws)
    comeback->more->vector = apop_vector_copy(meandraws);

    //set up the un-parameterized stacked model, including
    //the name at which to split the data set
    apop_model *estme = apop_model_stack(apop_model_copy(apop_poisson), apop_model_copy(apop_normal));
    Apop_settings_add(estme, apop_stack, splitpage, "p2");
    apop_model *ested = apop_estimate(comeback, *estme);

    //test that the parameters are as promised.
    apop_model *m1back = apop_settings_get(ested, apop_stack, model1);
    apop_model *m2back = apop_settings_get(ested, apop_stack, model2);
    assert(fabs(apop_data_get(m1back->parameters, .col=-1) - 3) < 1e-2);
    assert(fabs(apop_data_get(m2back->parameters, .col=-1) - -5) < 1e-2);
    assert(fabs(apop_data_get(m2back->parameters, .col=-1, .row=1) - 1) < 1e-2);
}
Example #9
0
/** Make random draws from an \ref apop_model, and bin them using a binspec in the style
 of \ref apop_data_to_bins. If you have a data set that used the same binspec, you now have synced histograms, which you can plot or sensibly test hypotheses about.

The weights of the binned draws are normalized via \ref apop_vector_normalize before the PMF is estimated.

\param model The model to be drawn from. Because this function works via random draws, the model needs to have a 
\c draw method. (No default)
\param binspec A description of the bins in which to place the draws; see \ref apop_data_to_bins. (default: as in \ref apop_data_to_bins.)
\param draws The number of random draws to make. (arbitrary default = 10,000)
\param bin_count If no bin spec, the number of bins to use (default: as per \ref apop_data_to_bins, \f$\sqrt{N}\f$)
\param rng The \c gsl_rng used to make random draws. If not given, a function-local static RNG is allocated on first use and reused on later calls. (default: see note on \ref autorng)

\return An \ref apop_pmf model estimated from the binned draws.

\li This function uses the \ref designated syntax for inputs.

\ingroup histograms
*/
APOP_VAR_HEAD apop_model *apop_model_to_pmf(apop_model *model, apop_data *binspec, long int draws, int bin_count, gsl_rng *rng){
    apop_model* apop_varad_var(model, NULL);
    Apop_assert(model && model->draw, "The model argument needs to be an apop_model with a 'draw' function "
                              "that I can use to make random draws.");
    apop_data* apop_varad_var(binspec, NULL);
    int apop_varad_var(bin_count, 0);
    long int apop_varad_var(draws, 1e4);
    gsl_rng *apop_varad_var(rng, NULL);
    //Fallback RNG, allocated once and shared by every call that omits rng.
    static gsl_rng *spare = NULL;
    if (!rng && !spare) 
        spare = apop_rng_alloc(++apop_opts.rng_seed);
    if (!rng) rng = spare;
APOP_VAR_ENDHEAD
    Get_vmsizes(binspec);
    //One draw per row, written directly into a view of the row.
    apop_data *outd = apop_data_alloc(draws, model->dsize); 
    for (long int i=0; i< draws; i++){
        Apop_row(outd, i, ach);
        apop_draw(ach->data, rng, model);
    }
    apop_data *outbinned = apop_data_to_bins(outd, binspec, .bin_count=bin_count);
    apop_data_free(outd); //the raw draws are no longer needed once binned
    apop_vector_normalize(outbinned->weights);
    return apop_estimate(outbinned, apop_pmf);
}