Esempio n. 1
0
double one_chi_sq(apop_data *d, int row, int col, int n){
    Apop_row_v(d, row, vr);
    Apop_col_v(d, col, vc);
    double rowexp = apop_vector_sum(vr)/n;
    double colexp = apop_vector_sum(vc)/n;
    double observed = apop_data_get(d, row, col);
    double expected = n * rowexp * colexp;
    return gsl_pow_2(observed - expected)/expected; 
}
/* The hard part is when your candidate point does not satisfy other
   constraints, so you need to translate the point until it meets the new hypersurface.
   How far is that? Project beta onto the new surface, and find the
   distance between that projection and the original surface. Then
   translate beta toward the original surface by that amount. The
   projection of the translated beta onto the new surface now also touches the old
   surface.
   */
static void get_candiate(gsl_vector *beta, apop_data *constraint, int current, gsl_vector *candidate, double margin){
    double k, ck, off_by, s;
    gsl_vector *pseudobeta        = NULL;
    gsl_vector *pseudocandidate   = NULL;
    gsl_vector *pseudocandidate2  = NULL;
    gsl_vector *fix               = NULL;
    Apop_row_v(constraint, current, cc);
    ck = gsl_vector_get(constraint->vector, current);
    find_nearest_point(beta, ck, cc, candidate);
    for (size_t i=0; i< constraint->vector->size; i++){
        if (i!=current){
            Apop_row_v(constraint, i, other);
            k   =apop_data_get(constraint, i, -1);
            if (binds(candidate, k, other, margin)){
                if (!pseudobeta){
                    pseudobeta          = gsl_vector_alloc(beta->size);
                    gsl_vector_memcpy(pseudobeta, beta);
                    pseudocandidate     = gsl_vector_alloc(beta->size);
                    pseudocandidate2    = gsl_vector_alloc(beta->size);
                    fix                 = gsl_vector_alloc(beta->size);
                }
                find_nearest_point(pseudobeta, k, other, pseudocandidate);
                find_nearest_point(pseudocandidate, ck, cc, pseudocandidate2);
                off_by  = apop_vector_distance(pseudocandidate, pseudocandidate2);
                s       = trig_bit(cc, other, off_by);
                gsl_vector_memcpy(fix, cc);
                gsl_vector_scale(fix, magnitude(cc));
                gsl_vector_scale(fix, s);
                gsl_vector_add(pseudobeta, fix);
                find_nearest_point(pseudobeta, k, other, candidate);
                gsl_vector_memcpy(pseudobeta, candidate);
            } 
        }
    }
    if (fix){ 
        gsl_vector_free(fix); gsl_vector_free(pseudobeta);
        gsl_vector_free(pseudocandidate); gsl_vector_free(pseudocandidate2);
    }
}
Esempio n. 3
0
apop_model *fuzz(apop_model sim){
    int draws = 100;
    gsl_rng *r = apop_rng_alloc(1);
    apop_model *prior = apop_model_cross(
                            apop_model_set_parameters(apop_normal, 10, 2),
                            apop_model_set_parameters(apop_normal, 10, 2));
    apop_data *outdata = apop_data_alloc(draws, weibull->vsize);
    double *params = sim.parameters->vector->data;
    for (int i=0; i< draws; i++){
        do {
            apop_draw(params, r, prior);
        } while (params[1]*2 > pow(params[0], 2));
        sim.dsize=params[1];
        apop_model *est = apop_estimate(apop_model_draws(&sim, 1000), weibull);
        Apop_row_v(outdata, i, onerow);
        gsl_vector_memcpy(onerow, est->parameters->vector);
        apop_model_free(est);
    }
    return apop_estimate(outdata, apop_pmf);
}
Esempio n. 4
0
apop_data *kappa_and_pi(apop_data const *tab_in){
    apop_data *out = apop_data_alloc();
    Apop_stopif(!tab_in, out->error='n'; return out, 0, "NULL input. Returning output with 'n' error code.");
    Apop_stopif(!tab_in->matrix, out->error='m'; return out, 0, "NULL input matrix. Returning output with 'm' error code.");
    Apop_stopif(tab_in->matrix->size1 != tab_in->matrix->size2, out->error='s'; return out, 0, "Input rows=%zu; input cols=%zu; "
                    "these need to be equal. Returning output with error code 's'.", tab_in->matrix->size1, tab_in->matrix->size2);

    apop_data *tab = apop_data_copy(tab_in);
    double total = apop_matrix_sum(tab->matrix);
    gsl_matrix_scale(tab->matrix, 1./total);
    double p_o = 0, p_e = 0, scott_pe = 0, ia = 0, row_ent = 0, col_ent = 0;
    for (int c=0; c< tab->matrix->size1; c++){
        double this_obs = apop_data_get(tab, c, c);
        p_o += this_obs;

        Apop_row_v(tab, c, row);
        Apop_col_v(tab, c, col);
        double rsum = apop_sum(row);
        double csum = apop_sum(col);
        p_e += rsum * csum;
        scott_pe += pow((rsum+csum)/2, 2);

        ia += this_obs * log2(this_obs/(rsum * csum));
        row_ent -= rsum * log2(rsum);
        col_ent -= csum * log2(csum);
    }
    apop_data_free(tab);

    asprintf(&out->names->title, "Scott's π and Cohen's κ");
    apop_data_add_named_elmt(out, "total count", total);
    apop_data_add_named_elmt(out, "percent agreement", p_o);
    apop_data_add_named_elmt(out, "κ", ((p_e==1)? 0: (p_o - p_e) / (1-p_e) ));
    apop_data_add_named_elmt(out, "π", ((p_e==1)? 0: (p_o - scott_pe) / (1-scott_pe)));
    apop_data_add_named_elmt(out, "P_I", ia/((row_ent+col_ent)/2));
    apop_data_add_named_elmt(out, "Cohen's p_e", p_e);
    apop_data_add_named_elmt(out, "Scott's p_e", scott_pe);
    apop_data_add_named_elmt(out, "information in agreement", ia);
    apop_data_add_named_elmt(out, "row entropy", row_ent);
    apop_data_add_named_elmt(out, "column entropy", col_ent);
    return out;
}
long double apop_linear_constraint(gsl_vector *beta, apop_data * constraint, double margin){
#else
apop_varad_head(long double, apop_linear_constraint){
    static threadlocal apop_data *default_constraint;
    gsl_vector * apop_varad_var(beta, NULL);
    double apop_varad_var(margin, 0);
    apop_data * apop_varad_var(constraint, NULL);
    Apop_assert(beta, "The vector to be checked is NULL.");
    if (!constraint){
        if (default_constraint && beta->size != default_constraint->vector->size){
            apop_data_free(default_constraint);
            default_constraint = NULL;
        }
        if (!default_constraint){
            default_constraint = apop_data_alloc(0,beta->size, beta->size);
            default_constraint->vector = gsl_vector_calloc(beta->size);
            gsl_matrix_set_identity(default_constraint->matrix);
        }
        constraint = default_constraint;
    }
    return apop_linear_constraint_base(beta, constraint, margin);
}

 long double apop_linear_constraint_base(gsl_vector *beta, apop_data * constraint, double margin){
#endif
    static threadlocal gsl_vector *closest_pt = NULL;
    static threadlocal gsl_vector *candidate  = NULL;
    static threadlocal gsl_vector *fix        = NULL;
    int constraint_ct = constraint->matrix->size1;
    int bindlist[constraint_ct];
    int i, bound = 0;
    /* For added efficiency, keep a scratch vector or two on hand. */
    if (closest_pt==NULL || closest_pt->size != constraint->matrix->size2){
        closest_pt  = gsl_vector_calloc(beta->size);
        candidate   = gsl_vector_alloc(beta->size);
        fix         = gsl_vector_alloc(beta->size);
        closest_pt->data[0] = GSL_NEGINF;
    }
    /* Do any constraints bind?*/
    memset(bindlist, 0, sizeof(int)*constraint_ct);
    for (i=0; i< constraint_ct; i++){
        Apop_row_v(constraint, i, c);
        bound           +=
        bindlist[i]      = binds(beta, apop_data_get(constraint, i, -1), c, margin);
    }
    if (!bound) return 0;   //All constraints met.
    gsl_vector *base_beta = apop_vector_copy(beta);
    /* With only one constraint, it's easy. */
    if (constraint->vector->size==1){
        Apop_row_v(constraint, 0, c);
        find_nearest_point(base_beta, constraint->vector->data[0], c, beta);
        goto add_margin;
    }
    /* Finally, multiple constraints, at least one binding.
       For each surface, pick a candidate point.
       Check whether the point meets the other constraints. 
            if not, translate to a new point that works.
            [Do this by maintaining a pseudopoint that translates by the
            necessary amount.]
        Once you have a candidate point, compare its distance to the
        current favorite; keep the best.
     */
    for (i=0; i< constraint_ct; i++)
        if (bindlist[i]){
            get_candiate(base_beta, constraint, i, candidate, margin);
            if(apop_vector_distance(base_beta, candidate) < apop_vector_distance(base_beta, closest_pt))
                gsl_vector_memcpy(closest_pt, candidate);
        }
    gsl_vector_memcpy(beta, closest_pt);
add_margin:
    for (i=0; i< constraint_ct; i++){
        if(bindlist[i]){
            Apop_row_v(constraint, i, c);
            gsl_vector_memcpy(fix, c);
            gsl_vector_scale(fix, magnitude(fix));
            gsl_vector_scale(fix, margin);
            gsl_vector_add(beta, fix);
        }
    }
    long double out = apop_vector_distance(base_beta, beta);
    gsl_vector_free(base_beta);
    return out;
}