Ejemplo n.º 1
0
 apop_data * apop_bootstrap_cov_base(apop_data * data, apop_model *model, gsl_rng *rng, int iterations, char keep_boots, char ignore_nans, apop_data **boot_store){
#endif
    Get_vmsizes(data); //vsize, msize1, msize2
    apop_model *e = apop_model_copy(model);
    apop_data *subset = apop_data_copy(data);
    apop_data *array_of_boots = NULL,
              *summary;
    //prevent and infinite regression of covariance calculation.
    Apop_model_add_group(e, apop_parts_wanted); //default wants for nothing.
    size_t i, nan_draws=0;
    apop_name *tmpnames = (data && data->names) ? data->names : NULL; //save on some copying below.
    if (data && data->names) data->names = NULL;

    int height = GSL_MAX(msize1, GSL_MAX(vsize, (data?(*data->textsize):0)));
	for (i=0; i<iterations && nan_draws < iterations; i++){
		for (size_t j=0; j< height; j++){       //create the data set
			size_t randrow	= gsl_rng_uniform_int(rng, height);
            apop_data_memcpy(Apop_r(subset, j), Apop_r(data, randrow));
		}
		//get the parameter estimates.
		apop_model *est = apop_estimate(subset, e);
        gsl_vector *estp = apop_data_pack(est->parameters);
        if (!gsl_isnan(apop_sum(estp))){
            if (i==0){
                array_of_boots	      = apop_data_alloc(iterations, estp->size);
                apop_name_stack(array_of_boots->names, est->parameters->names, 'c', 'v');
                apop_name_stack(array_of_boots->names, est->parameters->names, 'c', 'c');
                apop_name_stack(array_of_boots->names, est->parameters->names, 'c', 'r');
            }
            gsl_matrix_set_row(array_of_boots->matrix, i, estp);
        } else if (ignore_nans=='y'){
            i--; 
            nan_draws++;
        }
        apop_model_free(est);
        gsl_vector_free(estp);
	}
    if(data) data->names = tmpnames;
    apop_data_free(subset);
    apop_model_free(e);
    int set_error=0;
    Apop_stopif(i == 0 && nan_draws == iterations, apop_return_data_error(N),
                1, "I ran into %i NaNs and no not-NaN estimations, and so stopped. "
                       , iterations);
    Apop_stopif(nan_draws == iterations,  set_error++;
            apop_matrix_realloc(array_of_boots->matrix, i, array_of_boots->matrix->size2),
                1, "I ran into %i NaNs, and so stopped. Returning results based "
                       "on %zu bootstrap iterations.", iterations, i);
	summary	= apop_data_covariance(array_of_boots);
    if (boot_store) *boot_store = array_of_boots;
    else            apop_data_free(array_of_boots);
    if (set_error) summary->error = 'N';
	return summary;
}
Ejemplo n.º 2
0
static double cook_math(apop_data *reduced){
  apop_model *r = apop_estimate(reduced, apop_ols);
  apop_data * new_predicted = project(ols_data, r); 
  double out = sum_squared_diff(new_predicted->vector, predicted)/p_dot_mse;
  apop_data_free(new_predicted);
  apop_model_free(r);
  return out;
}
Ejemplo n.º 3
0
//The probability: draw from the rng, smooth with a kernel density, calculate p.
long double p(apop_data *d, apop_model *m){
    int draw_ct = 100;
    apop_data *draws = apop_model_draws(m, draw_ct);
    apop_model *smoothed = apop_model_copy_set(apop_kernel_density, apop_kernel_density,
            .base_data =draws, .kernel=apop_uniform, .set_fn=set_midpoint);
    double out = apop_p(d, smoothed);
    apop_data_free(draws);
    apop_model_free(smoothed);
    return out;
}
Ejemplo n.º 4
0
Archivo: em_weight.c Proyecto: b-k/tea
void rescale_cp_and_merge_into_candidate(apop_data *candidate, apop_data **cp, double scale_to){
    if (!*cp) return;
    double cp_weight_sum = apop_sum((*cp)->weights);
    if (cp_weight_sum) {
        gsl_vector_scale((*cp)->weights, scale_to/cp_weight_sum);
        merge_in_weights_so_far(candidate, *cp);//append cp into candidate
    }
    if (!(*cp)->weights || (candidate? candidate->weights->size : 0) != (*cp)->weights->size)
        apop_data_free(*cp);
}
Ejemplo n.º 5
0
/** Give me a data set and a model, and I'll give you the jackknifed covariance matrix of the model parameters.

The basic algorithm for the jackknife (glossing over the details): create a sequence of data
sets, each with exactly one observation removed, and then produce a new set of parameter estimates 
using that slightly shortened data set. Then, find the covariance matrix of the derived parameters.

\li Jackknife or bootstrap? As a broad rule of thumb, the jackknife works best on models
    that are closer to linear. The worse a linear approximation does (at the given data),
    the worse the jackknife approximates the variance.

\param in	    The data set. An \ref apop_data set where each row is a single data point.
\param model    An \ref apop_model, that will be used internally by \ref apop_estimate.
            
\exception out->error=='n'   \c NULL input data.
\return         An \c apop_data set whose matrix element is the estimated covariance matrix of the parameters.
\see apop_bootstrap_cov

For example:
\include jack.c
*/
apop_data * apop_jackknife_cov(apop_data *in, apop_model *model){
    Apop_stopif(!in, apop_return_data_error(n), 0, "The data input can't be NULL.");
    Get_vmsizes(in); //msize1, msize2, vsize
    apop_model *e = apop_model_copy(model);
    int i, n = GSL_MAX(msize1, GSL_MAX(vsize, in->textsize[0]));
    apop_model *overall_est = e->parameters ? e : apop_estimate(in, e);//if not estimated, do so
    gsl_vector *overall_params = apop_data_pack(overall_est->parameters);
    gsl_vector_scale(overall_params, n); //do it just once.
    gsl_vector *pseudoval = gsl_vector_alloc(overall_params->size);

    //Copy the original, minus the first row.
    apop_data *subset = apop_data_copy(Apop_rs(in, 1, n-1));
    apop_name *tmpnames = in->names; 
    in->names = NULL;  //save on some copying below.

    apop_data *array_of_boots = apop_data_alloc(n, overall_params->size);

    for(i = -1; i< n-1; i++){
        //Get a view of row i, and copy it to position i-1 in the short matrix.
        if (i >= 0) apop_data_memcpy(Apop_r(subset, i), Apop_r(in, i));
        apop_model *est = apop_estimate(subset, e);
        gsl_vector *estp = apop_data_pack(est->parameters);
        gsl_vector_memcpy(pseudoval, overall_params);// *n above.
        gsl_vector_scale(estp, n-1);
        gsl_vector_sub(pseudoval, estp);
        gsl_matrix_set_row(array_of_boots->matrix, i+1, pseudoval);
        apop_model_free(est);
        gsl_vector_free(estp);
    }
    in->names = tmpnames;
    apop_data *out = apop_data_covariance(array_of_boots);
    gsl_matrix_scale(out->matrix, 1./(n-1.));
    apop_data_free(subset);
    gsl_vector_free(pseudoval);
    apop_data_free(array_of_boots);
    if (e!=overall_est)
        apop_model_free(overall_est);
    apop_model_free(e);
    gsl_vector_free(overall_params);
    return out;
}
Ejemplo n.º 6
0
static gsl_vector *cooks_distance(apop_model *in){
  apop_data  *c = apop_data_copy(in->data);
  apop_ols->prep(in->data, in);
  ols_data = in->data;
  apop_data * t = project(in->data, in);
  predicted = t->vector;
  p_dot_mse  = c->matrix->size2 * sum_squared_diff(in->data->vector, t->vector);
  
  gsl_vector* out = jack_iteration(c->matrix, cook_math);
  apop_data_free(t);
  return out;
}
Ejemplo n.º 7
0
int main(){
    gsl_rng *r = apop_rng_alloc(2312311);
    int empirical_size = 5e3;
    apop_model *expo = apop_model_set_parameters(apop_exponential, 1.7);
    assert (apop_kl_divergence(expo, expo) < 1e-4);
    apop_data *empirical = apop_data_alloc(empirical_size, 1);
    for (int i=0; i<empirical_size; i++)
        apop_draw(apop_data_ptr(empirical, i, 0), r, expo);
    apop_model *pmf = apop_estimate(empirical, apop_pmf);
    assert(apop_kl_divergence(pmf,expo) < 1e-4);
    apop_data_free(empirical);
}
Ejemplo n.º 8
0
ykl_s make_yule(char const *zila, int y) {
    static gsl_matrix *indices;
    if (!indices) {
        indices = gsl_matrix_calloc(65,1);
        for (int i=0; i< 64; i++) gsl_matrix_set(indices, i,0, i);
    }
    apop_data *col = make_histo(zila, y);
    apop_data ww = (apop_data) {
        .weights=col->vector, .matrix=indices
    };
    apop_data *d = apop_data_transpose(col);
    apop_data *exp = apop_data_rank_expand(d);
    apop_model *m = apop_estimate(exp, apop_yule);
    apop_model *n = apop_estimate(exp, apop_lognormal);
    ykl_s out = (ykl_s) {
        .yule=apop_data_get(m->parameters, .col=-1/*, .rowname="mu"*/),
         .ln=apop_data_get(n->parameters, .col=-1/*, .rowname="mu"*/),
          .lnstderr=sqrt(apop_data_get(n->parameters, .col=-1, .row=1/*, .rowname="mu"*/)),
           .kl = apop_kl_divergence(apop_estimate(&ww, apop_pmf), m),
            .lnkl = apop_kl_divergence(apop_estimate(&ww, apop_pmf), n),
             .mean = apop_matrix_mean(col->matrix)
    };
    apop_data_free(d);
    apop_data_free(exp);
    apop_model_free(m);
    return out;
}

int main() {
    printf("zila|year|yule_p|kl_div|mu|ln_mu|ln_sigma|ln_kl\n");
    apop_db_open("b.db");
    apop_data *zilas = apop_query_to_text("select admname from ppl");
    for (int i=0; i< *zilas->textsize; i++)
        for (int y=2001; y<= 2005; y++) {
            ykl_s ykl = make_yule(*zilas->text[i], y);
            printf("%20s| %i| %g| %g| %g| %g| %g|%g\n", *zilas->text[i], y, ykl.yule, ykl.kl, ykl.mean, ykl.ln, ykl.lnstderr, ykl.lnkl);
        }
    //apop_plot_histogram(m->data->weights, 64, .output_file="histo");
}
Ejemplo n.º 9
0
void test_regex(){
    char string1[] = "Hello. I am a string.";
    assert(apop_regex(string1, "hell"));
    apop_data *subs;
    apop_regex(string1, "(e).*I.*(xxx)*(am)", .substrings = &subs);
    //apop_data_show(subs);
    assert(!strcmp(subs->text[0][0], "e"));
    assert(!strlen(subs->text[0][1]));
    assert(!strcmp(subs->text[0][2], "am"));
    apop_data_free(subs);

    //Split a comma-delimited list, throwing out white space.
    //Notice that the regex includes only one instance of a non-comma blob 
    //ending in a non-space followed by a comma, but the function keeps 
    //applying it until the end of string.
    char string2[] = " one, two , three ,four";
    apop_regex(string2, " *([^,]*[^ ]) *(,|$) *", &subs);
    assert(!strcmp(*subs->text[0], "one"));
    assert(!strcmp(*subs->text[1], "two"));
    assert(!strcmp(*subs->text[2], "three"));
    assert(!strcmp(*subs->text[3], "four"));
    apop_data_free(subs);
}
Ejemplo n.º 10
0
/** This function sorts the whole of a \c apop_data set based on one column. Sorts in place, with little additional memory used.

 Uses the \c gsl_sort_vector_index function internally, and that function just ignores NaNs; therefore this function just leaves NaNs exactly where they lay.

 \param data    The input set to be modified. (No default, must not be \c NULL.)
 \param sortby  The column of data by which the sorting will take place. As usual, -1 indicates the vector element. (default: column zero of the matrix if there is a matrix; if there's a vector but no matrix, then -1).
 \param asc   If 'd' or 'D', sort in descending order; else sort in ascending order. (Default: ascending)
 \return A pointer to the data set, so you can do things like \c apop_data_show(apop_data_sort(d, -1)).

This function uses the \ref designated syntax for inputs.
*/
APOP_VAR_HEAD apop_data * apop_data_sort(apop_data *data, int sortby, char asc){
    apop_data * apop_varad_var(data, NULL);
    apop_assert_s(data, "You gave me NULL data to sort.");
    int apop_varad_var(sortby, 0);
    if (sortby==0 && !data->matrix && data->vector) //you meant sort the vector
        sortby = -1;
    char apop_varad_var(asc, 0);
APOP_VAR_ENDHEAD
    size_t height  = (sortby==-1) ? data->vector->size: data->matrix->size1;
    size_t sorted[height];
    size_t i, *perm, start=0;
    gsl_permutation *p = gsl_permutation_alloc(height);
    memset(sorted, 0, sizeof(size_t)*height);
    if (sortby == -1)
        gsl_sort_vector_index (p, data->vector);
    else {
        APOP_COL(data, sortby, v);
        gsl_sort_vector_index (p, v);
    }
    perm = p->data;
    if (asc=='d' || asc=='D') //reverse the perm matrix.
        for (size_t j=0; j< height/2; j++){
            double t         = perm[j];
            perm[j]          = perm[height-1-j];
            perm[height-1-j] = t;
        }
    while (1){
        i     =
        start = find_min_unsorted(sorted, height, start);
        if (i==-1) 
            break;
        Apop_data_row(data, start, firstrow);
        apop_data *first_row_storage = apop_data_copy(firstrow);
        sorted[start]++;
        while (perm[i]!=start){
            //copy from perm[i] to i
            Apop_data_row(data, perm[i], onerow);
            apop_data_set_row(data, onerow, i);
            sorted[perm[i]]++;
            i = perm[i];
        }
        apop_data_set_row(data, first_row_storage, i);
        apop_data_free(first_row_storage);
    }
    gsl_permutation_free(p);
    return data;
}
Ejemplo n.º 11
0
int main(){
    int len = 3000;
    gsl_vector *v = gsl_vector_alloc(len);
    for (double i=0; i< len; i++) gsl_vector_set(v, i, 1./(i+1));
    double square;
    gsl_blas_ddot(v, v, &square);
    printf("1 + (1/2)^2 + (1/3)^2 + ...= %g\n", square);

    double pi_over_six = gsl_pow_2(M_PI)/6.;
    Diff(square, pi_over_six);

    /* Now using apop_dot, in a few forms.
       First, vector-as-data dot itself.
       If one of the inputs is a vector,
       apop_dot puts the output in a vector-as-data:*/
    apop_data *v_as_data = &(apop_data){.vector=v};
    apop_data *vdotv = apop_dot(v_as_data, v_as_data);
    Diff(gsl_vector_get(vdotv->vector, 0), pi_over_six);

    /* Wrap matrix in an apop_data set. */
    gsl_matrix *v_as_matrix = apop_vector_to_matrix(v);
    apop_data dm = (apop_data){.matrix=v_as_matrix};

    // (1 X len) vector dot (len X 1) matrix --- produce a scalar (one item vector).
    apop_data *mdotv = apop_dot(v_as_data, &dm);
    double scalarval = apop_data_get(mdotv);
    Diff(scalarval, pi_over_six);

    //(len X 1) dot (len X 1) --- bad dimensions.
    apop_opts.verbose=-1; //don't print an error.
    apop_data *mdotv2 = apop_dot(&dm, v_as_data);
    apop_opts.verbose=0; //back to safety.
    assert(mdotv2->error);

    // If we want (len X 1) dot (1 X len) --> (len X len),
    // use apop_vector_to_matrix.
    apop_data dmr = (apop_data){.matrix=apop_vector_to_matrix(v, .row_col='r')};
    apop_data *product_matrix = apop_dot(&dm, &dmr);
    //The trace is the sum of squares:
    gsl_vector_view trace = gsl_matrix_diagonal(product_matrix->matrix);
    double tracesum = apop_sum(&trace.vector);
    Diff(tracesum, pi_over_six);

    apop_data_free(product_matrix);
    gsl_matrix_free(dmr.matrix);
}
Ejemplo n.º 12
0
static double one_wishart_row(gsl_vector *in, void *ws_in){
    wishartstruct_t *ws = ws_in;
    gsl_matrix *invparams_dot_data = gsl_matrix_alloc(ws->len, ws->len);
    apop_data *square= apop_data_alloc(ws->len, ws->len);
    apop_data_unpack(in, square);
    double datadet = apop_matrix_determinant(square->matrix);
    assert(datadet);

    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1, ws->paraminv, square->matrix, 0, invparams_dot_data);   
    gsl_vector_view diag = gsl_matrix_diagonal(invparams_dot_data);
    double trace = apop_sum(&diag.vector);
    gsl_matrix_free(invparams_dot_data);
    apop_data_free(square);
    double out= log(datadet) * (ws->df - ws->len -1.)/2. - trace*ws->df/2.;
    assert(isfinite(out));
    return out;
}
Ejemplo n.º 13
0
static void rearrange(apop_data *data, size_t height, size_t *perm){
    size_t i, start=0;
    size_t sorted[height];
    memset(sorted, 0, sizeof(size_t)*height);
    while (1){
        i     =
        start = find_min_unsorted(sorted, height, start);
        if (i==-1) break;
        apop_data *first_row_storage = apop_data_copy(Apop_r(data, start));
        sorted[start]++;
        while (perm[i]!=start){
            //copy from perm[i] to i
            apop_data_set_row(data, Apop_r(data, perm[i]), i);
            sorted[perm[i]]++;
            i = perm[i];
        }
        apop_data_set_row(data, first_row_storage, i);
        apop_data_free(first_row_storage);
    }
}
Ejemplo n.º 14
0
static long double multinomial_constraint(apop_data *data, apop_model *b){
  //constraint is that 0 < all elmts, and  1>all ps.
    int size = b->parameters->vector->size;
    static threadlocal apop_data *constr;
    if (constr && constr->matrix->size2 != size)
        apop_data_free(constr);
    if (!constr){
        constr = apop_data_calloc(size*2-1, size*2-1, size);

        //top half: 0 < [param], including param 0
        gsl_matrix_set_identity(Apop_subm(constr->matrix, 0, 0, size, size));

        //bottom (almost) half: 1 >= [param], excluding param 0
        for (int i=size; i < size*2-1; i++){
            apop_data_set(constr, i, -1, -1);
            apop_data_set(constr, i, i-size+1, -1);
        }
    }
    return apop_linear_constraint(b->parameters->vector, constr);
}
Ejemplo n.º 15
0
apop_data *kappa_and_pi(apop_data const *tab_in){
    apop_data *out = apop_data_alloc();
    Apop_stopif(!tab_in, out->error='n'; return out, 0, "NULL input. Returning output with 'n' error code.");
    Apop_stopif(!tab_in->matrix, out->error='m'; return out, 0, "NULL input matrix. Returning output with 'm' error code.");
    Apop_stopif(tab_in->matrix->size1 != tab_in->matrix->size2, out->error='s'; return out, 0, "Input rows=%zu; input cols=%zu; "
                    "these need to be equal. Returning output with error code 's'.", tab_in->matrix->size1, tab_in->matrix->size2);

    apop_data *tab = apop_data_copy(tab_in);
    double total = apop_matrix_sum(tab->matrix);
    gsl_matrix_scale(tab->matrix, 1./total);
    double p_o = 0, p_e = 0, scott_pe = 0, ia = 0, row_ent = 0, col_ent = 0;
    for (int c=0; c< tab->matrix->size1; c++){
        double this_obs = apop_data_get(tab, c, c);
        p_o += this_obs;

        Apop_row_v(tab, c, row);
        Apop_col_v(tab, c, col);
        double rsum = apop_sum(row);
        double csum = apop_sum(col);
        p_e += rsum * csum;
        scott_pe += pow((rsum+csum)/2, 2);

        ia += this_obs * log2(this_obs/(rsum * csum));
        row_ent -= rsum * log2(rsum);
        col_ent -= csum * log2(csum);
    }
    apop_data_free(tab);

    asprintf(&out->names->title, "Scott's π and Cohen's κ");
    apop_data_add_named_elmt(out, "total count", total);
    apop_data_add_named_elmt(out, "percent agreement", p_o);
    apop_data_add_named_elmt(out, "κ", ((p_e==1)? 0: (p_o - p_e) / (1-p_e) ));
    apop_data_add_named_elmt(out, "π", ((p_e==1)? 0: (p_o - scott_pe) / (1-scott_pe)));
    apop_data_add_named_elmt(out, "P_I", ia/((row_ent+col_ent)/2));
    apop_data_add_named_elmt(out, "Cohen's p_e", p_e);
    apop_data_add_named_elmt(out, "Scott's p_e", scott_pe);
    apop_data_add_named_elmt(out, "information in agreement", ia);
    apop_data_add_named_elmt(out, "row entropy", row_ent);
    apop_data_add_named_elmt(out, "column entropy", col_ent);
    return out;
}
Ejemplo n.º 16
0
Archivo: checkout.c Proyecto: b-k/tea
void check_out_impute(char **origin, char **destin, int *imputation_number, char **subset, char **filltabin){
    char *filltab = (filltabin && *filltabin) ? *filltabin : "filled";
    Tea_stopif(!origin || !*origin, return, 0, "NULL origin table, but I need that.");
    char *id_column= get_key_word(NULL, "id");
    const char *dest = destin ? *destin : NULL;
    int use_rowids = 0;
    if (!id_column) {
        use_rowids++;
        id_column = strdup("rowid");
    }
    sprintf(apop_opts.db_name_column, "%s",  id_column);
    begin_transaction();
    if (dest && strcmp(*origin, dest)){
        apop_table_exists(dest, 'd');
        apop_query("create table %s as select %s * from %s %s %s", 
                        dest, 
                        use_rowids ? "rowid as id_col, " : " ", *origin,
                        (subset && *subset) ? "where" : " ",
                        (subset && *subset) ? *subset : " "
                        );
    } else dest = *origin;
    create_index(dest, use_rowids ? "id_col" : id_column);
    Tea_stopif(!apop_table_exists(filltab), return , 0, "No table named '%s'; did you already doMImpute()?", filltab);
    apop_data *fills = apop_query_to_text("select %s, field, value from %s where (draw=%i or draw = -1)"
                                              , id_column, filltab, *imputation_number);
    Tea_stopif(!fills || fills->error, return, 0, "Expected fill-in table "
                "%s, but couldn't query it.", filltab);
    for(int i=0; i< *fills->textsize; i++){
        _Bool is_null = !strcmp(fills->text[i][1], apop_opts.nan_string);
        char tick = is_null ? ' ' : '\'';
        apop_query("update %s set %s = %c%s%c "
                   "where cast(%s as numeric) = %s", 
                      dest, fills->text[i][0], 
                      tick, is_null ? "NULL" : fills->text[i][1], tick,
                      id_column, fills->names->row[i]);
    }
    commit_transaction();
    apop_data_free(fills);
    free(id_column);
}
Ejemplo n.º 17
0
void check_out_impute(char **origin, char **destin, int *imputation_number, char **subset, char **filltabin){
    char *filltab = (filltabin && *filltabin) ? *filltabin : "filled";
    Apop_stopif(!origin || !*origin, return, 0, "NULL origin table, but I need that.");
    char *id_column= get_key_word(NULL, "id");
    const char *dest = destin ? *destin : NULL;
    int use_rowids = 0;
    if (!id_column) {
        use_rowids++;
        id_column = strdup("rowid");
    }
    sprintf(apop_opts.db_name_column, "%s",  id_column);
    if (dest){
        apop_table_exists(dest, 'd');
        apop_query("create table %s as select %s * from %s %s %s", 
                        dest, 
                        use_rowids ? "rowid as id_col, " : " ", *origin,
                        (subset && *subset) ? "where" : " ",
                        (subset && *subset) ? *subset : " "
                        );
    } else dest = *origin;
    has_sqlite3_index(dest, use_rowids ? "id_col" : id_column, 'y');
    Apop_stopif(!apop_table_exists(filltab), return , 0, "No table named '%s'; did you already doMImpute()?", filltab);
    apop_data *fills = apop_query_to_text("select %s, field, value from %s where draw+0.0=%i"
                                              , id_column, filltab, *imputation_number);
    Apop_stopif(!fills || fills->error, return, 0, "Expected fill-in table "
                "%s, but couldn't query it.", filltab);
    begin_transaction();
    if (fills)
        for(int i=0; i< *fills->textsize; i++)
            apop_query("update %s set %s = '%s' "
                       "where %s = %s", 
                          dest, fills->text[i][0], fills->text[i][1], 
                          id_column, fills->names->row[i]);
    commit_transaction();
    apop_data_free(fills);
    free(id_column);
}
Ejemplo n.º 18
0
/** Make random draws from an \ref apop_model, and bin them using a binspec in the style
 of \ref apop_data_to_bins. If you have a data set that used the same binspec, you now have synced histograms, which you can plot or sensibly test hypotheses about.

The output is normalized to integrate to one.

\param binspec A description of the bins in which to place the draws; see \ref apop_data_to_bins. (default: as in \ref apop_data_to_bins.)
\param model The model to be drawn from. Because this function works via random draws, the model needs to have a 
\c draw method. (No default)
\param draws The number of random draws to make. (arbitrary default = 10,000)
\param bin_count If no bin spec, the number of bins to use (default: as per \ref apop_data_to_bins, \f$\sqrt(N)\f$)
\param rng The \c gsl_rng used to make random draws. (default: see note on \ref autorng)

\return An \ref apop_pmf model.

\li This function uses the \ref designated syntax for inputs.

\ingroup histograms
*/
APOP_VAR_HEAD apop_model *apop_model_to_pmf(apop_model *model, apop_data *binspec, long int draws, int bin_count, gsl_rng *rng){
    apop_model* apop_varad_var(model, NULL);
    Apop_assert(model && model->draw, "The second argument needs to be an apop_model with a 'draw' function "
                              "that I can use to make random draws.");
    apop_data* apop_varad_var(binspec, NULL);
    int apop_varad_var(bin_count, 0);
    long int apop_varad_var(draws, 1e4);
    gsl_rng *apop_varad_var(rng, NULL)
    static gsl_rng *spare = NULL;
    if (!rng && !spare) 
        spare = apop_rng_alloc(++apop_opts.rng_seed);
    if (!rng) rng = spare;
APOP_VAR_ENDHEAD
    Get_vmsizes(binspec);
    apop_data *outd = apop_data_alloc(draws, model->dsize); 
    for (long int i=0; i< draws; i++){
        Apop_row(outd, i, ach);
        apop_draw(ach->data, rng, model);
    }
    apop_data *outbinned = apop_data_to_bins(outd, binspec, .bin_count=bin_count);
    apop_data_free(outd);
    apop_vector_normalize(outbinned->weights);
    return apop_estimate(outbinned, apop_pmf);
} 
Ejemplo n.º 19
0
/* This function prettyprints the \c apop_data set to a screen.

It is currently not in the documentation. It'd be nice to merge this w/apop_data_print.

This takes a lot of machinery. I write every last element to a text array, then measure column widths, then print to screen with padding to guarantee that everything lines up.  There's no way to have the first element of a column line up with the last unless you interrogate the width of every element in the column, so printing columns really can't be a one-pass process.

So, I produce an \ref apop_data set with no numeric elements and a text element to be
filled with the input data set, and then print that. That means that I'll be using
(more than) twice the memory to print this. If this is a problem, you can use \ref
apop_data_print to dump your data to a text file, and view the text file, or print
subsets.

For more machine-readable printing, see \ref apop_data_print.
*/
void apop_data_show(const apop_data *in){
    if (!in) {printf("NULL\n"); return;}
    Get_vmsizes(in) //vsize, msize1, msize2, tsize
//Take inventory and get sizes
    size_t hasrownames = (in->names && in->names->rowct) ? 1 : 0;
    size_t hascolnames = in->names && 
                    (in->names->vector || in->names->colct || in->names->textct);
    size_t hasweights = (in->weights != NULL);

    size_t outsize_r = GSL_MAX(in->matrix ? in->matrix->size1 : 0, in->vector ? in->vector->size: 0);
    outsize_r = GSL_MAX(outsize_r, in->textsize[0]);
    outsize_r = GSL_MAX(outsize_r, wsize);
    if (in->names) outsize_r = GSL_MAX(outsize_r, in->names->rowct);
    outsize_r += hascolnames;

    size_t outsize_c = msize2;
    outsize_c += in->textsize[1];
    outsize_c += (vsize>0);
    outsize_c += (wsize>0);
    outsize_c += hasrownames + hasweights;

//Write to the printout data set.
    apop_data *printout = apop_text_alloc(NULL , outsize_r, outsize_c);
    if (hasrownames)
        for (size_t i=0; i < in->names->rowct; i ++)
            apop_text_set(printout, i + hascolnames, 0, "%s", in->names->row[i]);
    for (size_t i=0; i < vsize; i ++) //vsize may be zero.
        apop_text_set(printout, i + hascolnames, hasrownames, "%g", gsl_vector_get(in->vector, i));
    for (size_t i=0; i < msize1; i ++) //msize1 may be zero.
        for (size_t j=0; j < msize2; j ++)
            apop_text_set(printout, i + hascolnames, hasrownames + (vsize >0)+ j, "%g", gsl_matrix_get(in->matrix, i, j));
    if (in->textsize[0])
        for (size_t i=0; i < in->textsize[0]; i ++)
            for (size_t j=0; j < in->textsize[1]; j ++)
                apop_text_set(printout, i + hascolnames, hasrownames + (vsize>0)+ msize2 + j, "%s", in->text[i][j]);
    if (hasweights)
        for (size_t i=0; i < in->weights->size; i ++)
            apop_text_set(printout, i + hascolnames, outsize_c-1, "%g", gsl_vector_get(in->weights, i));

//column names
    if (hascolnames){
        if (vsize && in->names->vector)
            apop_text_set(printout, 0 , hasrownames, "%s", in->names->vector);
        if (msize2 && in->names)
            for (size_t i=0; i < in->names->colct; i ++)
                apop_text_set(printout, 0 , hasrownames + (vsize>0) + i, "%s", in->names->col[i]);
        if (in->textsize[1] && in->names)
            for (size_t i=0; i < in->names->textct; i ++)
                apop_text_set(printout, 0 , hasrownames + (vsize>0) + msize2 + i, "%s", in->names->text[i]);
        if (hasweights)
            apop_text_set(printout, 0 , outsize_c-1, "Weights");
    }

//get column sizes
    int colsizes[outsize_c];
    for (size_t i=0; i < outsize_c; i ++){
        colsizes[i] = strlen(printout->text[0][i]);
        for (size_t j=1; j < outsize_r; j ++)
            colsizes[i] = GSL_MAX(colsizes[i], strlen(printout->text[j][i]));
    }

//Finally, print
    if (in->names && in->names->title && strlen(in->names->title))
        printf("\t%s\n\n", in->names->title);
    for (size_t j=0; j < outsize_r; j ++){
        for (size_t i=0; i < outsize_c; i ++){
            white_pad(colsizes[i] - strlen(printout->text[j][i]) + 1);//one spare space.
            printf("%s", printout->text[j][i]);
            if (i > 0 && i< outsize_c-1) 
                printf(" %s ", apop_opts.output_delimiter);
        }
        printf("\n");
    }

    if (in->more) {
        printf("\n");
        apop_data_show(in->more);
    }
    apop_data_free(printout);
}
Ejemplo n.º 20
0
Archivo: em_weight.c Proyecto: b-k/tea
void save_candidate(apop_data *candidate, apop_data **prior_candidate){
    apop_data_free(*prior_candidate);
    *prior_candidate = candidate;
    //apop_data_pmf_compress(*prior_candidate);
}
Ejemplo n.º 21
0
/** This function creates a series of spec files with paste in macros used
  * instead of normal keys. The tests will ensure that the correct keys are 
  * getting written to the keys table by running read_spec() and then using 
  * apop functions to verify that the keys are indeed in the spec file
  */
void pastein_tests(){

char *spec1;
asprintf(&spec1, "1.spec");

char *spec2;
asprintf(&spec2, "2.spec");

char *spec3;
asprintf(&spec3, "3.spec");
        
char *spec4;
asprintf(&spec4, "4.spec");

char *spec5;
asprintf(&spec5, "5.spec");
        
    /* Standard test here: creating a macro with a few sub keys and calling it on its own
     * in the impute key. If something goes wrong here it's because there's something
     * fundamentally wrong with the paste in macro (because there's only one so there's
     * nothing too complex going on).
     */
     write_a_file(spec1,
     "\n"
     "database: demo.db\n"
     "verbose: 2\n"
     "catagesex{\n"
     "  min group size: 3\n"
     "  draw count: 3\n"
     "  seed: 2332\n"
     "  categories {\n"
     "      CATAGE\n"
     "      SEX\n"
     "  }\n"
     "}\n"
     "\n"
     "input {\n"
     "    input file: dc_pums_08.csv\n"
     "    output table: dc \n "
     "    overwrite: y \n "
     "} \n "
     " \n"
     "fields { \n"
     "SCHL: int 0-24 \n"
     "WAGP: real\n"
     "\n}"
     "impute{\n"
     "  input table: viewdc\n"
     "  output table: imputes\n"
     "  paste in: catagesex\n"
     "  method: hot deck\n"
     "  output vars: SCHL\n"
     "}\n"
     "impute{\n"
     "  input table: viewdc\n"
     "  output table: imputes\n"
     "  paste in: catagesex\n"
     "  method: hot deck\n"
     "  output vars: WAGP\n"
     "}\n"
     );

     /* Creating test here that uses two macros that are used concurrently but that do not
      * call each other. tables{...} and catagesex{...} are each used in impute{...} but
      * they do not "paste each other in". This will be tested in spec 3.
      */
     write_a_file(spec2,
     "\n"
     "database: demo.db\n"
     "verbose: 2\n"
     "catagesex{\n"
     "  min group size: 3\n"
     "  draw count: 3\n"
     "  seed: 2332\n"
     "  categories {\n"
     "      CATAGE\n"
     "      SEX\n"
     "  }\n"
     "}\n"
     "tables{\n"
     "  input table: viewdc\n"
     "  output table: impuTable\n" //To account for analysts who like camel case
     "}\n"
     "\n"
     "input {\n"
     "    paste in: tables\n"
     "    input file: dc_pums_08.csv\n"
     "    output table: dc \n "
     "    overwrite: y \n "
     "} \n "
     " \n"
     "fields { \n"
     "SCHL: int 0-24 \n"
     "WAGP: real\n"
     "\n}"
     "impute{\n"
     "  paste in: tables\n"
     "  paste in: catagesex\n"
     "  method: hot deck\n"
     "  output vars: SCHL\n"
     "}\n"
     "impute{\n"
     "  paste in: tables\n"
     "  paste in: catagesex\n"
     "  method: hot deck\n"
     "  output vars: WAGP\n"
     "}\n"
     );

    /* More complicated test that tests the ability of a macro to use another macro in its
     * own definition. For instance, it tests something along the lines of
     * catagesex{paste in: impute stuff \n paste in: categories}
     */
     write_a_file(spec3,
     "\n"
     "database: demo.db\n"
     "verbose: 2\n"
     "imputestuff{\n"
     "  min group size: 3\n"
     "  draw count: 3\n"
     "  seed: 2332\n"
     "}\n"
     "categoriesstuff {\n"
     "  categories{\n"
     "      CATAGE\n"
     "      SEX\n"
     "  }\n"
     "}\n"
     "catagesex{\n"
     "  paste in: imputestuff\n"
     "  paste in: categoriesstuff\n"
     "}\n"
     "\n"
     "input {\n"
     "    input file: dc_pums_08.csv\n"
     "    output table: dc \n "
     "    overwrite: y \n "
     "} \n "
     " \n"
     "fields { \n"
     "SCHL: int 0-24 \n"
     "WAGP: real\n"
     "\n}"
     "impute{\n"
     "  input table: viewdc\n"
     "  output table: imputes\n"
     "  paste in: catagesex\n"
     "  method: hot deck\n"
     "  output vars: SCHL\n"
     "}\n"
     "impute{\n"
     "  input table: viewdc\n"
     "  output table: imputes\n"
     "  paste in: catagesex\n"
     "  method: hot deck\n"
     "  output vars: WAGP\n"
     "}\n"
     );

    /* Tests whether it's possible to create a macro that comprises the entire spec file
     * (which, of course, is then pasted in on its own). This includes other macros that
     * are written within the overarching macro itself. Possibly overkill? But I think
     * it's worth it to test given that different analysts might include big portions of
     * the spec file separately and could decide to use a macro to do so.
     */
     write_a_file(spec4,
     "\n"
     "database: demo.db\n"
     "wholeSpec{\n"
     "verbose: 2\n"
     "catagesex{\n"
     "  min group size: 3\n"
     "  draw count: 3\n"
     "  seed: 2332\n"
     "  categories {\n"
     "      CATAGE\n"
     "      SEX\n"
     "  }\n"
     "}\n"
     "\n"
     "input {\n"
     "    input file: dc_pums_08.csv\n"
     "    output table: dc \n "
     "    overwrite: y \n "
     "} \n "
     " \n"
     "fields { \n"
     "SCHL: int 0-24 \n"
     "WAGP: real\n"
     "\n}"
     "impute{\n"
     "  input table: viewdc\n"
     "  output table: imputes\n"
     "  paste in: catagesex\n"
     "  method: hot deck\n"
     "  output vars: SCHL\n"
     "}\n"
     "impute{\n"
     "  input table: viewdc\n"
     "  output table: imputes\n"
     "  paste in: catagesex\n"
     "  method: hot deck\n"
     "  output vars: WAGP\n"
     "}\n"
     "}\n"
     "paste in: wholeSpec\n"
     );

char *db_dummy;

     char *imp_min_grp, *imp_drw_cnt, *imp_seed, *imp_categories;

     read_spec(&spec1, &db_dummy);
     asprintf(&imp_min_grp, "impute/min group size");
     asprintf(&imp_drw_cnt, "impute/draw count");
     asprintf(&imp_seed, "impute/seed");
     asprintf(&imp_categories, "impute/categories");

     apop_data *spec1_keys1 = apop_query_to_text("select * from keys where key like "
             "'impute/m%%'");
     printf("spec1_keys1->text[0][0] is given by: %s.\n", spec1_keys1->text[0][0]);
     assert(!strcmp(imp_min_grp, spec1_keys1->text[0][0]));


     apop_data *spec1_keys2 = apop_query_to_text("select * from keys where key like "
             "'impute/d%%'");
     printf("spec1_keys2->text[0][0] is given by: %s.\n", spec1_keys2->text[0][0]);
     assert(!strcmp(imp_drw_cnt, spec1_keys2->text[0][0]));


     apop_data *spec1_keys3 = apop_query_to_text("select * from keys where key like "
             "'impute/s%%'");
     printf("spec1_keys3->text[0][0] is given by: %s.\n", spec1_keys3->text[0][0]);
     assert(!strcmp(imp_seed, spec1_keys3->text[0][0]));

     
     apop_data *spec1_keys4 = apop_query_to_text("select * from keys where key like "
             "'impute/c%%'");
     printf("spec1_keys4->text[0][0] is given by: %s.\n", spec1_keys4->text[0][0]);
     assert(!strcmp(imp_categories, spec1_keys4->text[0][0]));
     
     apop_data_free(spec1_keys1);
     apop_data_free(spec1_keys2);
     apop_data_free(spec1_keys3);
     apop_data_free(spec1_keys4);
      

     read_spec(&spec2, &db_dummy);
     char *inpt_inpt_table;
     char *inpt_otpt_table;

     asprintf(&inpt_inpt_table, "input/input table");
     asprintf(&inpt_otpt_table, "input/output table");

     apop_data *spec2_keys1 = apop_query_to_text("select * from keys where key like "
             "'impute/m%%'");
     printf("spec2_keys1->text[0][0] is given by: %s.\n", spec2_keys1->text[0][0]);
     assert(!strcmp(imp_min_grp, spec2_keys1->text[0][0]));


     apop_data *spec2_keys2 = apop_query_to_text("select * from keys where key like "
             "'impute/d%%'");
     printf("spec2_keys2->text[0][0] is given by: %s.\n", spec2_keys2->text[0][0]);
     assert(!strcmp(imp_drw_cnt, spec2_keys2->text[0][0]));


     apop_data *spec2_keys3 = apop_query_to_text("select * from keys where key like "
             "'impute/s%%'");
     printf("spec2_keys3->text[0][0] is given by: %s.\n", spec2_keys3->text[0][0]);
     assert(!strcmp(imp_seed, spec2_keys3->text[0][0]));

     
     apop_data *spec2_keys4 = apop_query_to_text("select * from keys where key like "
             "'impute/c%%'");
     printf("spec2_keys4->text[0][0] is given by: %s.\n", spec2_keys4->text[0][0]);
     assert(!strcmp(imp_categories, spec2_keys4->text[0][0]));
     
     apop_data *spec2_keys5 = apop_query_to_text("select * from keys where key like "
             "'input/input t%%'");
     printf("spec2_keys5->text[0][0] is given by: %s.\n", spec2_keys5->text[0][0]);
     assert(!strcmp(inpt_inpt_table, spec2_keys5->text[0][0]));


     apop_data *spec2_keys6 = apop_query_to_text("select * from keys where key like "
             "'input/output t%%'");
     printf("spec2_keys6->text[0][0] is given by: %s.\n", spec2_keys6->text[0][0]);
     assert(!strcmp(inpt_otpt_table, spec2_keys6->text[0][0]));

     apop_data_free(spec2_keys1);
     apop_data_free(spec2_keys2);
     apop_data_free(spec2_keys3);
     apop_data_free(spec2_keys4);
     apop_data_free(spec2_keys5);
     apop_data_free(spec2_keys6);

     read_spec(&spec3, &db_dummy);
     
     apop_data *spec3_keys1 = apop_query_to_text("select * from keys where key like "
             "'impute/m%%'");
     printf("spec3_keys1->text[0][0] is given by: %s.\n", spec3_keys1->text[0][0]);
     assert(!strcmp(imp_min_grp, spec3_keys1->text[0][0]));


     apop_data *spec3_keys2 = apop_query_to_text("select * from keys where key like "
             "'impute/d%%'");
     printf("spec3_keys2->text[0][0] is given by: %s.\n", spec3_keys2->text[0][0]);
     assert(!strcmp(imp_drw_cnt, spec3_keys2->text[0][0]));


     apop_data *spec3_keys3 = apop_query_to_text("select * from keys where key like "
             "'impute/s%%'");
     printf("spec3_keys3->text[0][0] is given by: %s.\n", spec3_keys3->text[0][0]);
     assert(!strcmp(imp_seed, spec3_keys3->text[0][0]));

     
     apop_data *spec3_keys4 = apop_query_to_text("select * from keys where key like "
             "'impute/c%%'");
     printf("spec3_keys4->text[0][0] is given by: %s.\n", spec3_keys4->text[0][0]);
     assert(!strcmp(imp_categories, spec3_keys4->text[0][0]));

     apop_data_free(spec3_keys1);
     apop_data_free(spec3_keys2);
     apop_data_free(spec3_keys3);
     apop_data_free(spec3_keys4);

     /* This is spec file that tests whether paste in works for pasting in the entire spec
      * file (without the database -- pasting in database has not been tested yet). spec 4
      * paste in stuff is tested by just testing for an assortment of keys.
      */
     read_spec(&spec4, &db_dummy);


     /* DV - ATTENTION:
      * This test is failing right now so I've put in an if statement below to exit when
      * there's no impute key to avoid a segfault in the testing. We need to fix the bug
      * that is preventing paste in from allowing an entire spec file (minus the database)
      * to be pasted in.
      */


     apop_data *spec4_keys1 = apop_query_to_text("select * from keys where key like "
             "'impute/m%%'");

     if(get_key_word("impute", NULL) == NULL) return;
     printf("spec4_keys1->text[0][0] is given by: %s.\n", spec4_keys1->text[0][0]);
     assert(!strcmp(imp_min_grp, spec4_keys1->text[0][0]));


     apop_data *spec4_keys2 = apop_query_to_text("select * from keys where key like "
             "'impute/d%%'");
     printf("spec4_keys2->text[0][0] is given by: %s.\n", spec4_keys2->text[0][0]);
     assert(!strcmp(imp_drw_cnt, spec4_keys2->text[0][0]));


     apop_data *spec4_keys3 = apop_query_to_text("select * from keys where key like "
             "'impute/s%%'");
     printf("spec4_keys3->text[0][0] is given by: %s.\n", spec4_keys3->text[0][0]);
     assert(!strcmp(imp_seed, spec4_keys3->text[0][0]));

     
     apop_data *spec4_keys4 = apop_query_to_text("select * from keys where key like "
             "'impute/c%%'");
     printf("spec4_keys4->text[0][0] is given by: %s.\n", spec4_keys4->text[0][0]);
     assert(!strcmp(imp_categories, spec4_keys4->text[0][0]));
     
     apop_data *spec4_keys5 = apop_query_to_text("select * from keys where key like "
             "'input/input t%%'");
     printf("spec4_keys5->text[0][0] is given by: %s.\n", spec4_keys5->text[0][0]);
     assert(!strcmp(inpt_inpt_table, spec4_keys5->text[0][0]));


     apop_data *spec4_keys6 = apop_query_to_text("select * from keys where key like "
             "'input/output t%%'");
     printf("spec4_keys6->text[0][0] is given by: %s.\n", spec4_keys6->text[0][0]);
     assert(!strcmp(inpt_otpt_table, spec4_keys6->text[0][0]));

     apop_data_free(spec4_keys1);
     apop_data_free(spec4_keys2);
     apop_data_free(spec4_keys3);
     apop_data_free(spec4_keys4);
     apop_data_free(spec4_keys5);
     apop_data_free(spec4_keys6);

     free(imp_min_grp);
     free(imp_drw_cnt);
     free(imp_seed);
     free(imp_categories);
     free(inpt_inpt_table);
     free(inpt_otpt_table);
     free(spec1);
     free(spec2);
     free(spec3);
     free(spec4);
     free(spec5);

     printf("Reached end of test.\n");

}
Ejemplo n.º 22
0
long double apop_linear_constraint(gsl_vector *beta, apop_data * constraint, double margin){
#else
apop_varad_head(long double, apop_linear_constraint){
    static threadlocal apop_data *default_constraint;
    gsl_vector * apop_varad_var(beta, NULL);
    double apop_varad_var(margin, 0);
    apop_data * apop_varad_var(constraint, NULL);
    Apop_assert(beta, "The vector to be checked is NULL.");
    if (!constraint){
        if (default_constraint && beta->size != default_constraint->vector->size){
            apop_data_free(default_constraint);
            default_constraint = NULL;
        }
        if (!default_constraint){
            default_constraint = apop_data_alloc(0,beta->size, beta->size);
            default_constraint->vector = gsl_vector_calloc(beta->size);
            gsl_matrix_set_identity(default_constraint->matrix);
        }
        constraint = default_constraint;
    }
    return apop_linear_constraint_base(beta, constraint, margin);
}

 long double apop_linear_constraint_base(gsl_vector *beta, apop_data * constraint, double margin){
#endif
    static threadlocal gsl_vector *closest_pt = NULL;
    static threadlocal gsl_vector *candidate  = NULL;
    static threadlocal gsl_vector *fix        = NULL;
    int constraint_ct = constraint->matrix->size1;
    int bindlist[constraint_ct];
    int i, bound = 0;
    /* For added efficiency, keep a scratch vector or two on hand. */
    if (closest_pt==NULL || closest_pt->size != constraint->matrix->size2){
        closest_pt  = gsl_vector_calloc(beta->size);
        candidate   = gsl_vector_alloc(beta->size);
        fix         = gsl_vector_alloc(beta->size);
        closest_pt->data[0] = GSL_NEGINF;
    }
    /* Do any constraints bind?*/
    memset(bindlist, 0, sizeof(int)*constraint_ct);
    for (i=0; i< constraint_ct; i++){
        Apop_row_v(constraint, i, c);
        bound           +=
        bindlist[i]      = binds(beta, apop_data_get(constraint, i, -1), c, margin);
    }
    if (!bound) return 0;   //All constraints met.
    gsl_vector *base_beta = apop_vector_copy(beta);
    /* With only one constraint, it's easy. */
    if (constraint->vector->size==1){
        Apop_row_v(constraint, 0, c);
        find_nearest_point(base_beta, constraint->vector->data[0], c, beta);
        goto add_margin;
    }
    /* Finally, multiple constraints, at least one binding.
       For each surface, pick a candidate point.
       Check whether the point meets the other constraints. 
            if not, translate to a new point that works.
            [Do this by maintaining a pseudopoint that translates by the
            necessary amount.]
        Once you have a candidate point, compare its distance to the
        current favorite; keep the best.
     */
    for (i=0; i< constraint_ct; i++)
        if (bindlist[i]){
            get_candiate(base_beta, constraint, i, candidate, margin);
            if(apop_vector_distance(base_beta, candidate) < apop_vector_distance(base_beta, closest_pt))
                gsl_vector_memcpy(closest_pt, candidate);
        }
    gsl_vector_memcpy(beta, closest_pt);
add_margin:
    for (i=0; i< constraint_ct; i++){
        if(bindlist[i]){
            Apop_row_v(constraint, i, c);
            gsl_vector_memcpy(fix, c);
            gsl_vector_scale(fix, magnitude(fix));
            gsl_vector_scale(fix, margin);
            gsl_vector_add(beta, fix);
        }
    }
    long double out = apop_vector_distance(base_beta, beta);
    gsl_vector_free(base_beta);
    return out;
}
Ejemplo n.º 23
0
static long double wishart_ll(apop_data *in, apop_model *m){
    Nullcheck_mpd(in, m, GSL_NAN);
    wishartstruct_t ws = {
            .paraminv = apop_matrix_inverse(m->parameters->matrix),
            .len = sqrt(in->matrix->size2),
            .df = m->parameters->vector->data[0]
        };
    double paramdet = apop_matrix_determinant(m->parameters->matrix);
    if (paramdet < 1e-3) return GSL_NEGINF;
    double ll =  apop_map_sum(in, .fn_vp = one_wishart_row, .param=&ws, .part='r');
    double k = log(ws.df)*ws.df/2.;
    k -= M_LN2 * ws.len* ws.df/2.;
    k -= log(paramdet) * ws.df/2.;
    k -= apop_multivariate_lngamma(ws.df/2., ws.len);
    return ll + k*in->matrix->size1;
}

static int apop_wishart_draw(double *out, gsl_rng *r, apop_model *m){
    /*
Translated from the Fortran by BK. Fortran comments:

C          SUBROUTINE DWSHRT(D, N, NP, NNP, SB, SA)
C
C       ALGORITHM AS 53  APPL. STATIST. (1972) VOL.21, NO.3
C
C     Wishart variate generator.  On output, SA is an upper-triangular
C     matrix of size NP * NP [...]
C     whose elements have a Wishart(N, SIGMA) distribution.
*/
    Nullcheck_mp(m, );
    int np = m->parameters->matrix->size1;
    int n = m->parameters->vector->data[0];
    if (!m->more) { 
        gsl_matrix *ccc = apop_matrix_copy(m->parameters->matrix);
        gsl_linalg_cholesky_decomp(ccc);
        for (int i=0; i < ccc->size1; i++) //zero out the upper diagonal
            for (int j=i+1; j < ccc->size2; j++)
                gsl_matrix_set(ccc, i, j, 0);
        m->more = apop_matrix_to_data(ccc);
        m->more_size = sizeof(apop_data);
    }
    apop_data *Chol = m->more;
    apop_data *rmatrix = apop_data_calloc(np, np);
    Staticdef(apop_model *, std_normal, apop_model_set_parameters(apop_normal, 0, 1));

//C     Load diagonal elements with square root of chi-square variates
    for(int i = 0; i< np; i++){
        int DF = n - i;
        apop_data_set(rmatrix, i, i, sqrt(gsl_ran_chisq(r, DF)));
    }
    
    for(int i = 1; i< np; i++) //off-diagonal triangles: Normals.
          for(int j = 0; j< i; j++){
            double ndraw;
            apop_draw(&ndraw, r, std_normal);
            assert (!gsl_isnan(ndraw));
            apop_data_set(rmatrix, i, j, ndraw);
          }
    //Now find C * rand * rand' * C'
    apop_data *cr = apop_dot(Chol, rmatrix);
    apop_data *crr = apop_dot(cr, rmatrix, .form2='t');
    apop_data *crrc = apop_dot(crr, Chol, .form2='t');
    memmove(out, crrc->matrix->data, sizeof(double)*np*np);
    apop_data_free(rmatrix); apop_data_free(cr);
    apop_data_free(crrc);    apop_data_free(crr);
    return 0;
}