Beispiel #1
0
int main(){
    apop_data *d = apop_text_alloc(apop_data_alloc(6), 6, 1);
    apop_data_fill(d,   1,   2,   3,   3,   1,   2);
    apop_text_fill(d,  "A", "A", "A", "A", "A", "B");

    asprintf(&d->names->title, "Original data set");
    printdata(d);

        //binned, where bin ends are equidistant but not necessarily in the data
    apop_data *binned = apop_data_to_bins(d, NULL);
    asprintf(&binned->names->title, "Post binning");
    printdata(binned);
    assert(apop_sum(binned->weights)==6);
    assert(fabs(//equal distance between bins
              (apop_data_get(binned, 1, -1) - apop_data_get(binned, 0, -1))
            - (apop_data_get(binned, 2, -1) - apop_data_get(binned, 1, -1))) < 1e-5);

        //compressed, where the data is as in the original, but weights 
        //are redome to accommodate repeated observations.
    apop_data_pmf_compress(d);
    asprintf(&d->names->title, "Post compression");
    printdata(d);
    assert(apop_sum(d->weights)==6);

    apop_model *d_as_pmf = apop_estimate(d, apop_pmf);
    Apop_row(d, 0, firstrow); //1A
    assert(fabs(apop_p(firstrow, d_as_pmf) - 2./6 < 1e-5));
}
Beispiel #2
0
static int multinomial_rng(double *out, gsl_rng *r, apop_model* est){
    Nullcheck_mp(est, 1);
    double * p = est->parameters->vector->data;
    //the trick where we turn the params into a p-vector
    int N = p[0];

    if (est->parameters->vector->size == 2) {
        *out = gsl_ran_binomial_knuth(r, 1-gsl_vector_get(est->parameters->vector, 1), N);
        out[1] = N-*out;
        goto done;
    }
    //else, multinomial
    //cut/pasted/modded from the GSL. Copyright them.
    p[0] = 1 - (apop_sum(est->parameters->vector)-N);
    double sum_p = 0.0;
    int sum_n = 0;

    for (int i = 0; i < est->parameters->vector->size; i++) {
        out[i] = (p[i] > 0)
                ? gsl_ran_binomial (r, p[i] / (1 - sum_p), N - sum_n)
                : 0;
        sum_p += p[i];
        sum_n += out[i];
    }
    done:
    p[0] = N;
    return 0;
}
Beispiel #3
0
static void multinomial_show(apop_model *est, FILE *out){
    double * p = est->parameters->vector->data;
    int N=p[0];
    p[0] = 1 - (apop_sum(est->parameters->vector)-N);
    fprintf(out, "%s, with %i draws.\nBin odds:\n", est->name, N);
    apop_vector_print(est->parameters->vector, .output_pipe=out);
    p[0]=N;
}
Beispiel #4
0
void rescale_cp_and_merge_into_candidate(apop_data *candidate, apop_data **cp, double scale_to){
    if (!*cp) return;
    double cp_weight_sum = apop_sum((*cp)->weights);
    if (cp_weight_sum) {
        gsl_vector_scale((*cp)->weights, scale_to/cp_weight_sum);
        merge_in_weights_so_far(candidate, *cp);//append cp into candidate
    }
    if (!(*cp)->weights || (candidate? candidate->weights->size : 0) != (*cp)->weights->size)
        apop_data_free(*cp);
}
Beispiel #5
0
 apop_data * apop_bootstrap_cov_base(apop_data * data, apop_model *model, gsl_rng *rng, int iterations, char keep_boots, char ignore_nans, apop_data **boot_store){
#endif
    Get_vmsizes(data); //vsize, msize1, msize2
    apop_model *e = apop_model_copy(model);
    apop_data *subset = apop_data_copy(data);
    apop_data *array_of_boots = NULL,
              *summary;
    //prevent and infinite regression of covariance calculation.
    Apop_model_add_group(e, apop_parts_wanted); //default wants for nothing.
    size_t i, nan_draws=0;
    apop_name *tmpnames = (data && data->names) ? data->names : NULL; //save on some copying below.
    if (data && data->names) data->names = NULL;

    int height = GSL_MAX(msize1, GSL_MAX(vsize, (data?(*data->textsize):0)));
	for (i=0; i<iterations && nan_draws < iterations; i++){
		for (size_t j=0; j< height; j++){       //create the data set
			size_t randrow	= gsl_rng_uniform_int(rng, height);
            apop_data_memcpy(Apop_r(subset, j), Apop_r(data, randrow));
		}
		//get the parameter estimates.
		apop_model *est = apop_estimate(subset, e);
        gsl_vector *estp = apop_data_pack(est->parameters);
        if (!gsl_isnan(apop_sum(estp))){
            if (i==0){
                array_of_boots	      = apop_data_alloc(iterations, estp->size);
                apop_name_stack(array_of_boots->names, est->parameters->names, 'c', 'v');
                apop_name_stack(array_of_boots->names, est->parameters->names, 'c', 'c');
                apop_name_stack(array_of_boots->names, est->parameters->names, 'c', 'r');
            }
            gsl_matrix_set_row(array_of_boots->matrix, i, estp);
        } else if (ignore_nans=='y'){
            i--; 
            nan_draws++;
        }
        apop_model_free(est);
        gsl_vector_free(estp);
	}
    if(data) data->names = tmpnames;
    apop_data_free(subset);
    apop_model_free(e);
    int set_error=0;
    Apop_stopif(i == 0 && nan_draws == iterations, apop_return_data_error(N),
                1, "I ran into %i NaNs and no not-NaN estimations, and so stopped. "
                       , iterations);
    Apop_stopif(nan_draws == iterations,  set_error++;
            apop_matrix_realloc(array_of_boots->matrix, i, array_of_boots->matrix->size2),
                1, "I ran into %i NaNs, and so stopped. Returning results based "
                       "on %zu bootstrap iterations.", iterations, i);
	summary	= apop_data_covariance(array_of_boots);
    if (boot_store) *boot_store = array_of_boots;
    else            apop_data_free(array_of_boots);
    if (set_error) summary->error = 'N';
	return summary;
}
apop_data *kappa_and_pi(apop_data const *tab_in){
    apop_data *out = apop_data_alloc();
    Apop_stopif(!tab_in, out->error='n'; return out, 0, "NULL input. Returning output with 'n' error code.");
    Apop_stopif(!tab_in->matrix, out->error='m'; return out, 0, "NULL input matrix. Returning output with 'm' error code.");
    Apop_stopif(tab_in->matrix->size1 != tab_in->matrix->size2, out->error='s'; return out, 0, "Input rows=%zu; input cols=%zu; "
                    "these need to be equal. Returning output with error code 's'.", tab_in->matrix->size1, tab_in->matrix->size2);

    apop_data *tab = apop_data_copy(tab_in);
    double total = apop_matrix_sum(tab->matrix);
    gsl_matrix_scale(tab->matrix, 1./total);
    double p_o = 0, p_e = 0, scott_pe = 0, ia = 0, row_ent = 0, col_ent = 0;
    for (int c=0; c< tab->matrix->size1; c++){
        double this_obs = apop_data_get(tab, c, c);
        p_o += this_obs;

        Apop_row_v(tab, c, row);
        Apop_col_v(tab, c, col);
        double rsum = apop_sum(row);
        double csum = apop_sum(col);
        p_e += rsum * csum;
        scott_pe += pow((rsum+csum)/2, 2);

        ia += this_obs * log2(this_obs/(rsum * csum));
        row_ent -= rsum * log2(rsum);
        col_ent -= csum * log2(csum);
    }
    apop_data_free(tab);

    asprintf(&out->names->title, "Scott's π and Cohen's κ");
    apop_data_add_named_elmt(out, "total count", total);
    apop_data_add_named_elmt(out, "percent agreement", p_o);
    apop_data_add_named_elmt(out, "κ", ((p_e==1)? 0: (p_o - p_e) / (1-p_e) ));
    apop_data_add_named_elmt(out, "π", ((p_e==1)? 0: (p_o - scott_pe) / (1-scott_pe)));
    apop_data_add_named_elmt(out, "P_I", ia/((row_ent+col_ent)/2));
    apop_data_add_named_elmt(out, "Cohen's p_e", p_e);
    apop_data_add_named_elmt(out, "Scott's p_e", scott_pe);
    apop_data_add_named_elmt(out, "information in agreement", ia);
    apop_data_add_named_elmt(out, "row entropy", row_ent);
    apop_data_add_named_elmt(out, "column entropy", col_ent);
    return out;
}
Beispiel #7
0
static double multinomial_log_likelihood(apop_data *d, apop_model *params){
    Nullcheck_mpd(d, params, GSL_NAN);
    double *pv = params->parameters->vector->data;
    double n = pv[0]; 
    Apop_assert_c(pv[1] <=1, GSL_NAN, 1, "The input parameters should be [n, p_1, (...)], but "
        "element 1 of the parameter vector is >1.") //mostly makes sense for the binomial.
    if (n==2)
        return apop_map_sum(d, .fn_vp=binomial_ll, .param=params->parameters->vector);

    pv[0] = 1 - (apop_sum(params->parameters->vector)-n);//making the params a p-vector. Put n back at the end.
    double out = apop_map_sum(d, .fn_vp=multinomial_ll, .param=params);
    pv[0]=n;
    return out;
}
Beispiel #8
0
int main(){
    int len = 3000;
    gsl_vector *v = gsl_vector_alloc(len);
    for (double i=0; i< len; i++) gsl_vector_set(v, i, 1./(i+1));
    double square;
    gsl_blas_ddot(v, v, &square);
    printf("1 + (1/2)^2 + (1/3)^2 + ...= %g\n", square);

    double pi_over_six = gsl_pow_2(M_PI)/6.;
    Diff(square, pi_over_six);

    /* Now using apop_dot, in a few forms.
       First, vector-as-data dot itself.
       If one of the inputs is a vector,
       apop_dot puts the output in a vector-as-data:*/
    apop_data *v_as_data = &(apop_data){.vector=v};
    apop_data *vdotv = apop_dot(v_as_data, v_as_data);
    Diff(gsl_vector_get(vdotv->vector, 0), pi_over_six);

    /* Wrap matrix in an apop_data set. */
    gsl_matrix *v_as_matrix = apop_vector_to_matrix(v);
    apop_data dm = (apop_data){.matrix=v_as_matrix};

    // (1 X len) vector dot (len X 1) matrix --- produce a scalar (one item vector).
    apop_data *mdotv = apop_dot(v_as_data, &dm);
    double scalarval = apop_data_get(mdotv);
    Diff(scalarval, pi_over_six);

    //(len X 1) dot (len X 1) --- bad dimensions.
    apop_opts.verbose=-1; //don't print an error.
    apop_data *mdotv2 = apop_dot(&dm, v_as_data);
    apop_opts.verbose=0; //back to safety.
    assert(mdotv2->error);

    // If we want (len X 1) dot (1 X len) --> (len X len),
    // use apop_vector_to_matrix.
    apop_data dmr = (apop_data){.matrix=apop_vector_to_matrix(v, .row_col='r')};
    apop_data *product_matrix = apop_dot(&dm, &dmr);
    //The trace is the sum of squares:
    gsl_vector_view trace = gsl_matrix_diagonal(product_matrix->matrix);
    double tracesum = apop_sum(&trace.vector);
    Diff(tracesum, pi_over_six);

    apop_data_free(product_matrix);
    gsl_matrix_free(dmr.matrix);
}
Beispiel #9
0
static double one_wishart_row(gsl_vector *in, void *ws_in){
    wishartstruct_t *ws = ws_in;
    gsl_matrix *invparams_dot_data = gsl_matrix_alloc(ws->len, ws->len);
    apop_data *square= apop_data_alloc(ws->len, ws->len);
    apop_data_unpack(in, square);
    double datadet = apop_matrix_determinant(square->matrix);
    assert(datadet);

    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1, ws->paraminv, square->matrix, 0, invparams_dot_data);   
    gsl_vector_view diag = gsl_matrix_diagonal(invparams_dot_data);
    double trace = apop_sum(&diag.vector);
    gsl_matrix_free(invparams_dot_data);
    apop_data_free(square);
    double out= log(datadet) * (ws->df - ws->len -1.)/2. - trace*ws->df/2.;
    assert(isfinite(out));
    return out;
}
Beispiel #10
0
static void make_covar(apop_model *est){
    int size = est->parameters->vector->size;
    //the trick where we turn the params into a p-vector
    double * pv = est->parameters->vector->data;
    int n = pv[0];
    pv[0] = 1 - (apop_sum(est->parameters->vector)-n);

    apop_data *cov = apop_data_add_page(est->parameters, 
                            apop_data_alloc(size, size), "<Covariance>");
    for (int i=0; i < size; i++){
        double p = apop_data_get(est->parameters, i, -1);
        apop_data_set(cov, i, i, n * p *(1-p));
        for (int j=i+1; j < size; j++){
            double pj = apop_data_get(est->parameters, j, -1);
            double thiscell = -n*p*pj;
            apop_data_set(cov, i, j, thiscell);
            apop_data_set(cov, j, i, thiscell);
        }
    }
    pv[0]=n;
}
Beispiel #11
0
    fprintf(out, "%s, with %i draws.\nBin odds:\n", est->name, N);
    apop_vector_print(est->parameters->vector, .output_pipe=out);
    p[0]=N;
}

double avs(gsl_vector *v){return (double) apop_vector_sum(v);}

/* \amodel apop_multinomial The \f$n\f$--option generalization of the \ref apop_binomial "Binomial distribution".

\adoc estimated_info   Reports <tt>log likelihood</tt>. */
static void multinomial_estimate(apop_data * data,  apop_model *est){
    Nullcheck_mpd(data, est, );
    Get_vmsizes(data); //vsize, msize1
    est->parameters= apop_map(data, .fn_v=avs, .part='c');
    gsl_vector *v = est->parameters->vector;
    int n = apop_sum(v)/data->matrix->size1; //size of one row
    apop_vector_normalize(v);
    apop_name_add(est->parameters->names, "n", 'r');
    apop_data_set(est->parameters, .val=n); //zeroth item is now n, not p_0
    char name[100];
    for(int i=1; i < v->size; i++){
        sprintf(name, "p%i", i);
        apop_name_add(est->parameters->names, name, 'r');
    }
    est->dsize = n;
    make_covar(est);
    apop_data_add_named_elmt(est->info, "log likelihood", multinomial_log_likelihood(data, est));
}

static void multinom_prep(apop_data *data, apop_model *params){
    apop_model_print_vtable_add(multinomial_show, params);