Example #1
0
apop_model* apop_fdist_estimate(apop_data *d, apop_model *m){
    Apop_assert(d, "No data with which to count df. (the default estimation method)");
    apop_name_add(m->parameters->names, "df",  'r');
    apop_name_add(m->parameters->names, "df2",  'r');
    apop_data_set(m->parameters, 0, -1, d->vector->size -1);
    apop_data_set(m->parameters, 1, -1, d->matrix->size1 * d->matrix->size2 -1);
    apop_data_add_named_elmt(m->info, "log likelihood", apop_f_distribution.log_likelihood(d, m));
    return m;
}
Example #2
0
void FishModel::AddTiles(const FishHookTiles& f) {

  size_t nrows = f.size();
  size_t ncols = f.NumCovariates();
  // one for data, one for intercept
  mat = apop_data_alloc(0, nrows, (int)ncols+2); //vec, nrow, ncol
 
  // add the observed counts
  size_t i = 0; // row id (tile)
  for (i = 0; i < nrows; ++i) {
    float scaled_events = f.at(i).events;
    //scaled_events = f.at(i).covered > 0 ? scaled_events / f.at(i).covered : 0;
    apop_data_set(mat, i, (int)(ncols+1), scaled_events, NULL, NULL, NULL);
  }

  // set the covariates
  i = 0; // row id (tile)
  int j = 1; // column id (covariate) (0 is obs counts)
  for (const auto& r : f) { // loop the bins
    if (i == nrows) break;//debug, to break early
    j = 1;
    for (const auto& c : r) { // loop the covariates
      apop_data_set(mat, i, j++, c.second, NULL, NULL, NULL);
    }
    ++i; // update row interator
  }

  // add the intercept term
  for (i = 0; i < nrows; ++i)
    apop_data_set(mat, i, 0, 1.0, NULL, NULL, NULL);

  // add column names
  apop_name_add(mat->names, "events", 'c');
  for (const auto& c : f[0])
    apop_name_add(mat->names, c.first.c_str(), 'c');
  apop_name_add(mat->names, "intercept", 'c');

  // add row names
  std::stringstream ss;
  for (const auto& r : f) {
    ss << r.ChrName(SeqLib::BamHeader()) << ":" << r.pos1 << "-" << r.pos2;
    apop_name_add(mat->names, ss.str().c_str(), 'r');
    ss.str(std::string());
  }

}
Example #3
0
apop_data *draw_some_data(){
    gsl_rng *r = apop_rng_alloc(7);
    apop_data *d  = apop_data_alloc(0,10,1); 
    for (int i=0; i<10; i++)
        apop_data_set(d, i, 0, gsl_rng_uniform(r)*20);
    apop_data_print(d, .output_pipe=stderr);
    return d;
}
Example #4
0
apop_model* apop_t_estimate(apop_data *d, apop_model *m){
    Apop_assert(d, "No data with which to count df. (the default estimation method)");
    Get_vmsizes(d); //vsize, msize1, msize2, tsize
    apop_model *out = apop_model_copy(*m);
    double vmu = vsize ? apop_mean(d->vector) : 0;
    double v_sum_sq = vsize ? apop_var(d->vector)*(vsize-1) : 0;
    double m_sum_sq = 0;
    double mmu = 0;
   if (msize1) {
       apop_matrix_mean_and_var(d->matrix, &mmu, &m_sum_sq);
       m_sum_sq *= msize1*msize2-1;
   }
    apop_data_add_names(out->parameters, 'r', "mean", "standard deviation", "df");
    apop_data_set(out->parameters, 0, -1, (vmu *vsize + mmu * msize1*msize2)/tsize);
    apop_data_set(out->parameters, 1, -1, sqrt((v_sum_sq*vsize + m_sum_sq * msize1*msize2)/(tsize-1))); 
    apop_data_set(out->parameters, 2, -1, tsize-1);
    apop_data_add_named_elmt(out->info, "log likelihood", out->log_likelihood(d, out));
    return out;
}
Example #5
0
void plot(apop_model *k, apop_model *k2){
    apop_data *onept = apop_data_alloc(0,1,1);
    FILE *outtab = fopen("kerneldata", "w");
    for (double i=0; i<20; i+=0.01){
        apop_data_set(onept,0,0, i);
        fprintf(outtab, "%g %g %g\n", i, apop_p(onept, k), apop_p(onept, k2));
    }
    fclose(outtab);
    printf("plot 'kerneldata' using 1:2\n"
           "replot 'kerneldata' using 1:3\n");
}
Example #6
0
static long double multinomial_constraint(apop_data *data, apop_model *b){
  //constraint is that 0 < all elmts, and  1>all ps.
    int size = b->parameters->vector->size;
    static threadlocal apop_data *constr;
    if (constr && constr->matrix->size2 != size)
        apop_data_free(constr);
    if (!constr){
        constr = apop_data_calloc(size*2-1, size*2-1, size);

        //top half: 0 < [param], including param 0
        gsl_matrix_set_identity(Apop_subm(constr->matrix, 0, 0, size, size));

        //bottom (almost) half: 1 >= [param], excluding param 0
        for (int i=size; i < size*2-1; i++){
            apop_data_set(constr, i, -1, -1);
            apop_data_set(constr, i, i-size+1, -1);
        }
    }
    return apop_linear_constraint(b->parameters->vector, constr);
}
Example #7
0
static void make_covar(apop_model *est){
    int size = est->parameters->vector->size;
    //the trick where we turn the params into a p-vector
    double * pv = est->parameters->vector->data;
    int n = pv[0];
    pv[0] = 1 - (apop_sum(est->parameters->vector)-n);

    apop_data *cov = apop_data_add_page(est->parameters, 
                            apop_data_alloc(size, size), "<Covariance>");
    for (int i=0; i < size; i++){
        double p = apop_data_get(est->parameters, i, -1);
        apop_data_set(cov, i, i, n * p *(1-p));
        for (int j=i+1; j < size; j++){
            double pj = apop_data_get(est->parameters, j, -1);
            double thiscell = -n*p*pj;
            apop_data_set(cov, i, j, thiscell);
            apop_data_set(cov, j, i, thiscell);
        }
    }
    pv[0]=n;
}
Example #8
0
apop_data *make_histo(char const *zila, int year) {
    apop_data *d = apop_data_calloc(65,1);
    for (int i=0; i< 64; i++) {
        char *col;
        asprintf(&col, "year_%i_num_intensity_%i", year, i);
        double val=apop_query_to_float("select %s from ppl where "
                                       "ADMName='%s'", col, zila);
        Apop_stopif(isnan(val), continue, 0, "couldn't find %s for %s", col, zila);
        apop_data_set(d, i+1, 0, val);
        free(col);
    }
    return d;
}
Example #9
0
static void * process_result_set_data (MYSQL *conn, MYSQL_RES *res_set) {
  MYSQL_ROW        row;
  unsigned int     j=0;
  unsigned int num_fields = mysql_num_fields(res_set);
  apop_data *out   =apop_data_alloc(0, mysql_num_rows (res_set), num_fields);
     while ((row = mysql_fetch_row (res_set)) ) {
             for (size_t i = 0; i < mysql_num_fields (res_set); i++) 
                 apop_data_set(out, j , i, atof(row[i]));
             j++;
        }
     MYSQL_FIELD *fields = mysql_fetch_fields(res_set);
     for(size_t i = 0; i < num_fields; i++)
         apop_name_add(out->names, fields[i].name, 'c');
     check_and_clean(apop_data_free(out))
}
Example #10
0
/* This sample produces a dummy times table, gets a summary, and prunes the summary table.
If you are not a test script, uncomment the last line to display the pruned table.  */
void test_prune_cols(){
    int i, j;
    apop_data *d = apop_data_alloc(0, 10, 4);
    for (i=0; i< 10; i++)
        for (j=0; j< 4; j++)
            apop_data_set(d, i, j, i*j);
    apop_data *summary = apop_data_summarize(d);
    apop_data_prune_columns(summary, "mean", "median");
    assert(apop_name_find(summary->names, "mean", 'c')!=-2);
    assert(apop_name_find(summary->names, "median", 'c')!=-2);
    assert(apop_name_find(summary->names, "max", 'c')==-2); //not found
    assert(apop_name_find(summary->names, "variance", 'c')==-2); //not found
    assert(apop_data_get(summary, .row=0, .colname="mean")==0);
    assert(apop_data_get(summary, .row=1, .colname="median")==4);
    assert(apop_data_get(summary, .row=2, .colname="median")==8);
    //apop_data_show(summary);
}
Example #11
0
void set_uniform_edges(apop_data * r, apop_model *unif){
    apop_data_set(unif->parameters, 0, -1, r->matrix->data[0]-0.5);
    apop_data_set(unif->parameters, 1, -1, r->matrix->data[0]+0.5);
}
Example #12
0
apop_data* multiple_imputation_variance_base(multiple_imputation_variance_t in){
    /*The first half of this is filling in the values. In an attempt at versatility, I allow users to 
      give any named column, be it numeric or text, for every piece of input info. That means a whole lot 
      of checking around to determine what goes where---and a macro.  */

    Apop_assert_c(in.base_data,NULL, 1, "It doesn't make sense to impute over a NULL data set.");
    Apop_assert_c(in.fill_ins, NULL, 1, "Didn't receive a fill-in table. Returning NULL.");
    data_to_data stat = in.stat? in.stat : colmeans;

//At the end of this macro, you've got rowcol and rowtype, valuecol and valuetype, &c.
#define apop_setup_one_colthing(c) \
    int c##col = apop_name_find(in.fill_ins->names, in.c##_name, 'c');   \
    int c##type = 'd';         \
    if (c##col==-2){           \
        c##col = apop_name_find(in.fill_ins->names, in.c##_name, 't');   \
        c##type = 't';         \
       Apop_assert(c##col!=-2, "I couldn't find the c##_name %s in the column/text names of your fill_in table.", in.c##_name);    \
    }

    apop_setup_one_colthing(row)
    apop_setup_one_colthing(col)
    apop_setup_one_colthing(value)
    apop_setup_one_colthing(imputation)

    Apop_assert(!(rowtype=='t' && !in.base_data->names->rowct),
            "the rowname you gave refers to text, so I will be searching for a row name in the base data."
            " But the base_data set has no row names.");
    Apop_assert(!(coltype=='t' && !in.base_data->names->colct),
            "the colname you gave refers to text, so I will be searching for a column name in the base data."
            " But the base_data set has no column names.");

    //get a list of unique imputation markers.
    gsl_vector *imps = NULL;
    apop_data *impt = NULL; 
    if (imputationtype == 'd'){
        Apop_col_v(in.fill_ins, imputationcol, ic);
        imps = apop_vector_unique_elements(ic);
    } else impt = apop_text_unique_elements(in.fill_ins, imputationcol);

    int len = imps ? imps->size : impt->textsize[0];
    int thisimp=-2; char *thisimpt=NULL;
	apop_data *estimates[len];
    for (int impctr=0; impctr< len; impctr++){
        if (imps) thisimp  = gsl_vector_get(imps, impctr);
        else      thisimpt = impt->text[impctr][0];
        Get_vmsizes(in.fill_ins); //masxize
        int fillsize = maxsize ? maxsize : in.fill_ins->textsize[0];
        for (int i=0; i< fillsize; i++){
            if (!(thisimpt && apop_strcmp(in.fill_ins->text[i][imputationcol], thisimpt))
                && !(imps && thisimp==apop_data_get(in.fill_ins, i, imputationcol)))
                continue;
            int thisrow = (rowtype=='d') ? 
                                apop_data_get(in.fill_ins, i, rowcol)
                               :apop_name_find(in.base_data->names, in.fill_ins->text[i][rowcol], 'r');
            int thiscol = (coltype=='d') ? 
                                apop_data_get(in.fill_ins, i, colcol)
                               :apop_name_find(in.base_data->names, in.fill_ins->text[i][colcol], 'c');
            if (valuetype=='d') apop_data_set(in.base_data, thisrow, thiscol, 
                                            apop_data_get(in.fill_ins, i, valuecol));
            else apop_text_add(in.base_data, rowcol, colcol, in.fill_ins->text[i][valuecol]);
        }
        //OK, base_data is now filled in. Estimate the statistic for it.
		estimates[impctr] = stat(in.base_data);
    }


    //Part II: find the mean of the statistics and the total variance of the cov matrix.
	gsl_vector *vals = gsl_vector_alloc(len);
    apop_data *out = apop_data_copy(estimates[0]);
	//take the simple mean of the main data set.
	{ //this limits the scope of the Get_vmsizes macro.
	 Get_vmsizes(estimates[0]); 
     for (int j=0; j < msize2; j++)
         for (int i=0; i < (vsize ? vsize : msize1); i++){
            for (int k=0; k< len; k++)
                gsl_vector_set(vals, k, apop_data_get(estimates[k], i, j));
             apop_data_set(out, i, j, apop_vector_mean(vals));
         }
	}
    apop_data *out_var = apop_data_get_page(estimates[0], "<Covariance>");
    int cov_is_labelled = out_var !=NULL;
    if (!cov_is_labelled){
        asprintf(&out->more->names->title, "<Covariance>");
        out_var = estimates[0]->more;
    }
	Get_vmsizes(out_var);
    for (int i=0; i < msize1; i++)
        for (int j=i; j < msize2; j++){
            for (int k=0; k< len; k++){
                apop_data *this_p = cov_is_labelled ? apop_data_get_page(estimates[k], "<Covariance>")
                                        : estimates[k]->more;
                gsl_vector_set(vals, k, apop_data_get(this_p, i, j));
            }
            double total_var = apop_vector_mean(vals) + apop_var(vals)/(1+1./len);
            apop_data_set(out_var, i, j, total_var);
            if (j != i)
                apop_data_set(out_var, j, i, total_var);
        }
    return out;	
}
Example #13
0
static long double wishart_ll(apop_data *in, apop_model *m){
    Nullcheck_mpd(in, m, GSL_NAN);
    wishartstruct_t ws = {
            .paraminv = apop_matrix_inverse(m->parameters->matrix),
            .len = sqrt(in->matrix->size2),
            .df = m->parameters->vector->data[0]
        };
    double paramdet = apop_matrix_determinant(m->parameters->matrix);
    if (paramdet < 1e-3) return GSL_NEGINF;
    double ll =  apop_map_sum(in, .fn_vp = one_wishart_row, .param=&ws, .part='r');
    double k = log(ws.df)*ws.df/2.;
    k -= M_LN2 * ws.len* ws.df/2.;
    k -= log(paramdet) * ws.df/2.;
    k -= apop_multivariate_lngamma(ws.df/2., ws.len);
    return ll + k*in->matrix->size1;
}

static int apop_wishart_draw(double *out, gsl_rng *r, apop_model *m){
    /*
Translated from the Fortran by BK. Fortran comments:

C          SUBROUTINE DWSHRT(D, N, NP, NNP, SB, SA)
C
C       ALGORITHM AS 53  APPL. STATIST. (1972) VOL.21, NO.3
C
C     Wishart variate generator.  On output, SA is an upper-triangular
C     matrix of size NP * NP [...]
C     whose elements have a Wishart(N, SIGMA) distribution.
*/
    Nullcheck_mp(m, );
    int np = m->parameters->matrix->size1;
    int n = m->parameters->vector->data[0];
    if (!m->more) { 
        gsl_matrix *ccc = apop_matrix_copy(m->parameters->matrix);
        gsl_linalg_cholesky_decomp(ccc);
        for (int i=0; i < ccc->size1; i++) //zero out the upper diagonal
            for (int j=i+1; j < ccc->size2; j++)
                gsl_matrix_set(ccc, i, j, 0);
        m->more = apop_matrix_to_data(ccc);
        m->more_size = sizeof(apop_data);
    }
    apop_data *Chol = m->more;
    apop_data *rmatrix = apop_data_calloc(np, np);
    Staticdef(apop_model *, std_normal, apop_model_set_parameters(apop_normal, 0, 1));

//C     Load diagonal elements with square root of chi-square variates
    for(int i = 0; i< np; i++){
        int DF = n - i;
        apop_data_set(rmatrix, i, i, sqrt(gsl_ran_chisq(r, DF)));
    }
    
    for(int i = 1; i< np; i++) //off-diagonal triangles: Normals.
          for(int j = 0; j< i; j++){
            double ndraw;
            apop_draw(&ndraw, r, std_normal);
            assert (!gsl_isnan(ndraw));
            apop_data_set(rmatrix, i, j, ndraw);
          }
    //Now find C * rand * rand' * C'
    apop_data *cr = apop_dot(Chol, rmatrix);
    apop_data *crr = apop_dot(cr, rmatrix, .form2='t');
    apop_data *crrc = apop_dot(crr, Chol, .form2='t');
    memmove(out, crrc->matrix->data, sizeof(double)*np*np);
    apop_data_free(rmatrix); apop_data_free(cr);
    apop_data_free(crrc);    apop_data_free(crr);
    return 0;
}
Example #14
0
//For centering a uniform distribution around a point. 
//Cut/pasted from the Apophenia documentation, but narrowed range to ±.0.8. This also took some tweaking.
//The uniform is not a good choice (and ruins the covariance estimate), but the premise
//was that we don't know the formula for a Normal distribution.
void set_midpoint(apop_data * in, apop_model *m){
    apop_data_set(m->parameters, 0, -1, apop_data_get(in)-0.07);
    apop_data_set(m->parameters, 1, -1, apop_data_get(in)+0.07);
}
Example #15
0
void set_uniform_edges(apop_data_row r, apop_model *unif){
    apop_data_set(unif->parameters, 0, -1, r.matrix_row.data[0]-0.5);
    apop_data_set(unif->parameters, 1, -1, r.matrix_row.data[0]+0.5);
}