示例#1
0
/* Two-sample t-test on the in_per_capita values stored in the income table
   of data-census.db for North Dakota and South Dakota.

   The statistic is |mean_N - mean_S| / sqrt(var_N/(count_N-1) + var_S/(count_S-1)),
   and the reported confidence is one minus the two-tailed p-value from a
   t distribution with count_N + count_S - 2 degrees of freedom.

   Returns 0 on success, 1 if the database or either sample is unusable. */
int main(){
    //apop_db_open reports failure with a nonzero return value.
    if (apop_db_open("data-census.db")){
        fprintf(stderr, "Could not open data-census.db.\n");
        return 1;
    }
    gsl_vector *n = apop_query_to_vector("select in_per_capita from income "
            "where state= (select state from geography where name ='North Dakota')");
    gsl_vector *s = apop_query_to_vector("select in_per_capita from income  "
            "where state= (select state from geography where name ='South Dakota')");

    //A query that fails or matches no rows gives NULL; don't dereference it.
    if (!n || !s){
        fprintf(stderr, "A query returned no data; can't run the test.\n");
        return 1;
    }
    //The standard error below divides by (count-1) for each sample.
    if (n->size < 2 || s->size < 2){
        fprintf(stderr, "Each sample needs at least two observations.\n");
        return 1;
    }

    double n_count = n->size,
           n_mean  = apop_vector_mean(n),
           n_var   = apop_vector_var(n),
           s_count = s->size,
           s_mean  = apop_vector_mean(s),
           s_var   = apop_vector_var(s);

    double  stat      = fabs(n_mean - s_mean)/ sqrt(n_var/ (n_count-1) + s_var/(s_count-1));
    //gsl_cdf_tdist_Q is the upper tail; doubling it gives the two-tailed p-value.
    double confidence = 1 - (2 * gsl_cdf_tdist_Q(stat, n_count + s_count -2));
    printf("Reject the null with %g%% confidence\n", confidence*100);
    return 0;
}
示例#2
0
/* apop_vector_mean is a macro, so it has no address to take. This wrapper
   is a real function with the same result, for use wherever a function
   pointer is required. */
double mu(gsl_vector *in){
    double mean = apop_vector_mean(in);
    return mean;
}
示例#3
0
/* Combine a statistic across the multiple imputations of a data set.

   For each distinct imputation marker in in.fill_ins, the fill-in values
   belonging to that imputation are written into in.base_data (which is
   modified in place), and in.stat (colmeans if none is given) is evaluated
   on the completed data. The output is a copy of the first estimate whose
   vector and matrix elements are replaced by the across-imputation means,
   and whose covariance page holds the combined variance:

       mean over imputations of the page + (1 + 1/len) * variance over imputations

   in.row_name, in.col_name, in.value_name and in.imputation_name each name
   a column of in.fill_ins. Each is looked up first among the numeric column
   names and then among the text column names.

   An estimate's covariance page is the page titled "<Covariance>" if there
   is one, and otherwise the first page on its ->more pointer.

   Returns the combined estimate. The two Apop_assert_c checks pass NULL as
   their second argument, presumably the value returned when base_data or
   fill_ins is missing (confirm against the macro definition).

   NOTE(review): the between-imputation term is computed as the variance of
   the covariance-page elements themselves. Rubin's rules define it from the
   spread of the point estimates across imputations; confirm which is intended.

   NOTE(review): vals, imps, impt and the entries of estimates[] are
   allocated here and never released in this function. */
apop_data* multiple_imputation_variance_base(multiple_imputation_variance_t in){
    /*The first half of this is filling in the values. In an attempt at versatility, I allow users to 
      give any named column, be it numeric or text, for every piece of input info. That means a whole lot 
      of checking around to determine what goes where---and a macro.  */

    Apop_assert_c(in.base_data,NULL, 1, "It doesn't make sense to impute over a NULL data set.");
    Apop_assert_c(in.fill_ins, NULL, 1, "Didn't receive a fill-in table. Returning NULL.");
    data_to_data stat = in.stat? in.stat : colmeans;

//At the end of this macro, you've got rowcol and rowtype, valuecol and valuetype, &c.
//The type is 'd' for a numeric column and 't' for a text column.
//#c is stringified into the message; c## inside a string literal would not be expanded.
#define apop_setup_one_colthing(c) \
    int c##col = apop_name_find(in.fill_ins->names, in.c##_name, 'c');   \
    int c##type = 'd';         \
    if (c##col==-2){           \
        c##col = apop_name_find(in.fill_ins->names, in.c##_name, 't');   \
        c##type = 't';         \
       Apop_assert(c##col!=-2, "I couldn't find the " #c "_name %s in the column/text names of your fill_in table.", in.c##_name);    \
    }

    apop_setup_one_colthing(row)
    apop_setup_one_colthing(col)
    apop_setup_one_colthing(value)
    apop_setup_one_colthing(imputation)

    Apop_assert(!(rowtype=='t' && !in.base_data->names->rowct),
            "the rowname you gave refers to text, so I will be searching for a row name in the base data."
            " But the base_data set has no row names.");
    Apop_assert(!(coltype=='t' && !in.base_data->names->colct),
            "the colname you gave refers to text, so I will be searching for a column name in the base data."
            " But the base_data set has no column names.");

    //get a list of unique imputation markers.
    gsl_vector *imps = NULL;
    apop_data *impt = NULL; 
    if (imputationtype == 'd'){
        Apop_col_v(in.fill_ins, imputationcol, ic);
        imps = apop_vector_unique_elements(ic);
    } else impt = apop_text_unique_elements(in.fill_ins, imputationcol);

    Apop_assert(imps || impt, "I couldn't build a list of imputation markers from the fill-in table.");
    int len = imps ? imps->size : impt->textsize[0];
    //A zero-length estimates[] array is undefined, and Part II reads estimates[0].
    Apop_assert(len > 0, "I found no imputations in the fill-in table.");
    int thisimp=-2; char *thisimpt=NULL;
    apop_data *estimates[len];
    for (int impctr=0; impctr< len; impctr++){
        if (imps) thisimp  = gsl_vector_get(imps, impctr);
        else      thisimpt = impt->text[impctr][0];
        Get_vmsizes(in.fill_ins); //maxsize
        int fillsize = maxsize ? maxsize : in.fill_ins->textsize[0];
        for (int i=0; i< fillsize; i++){
            //Skip fill-in rows that belong to some other imputation.
            if (!(thisimpt && apop_strcmp(in.fill_ins->text[i][imputationcol], thisimpt))
                && !(imps && thisimp==apop_data_get(in.fill_ins, i, imputationcol)))
                continue;
            int thisrow = (rowtype=='d') ? 
                                apop_data_get(in.fill_ins, i, rowcol)
                               :apop_name_find(in.base_data->names, in.fill_ins->text[i][rowcol], 'r');
            Apop_assert(rowtype=='d' || thisrow != -2,
                    "I couldn't find the row name %s in the base data.", in.fill_ins->text[i][rowcol]);
            int thiscol = (coltype=='d') ? 
                                apop_data_get(in.fill_ins, i, colcol)
                               :apop_name_find(in.base_data->names, in.fill_ins->text[i][colcol], 'c');
            Apop_assert(coltype=='d' || thiscol != -2,
                    "I couldn't find the column name %s in the base data.", in.fill_ins->text[i][colcol]);
            if (valuetype=='d') apop_data_set(in.base_data, thisrow, thiscol, 
                                            apop_data_get(in.fill_ins, i, valuecol));
            //The target cell is (thisrow, thiscol); rowcol and colcol only index the fill-in table.
            //The value goes through "%s" so a '%' in the data is not read as a format directive.
            else apop_text_add(in.base_data, thisrow, thiscol, "%s", in.fill_ins->text[i][valuecol]);
        }
        //OK, base_data is now filled in. Estimate the statistic for it.
        estimates[impctr] = stat(in.base_data);
    }


    //Part II: find the mean of the statistics and the total variance of the cov matrix.
    gsl_vector *vals = gsl_vector_alloc(len);
    apop_data *out = apop_data_copy(estimates[0]);
    //take the simple mean of the main data set.
    { //this limits the scope of the Get_vmsizes macro.
        Get_vmsizes(estimates[0]); //vsize, msize1, msize2
        //The vector lives at column -1, so it needs its own pass with its own length.
        for (int i=0; i < vsize; i++){
            for (int k=0; k< len; k++)
                gsl_vector_set(vals, k, apop_data_get(estimates[k], i, -1));
            apop_data_set(out, i, -1, apop_vector_mean(vals));
        }
        for (int j=0; j < msize2; j++)
            for (int i=0; i < msize1; i++){
                for (int k=0; k< len; k++)
                    gsl_vector_set(vals, k, apop_data_get(estimates[k], i, j));
                apop_data_set(out, i, j, apop_vector_mean(vals));
            }
    }
    //The combined variance belongs in the returned copy, not in estimates[0].
    apop_data *out_var = apop_data_get_page(out, "<Covariance>");
    int cov_is_labelled = out_var !=NULL;
    if (!cov_is_labelled){
        Apop_assert(out->more, "The statistic gave me no covariance page, so I can't combine the variances.");
        asprintf(&out->more->names->title, "<Covariance>");
        out_var = out->more;
    }
    Get_vmsizes(out_var);
    //Only the upper triangle is computed; each value is mirrored to the lower triangle.
    for (int i=0; i < msize1; i++)
        for (int j=i; j < msize2; j++){
            for (int k=0; k< len; k++){
                apop_data *this_p = cov_is_labelled ? apop_data_get_page(estimates[k], "<Covariance>")
                                        : estimates[k]->more;
                gsl_vector_set(vals, k, apop_data_get(this_p, i, j));
            }
            //Rubin's rule: total = within + (1 + 1/m) * between.
            double total_var = apop_vector_mean(vals) + (1+1./len)*apop_var(vals);
            apop_data_set(out_var, i, j, total_var);
            if (j != i)
                apop_data_set(out_var, j, i, total_var);
        }
    return out;
}