/* Two-sample t-test on per-capita income: North Dakota versus South Dakota.

   Reads the in_per_capita column of the income table for each state from
   data-census.db, forms a t statistic from the two sample means and variances,
   and prints the confidence with which the null hypothesis of equal means is
   rejected.

   Returns 0 on success, 1 if either query yields fewer than two observations
   (including no result at all), in which case the statistic is undefined. */
int main(void){
    apop_db_open("data-census.db");
    gsl_vector *n = apop_query_to_vector("select in_per_capita from income "
            "where state= (select state from geography where name ='North Dakota')");
    gsl_vector *s = apop_query_to_vector("select in_per_capita from income "
            "where state= (select state from geography where name ='South Dakota')");

    //The statistic below divides by (count-1), so each sample needs at least
    //two elements; a NULL vector means the query produced nothing usable.
    if (!n || !s || n->size < 2 || s->size < 2){
        fprintf(stderr, "Need at least two income observations for each state; "
                        "check data-census.db and the income/geography tables.\n");
        if (n) gsl_vector_free(n);
        if (s) gsl_vector_free(s);
        return 1;
    }

    double n_count = n->size,
           n_mean  = apop_vector_mean(n),
           n_var   = apop_vector_var(n),
           s_count = s->size,
           s_mean  = apop_vector_mean(s),
           s_var   = apop_vector_var(s);

    //The vectors are only needed for the summary statistics above.
    gsl_vector_free(n);
    gsl_vector_free(s);

    double stat = fabs(n_mean - s_mean)/ sqrt(n_var/ (n_count-1) + s_var/(s_count-1));
    //stat is non-negative, so doubling the upper tail gives the two-tailed p-value.
    double confidence = 1 - (2 * gsl_cdf_tdist_Q(stat, n_count + s_count -2));
    printf("Reject the null with %g%% confidence\n", confidence*100);
    return 0;
}
//apop_vector_mean is a macro, so we can't point a pointer to it. double mu(gsl_vector *in){ return apop_vector_mean(in);}
/* Evaluate a statistic once per imputation and pool the results.

   For every distinct imputation marker in the fill-in table, the matching
   fill-in values are written into the base data set, the statistic is
   evaluated on the completed data, and the result is stored. The stored
   results are then pooled: each matrix element of the output is the mean of
   that element across imputations, and each element of the covariance page is
   combined across imputations (see the note at the formula below).

   Fields of `in` read here:
     base_data        the data set to complete. It is MODIFIED IN PLACE; on
                      return it holds the values of the last imputation processed.
     fill_ins         table with one fill-in value per row.
     stat             function applied to the completed data; colmeans if unset.
     row_name, col_name, value_name, imputation_name
                      names of the fill_ins columns giving the target row, the
                      target column, the value, and the imputation marker. Each
                      may name a numeric column or a text column.

   Returns a copy of the first imputation's estimate with pooled values written
   in, or NULL if base_data or fill_ins is NULL or no imputation markers are
   found. If the estimates have neither a page titled "<Covariance>" nor any
   second page, only the means are pooled. */
apop_data* multiple_imputation_variance_base(multiple_imputation_variance_t in){
    /* Part I: fill in the values. In an attempt at versatility, users may give
       any named column, be it numeric or text, for every piece of input info.
       That means a whole lot of checking around to determine what goes
       where---and a macro. */
    Apop_assert_c(in.base_data, NULL, 1, "It doesn't make sense to impute over a NULL data set.");
    Apop_assert_c(in.fill_ins, NULL, 1, "Didn't receive a fill-in table. Returning NULL.");
    data_to_data stat = in.stat ? in.stat : colmeans;

//At the end of this macro, you've got rowcol and rowtype, valuecol and valuetype, &c.
//The type is 'd' when the name matched a numeric column name and 't' when it
//matched a text column name; apop_name_find reports "not found" as -2.
#define apop_setup_one_colthing(c) \
    int c##col = apop_name_find(in.fill_ins->names, in.c##_name, 'c'); \
    int c##type = 'd'; \
    if (c##col==-2){ \
        c##col = apop_name_find(in.fill_ins->names, in.c##_name, 't'); \
        c##type = 't'; \
        Apop_assert(c##col!=-2, "I couldn't find the " #c "_name %s in the column/text names of your fill_in table.", in.c##_name); \
    }

    apop_setup_one_colthing(row)
    apop_setup_one_colthing(col)
    apop_setup_one_colthing(value)
    apop_setup_one_colthing(imputation)
#undef apop_setup_one_colthing

    Apop_assert(!(rowtype=='t' && !in.base_data->names->rowct),
            "the rowname you gave refers to text, so I will be searching for a row name in the base data."
            " But the base_data set has no row names.");
    Apop_assert(!(coltype=='t' && !in.base_data->names->colct),
            "the colname you gave refers to text, so I will be searching for a column name in the base data."
            " But the base_data set has no column names.");

    //get a list of unique imputation markers.
    gsl_vector *imps = NULL;
    apop_data *impt = NULL;
    if (imputationtype == 'd'){
        Apop_col_v(in.fill_ins, imputationcol, ic);
        imps = apop_vector_unique_elements(ic);
    } else
        impt = apop_text_unique_elements(in.fill_ins, imputationcol);

    Apop_assert_c(imps || impt, NULL, 1, "Found no imputation markers in the fill-in table. Returning NULL.");
    int len = imps ? imps->size : impt->textsize[0];
    //estimates[] is sized by len and estimates[0] is read unconditionally below.
    Apop_assert_c(len > 0, NULL, 1, "Found no imputation markers in the fill-in table. Returning NULL.");

    //The number of fill-in rows does not change while we work, so find it once.
    int fillsize;
    { //this limits the scope of the Get_vmsizes macro.
        Get_vmsizes(in.fill_ins); //maxsize
        fillsize = maxsize ? maxsize : in.fill_ins->textsize[0];
    }

    //Kept as a double so non-integer markers compare equal to themselves.
    double thisimp = -2;
    char *thisimpt = NULL;
    apop_data *estimates[len];
    for (int impctr=0; impctr < len; impctr++){
        if (imps) thisimp  = gsl_vector_get(imps, impctr);
        else      thisimpt = impt->text[impctr][0];

        for (int i=0; i < fillsize; i++){
            //Skip fill-in rows that belong to a different imputation.
            int is_this_imp = imps
                    ? (thisimp == apop_data_get(in.fill_ins, i, imputationcol))
                    : (thisimpt && apop_strcmp(in.fill_ins->text[i][imputationcol], thisimpt));
            if (!is_this_imp) continue;

            //A numeric row/col entry is used directly as an index; a text
            //entry is looked up among the base data's row/column names.
            int thisrow = (rowtype=='d')
                    ? apop_data_get(in.fill_ins, i, rowcol)
                    : apop_name_find(in.base_data->names, in.fill_ins->text[i][rowcol], 'r');
            if (rowtype=='t'){
                Apop_assert(thisrow != -2, "I couldn't find the row name %s in the base data.",
                                            in.fill_ins->text[i][rowcol]);
            }
            int thiscol = (coltype=='d')
                    ? apop_data_get(in.fill_ins, i, colcol)
                    : apop_name_find(in.base_data->names, in.fill_ins->text[i][colcol], 'c');
            if (coltype=='t'){
                Apop_assert(thiscol != -2, "I couldn't find the column name %s in the base data.",
                                            in.fill_ins->text[i][colcol]);
            }

            if (valuetype=='d')
                apop_data_set(in.base_data, thisrow, thiscol,
                                    apop_data_get(in.fill_ins, i, valuecol));
            else
                //Write to the target cell (thisrow, thiscol). rowcol and colcol are
                //positions in the fill-in table, not in the base data. The value
                //goes through "%s" so a '%' in the data is not read as a format.
                //NOTE(review): a text column name is looked up with type 'c' above
                //even when the value is text; confirm whether 't' is wanted there.
                apop_text_add(in.base_data, thisrow, thiscol, "%s", in.fill_ins->text[i][valuecol]);
        }
        //OK, base_data is now filled in. Estimate the statistic for it.
        estimates[impctr] = stat(in.base_data);
    }

    //The marker lists were built here and are not needed past this point.
    if (imps) gsl_vector_free(imps);
    if (impt) apop_data_free(impt);

    //Part II: find the mean of the statistics and the total variance of the cov matrix.
    gsl_vector *vals = gsl_vector_alloc(len);
    apop_data *out = apop_data_copy(estimates[0]);

    //take the simple mean of the main data set.
    { //this limits the scope of the Get_vmsizes macro.
        Get_vmsizes(estimates[0]);
        for (int j=0; j < msize2; j++)
            for (int i=0; i < (vsize ? vsize : msize1); i++){
                for (int k=0; k < len; k++)
                    gsl_vector_set(vals, k, apop_data_get(estimates[k], i, j));
                apop_data_set(out, i, j, apop_vector_mean(vals));
            }
    }

    //Find the covariance page of the OUTPUT set, so the pooled values land in
    //what we return. If no page is titled <Covariance>, the second page is
    //taken to be the covariance and is given that title.
    int cov_is_labelled = apop_data_get_page(estimates[0], "<Covariance>") != NULL;
    apop_data *out_var = cov_is_labelled ? apop_data_get_page(out, "<Covariance>")
                                         : out->more;
    if (!cov_is_labelled && out_var)
        if (asprintf(&out_var->names->title, "<Covariance>") == -1)
            out_var->names->title = NULL; //asprintf leaves the pointer undefined on failure.

    if (out_var){
        Get_vmsizes(out_var);
        //Only the upper triangle is computed; each result is mirrored below the diagonal.
        for (int i=0; i < msize1; i++)
            for (int j=i; j < msize2; j++){
                for (int k=0; k < len; k++){
                    apop_data *this_p = cov_is_labelled
                                ? apop_data_get_page(estimates[k], "<Covariance>")
                                : estimates[k]->more;
                    gsl_vector_set(vals, k, apop_data_get(this_p, i, j));
                }
                //NOTE(review): Rubin's combining rule is usually written W + (1+1/m)*B,
                //with B the between-imputation variance of the point estimates. This
                //line adds the variance of the covariance entries divided by (1+1/m).
                //Kept as found; confirm the intended formula.
                double total_var = apop_vector_mean(vals) + apop_var(vals)/(1+1./len);
                apop_data_set(out_var, i, j, total_var);
                if (j != i)
                    apop_data_set(out_var, j, i, total_var);
            }
    }

    gsl_vector_free(vals);
    //NOTE(review): the per-imputation estimates are not freed here because this
    //function cannot tell whether stat() hands over ownership of what it returns.
    return out;
}