int main(){ int rep_ct = 10000; gsl_rng *r = apop_rng_alloc(0); apop_db_open("data-census.db"); gsl_vector *base_data = apop_query_to_vector("select in_per_capita from income where sumlevel+0.0 =40"); double RI = apop_query_to_float("select in_per_capita from income where sumlevel+0.0 =40 and geo_id2+0.0=44"); gsl_vector *boot_sample = gsl_vector_alloc(base_data->size); gsl_vector *replications = gsl_vector_alloc(rep_ct); for (int i=0; i< rep_ct; i++){ one_boot(base_data, r, boot_sample); gsl_vector_set(replications, i, apop_mean(boot_sample)); } double stderror = sqrt(apop_var(replications)); double mean = apop_mean(replications); printf("mean: %g; standard error: %g; (RI-mean)/stderr: %g; p value: %g\n", mean, stderror, (RI-mean)/stderror, 2*gsl_cdf_gaussian_Q(fabs(RI-mean), stderror)); }
/* Estimate the parameters of a t-distribution model from a data set.
 *
 * The vector and matrix parts of d are pooled into one sample. A copy of m
 * is returned with its parameters set to:
 *   row 0, "mean":               the mean over all pooled elements,
 *   row 1, "standard deviation": sqrt(total squared deviation / (tsize-1)),
 *   row 2, "df":                 tsize-1,
 * and with a "log likelihood" element added to the copy's info set.
 *
 * d  the data; checked non-NULL by Apop_assert.
 * m  the model to copy; m itself is not modified here.
 */
apop_model* apop_t_estimate(apop_data *d, apop_model *m){
    Apop_assert(d, "No data with which to count df. (the default estimation method)");
    //Declares vsize, msize1, msize2, tsize
    Get_vmsizes(d);
    apop_model *out = apop_model_copy(*m);

    // Element counts as doubles, so the products below are done in floating point.
    double n_v = vsize;
    double n_m = (double)msize1 * msize2;

    // Per-part mean and sum of squared deviations about that part's own mean.
    // Assumes apop_var and apop_matrix_mean_and_var report the (n-1)-denominator
    // variance, as the (n-1) scaling implies -- TODO confirm.
    double vmu = vsize ? apop_mean(d->vector) : 0;
    double v_sum_sq = vsize ? apop_var(d->vector)*(n_v-1) : 0;
    double m_sum_sq = 0;
    double mmu = 0;
    if (msize1) {
        apop_matrix_mean_and_var(d->matrix, &mmu, &m_sum_sq);
        m_sum_sq *= n_m-1;
    }

    double grand_mean = (vmu*n_v + mmu*n_m)/tsize;

    // Total squared deviation about the grand mean: the within-part sums plus
    // each part's count times the squared offset of its mean. The sums are
    // already totals, so they must not be multiplied by the counts again.
    double v_shift = vmu - grand_mean;
    double m_shift = mmu - grand_mean;
    double total_sum_sq = v_sum_sq + m_sum_sq
                        + n_v*v_shift*v_shift + n_m*m_shift*m_shift;

    apop_data_add_names(out->parameters, 'r', "mean", "standard deviation", "df");
    apop_data_set(out->parameters, 0, -1, grand_mean);
    apop_data_set(out->parameters, 1, -1, sqrt(total_sum_sq/(tsize-1)));
    apop_data_set(out->parameters, 2, -1, tsize-1);
    apop_data_add_named_elmt(out->info, "log likelihood", out->log_likelihood(d, out));
    return out;
}
/* Return the mean of `in` divided by the square root of its variance,
 * using apop_mean and apop_var for the two moments. */
double find_tstat(gsl_vector *in){
    double center = apop_mean(in);
    double spread = sqrt(apop_var(in));
    return center/spread;
}