double one_chi_sq(apop_data *d, int row, int col, int n){ Apop_row_v(d, row, vr); Apop_col_v(d, col, vc); double rowexp = apop_vector_sum(vr)/n; double colexp = apop_vector_sum(vc)/n; double observed = apop_data_get(d, row, col); double expected = n * rowexp * colexp; return gsl_pow_2(observed - expected)/expected; }
/* The hard part is when your candidate point does not satisfy other constraints, so you need to translate the point until it meets the new hypersurface. How far is that? Project beta onto the new surface, and find the distance between that projection and the original surface. Then translate beta toward the original surface by that amount. The projection of the translated beta onto the new surface now also touches the old surface. */ static void get_candiate(gsl_vector *beta, apop_data *constraint, int current, gsl_vector *candidate, double margin){ double k, ck, off_by, s; gsl_vector *pseudobeta = NULL; gsl_vector *pseudocandidate = NULL; gsl_vector *pseudocandidate2 = NULL; gsl_vector *fix = NULL; Apop_row_v(constraint, current, cc); ck = gsl_vector_get(constraint->vector, current); find_nearest_point(beta, ck, cc, candidate); for (size_t i=0; i< constraint->vector->size; i++){ if (i!=current){ Apop_row_v(constraint, i, other); k =apop_data_get(constraint, i, -1); if (binds(candidate, k, other, margin)){ if (!pseudobeta){ pseudobeta = gsl_vector_alloc(beta->size); gsl_vector_memcpy(pseudobeta, beta); pseudocandidate = gsl_vector_alloc(beta->size); pseudocandidate2 = gsl_vector_alloc(beta->size); fix = gsl_vector_alloc(beta->size); } find_nearest_point(pseudobeta, k, other, pseudocandidate); find_nearest_point(pseudocandidate, ck, cc, pseudocandidate2); off_by = apop_vector_distance(pseudocandidate, pseudocandidate2); s = trig_bit(cc, other, off_by); gsl_vector_memcpy(fix, cc); gsl_vector_scale(fix, magnitude(cc)); gsl_vector_scale(fix, s); gsl_vector_add(pseudobeta, fix); find_nearest_point(pseudobeta, k, other, candidate); gsl_vector_memcpy(pseudobeta, candidate); } } } if (fix){ gsl_vector_free(fix); gsl_vector_free(pseudobeta); gsl_vector_free(pseudocandidate); gsl_vector_free(pseudocandidate2); } }
apop_model *fuzz(apop_model sim){ int draws = 100; gsl_rng *r = apop_rng_alloc(1); apop_model *prior = apop_model_cross( apop_model_set_parameters(apop_normal, 10, 2), apop_model_set_parameters(apop_normal, 10, 2)); apop_data *outdata = apop_data_alloc(draws, weibull->vsize); double *params = sim.parameters->vector->data; for (int i=0; i< draws; i++){ do { apop_draw(params, r, prior); } while (params[1]*2 > pow(params[0], 2)); sim.dsize=params[1]; apop_model *est = apop_estimate(apop_model_draws(&sim, 1000), weibull); Apop_row_v(outdata, i, onerow); gsl_vector_memcpy(onerow, est->parameters->vector); apop_model_free(est); } return apop_estimate(outdata, apop_pmf); }
apop_data *kappa_and_pi(apop_data const *tab_in){ apop_data *out = apop_data_alloc(); Apop_stopif(!tab_in, out->error='n'; return out, 0, "NULL input. Returning output with 'n' error code."); Apop_stopif(!tab_in->matrix, out->error='m'; return out, 0, "NULL input matrix. Returning output with 'm' error code."); Apop_stopif(tab_in->matrix->size1 != tab_in->matrix->size2, out->error='s'; return out, 0, "Input rows=%zu; input cols=%zu; " "these need to be equal. Returning output with error code 's'.", tab_in->matrix->size1, tab_in->matrix->size2); apop_data *tab = apop_data_copy(tab_in); double total = apop_matrix_sum(tab->matrix); gsl_matrix_scale(tab->matrix, 1./total); double p_o = 0, p_e = 0, scott_pe = 0, ia = 0, row_ent = 0, col_ent = 0; for (int c=0; c< tab->matrix->size1; c++){ double this_obs = apop_data_get(tab, c, c); p_o += this_obs; Apop_row_v(tab, c, row); Apop_col_v(tab, c, col); double rsum = apop_sum(row); double csum = apop_sum(col); p_e += rsum * csum; scott_pe += pow((rsum+csum)/2, 2); ia += this_obs * log2(this_obs/(rsum * csum)); row_ent -= rsum * log2(rsum); col_ent -= csum * log2(csum); } apop_data_free(tab); asprintf(&out->names->title, "Scott's π and Cohen's κ"); apop_data_add_named_elmt(out, "total count", total); apop_data_add_named_elmt(out, "percent agreement", p_o); apop_data_add_named_elmt(out, "κ", ((p_e==1)? 0: (p_o - p_e) / (1-p_e) )); apop_data_add_named_elmt(out, "π", ((p_e==1)? 0: (p_o - scott_pe) / (1-scott_pe))); apop_data_add_named_elmt(out, "P_I", ia/((row_ent+col_ent)/2)); apop_data_add_named_elmt(out, "Cohen's p_e", p_e); apop_data_add_named_elmt(out, "Scott's p_e", scott_pe); apop_data_add_named_elmt(out, "information in agreement", ia); apop_data_add_named_elmt(out, "row entropy", row_ent); apop_data_add_named_elmt(out, "column entropy", col_ent); return out; }
long double apop_linear_constraint(gsl_vector *beta, apop_data * constraint, double margin){ #else apop_varad_head(long double, apop_linear_constraint){ static threadlocal apop_data *default_constraint; gsl_vector * apop_varad_var(beta, NULL); double apop_varad_var(margin, 0); apop_data * apop_varad_var(constraint, NULL); Apop_assert(beta, "The vector to be checked is NULL."); if (!constraint){ if (default_constraint && beta->size != default_constraint->vector->size){ apop_data_free(default_constraint); default_constraint = NULL; } if (!default_constraint){ default_constraint = apop_data_alloc(0,beta->size, beta->size); default_constraint->vector = gsl_vector_calloc(beta->size); gsl_matrix_set_identity(default_constraint->matrix); } constraint = default_constraint; } return apop_linear_constraint_base(beta, constraint, margin); } long double apop_linear_constraint_base(gsl_vector *beta, apop_data * constraint, double margin){ #endif static threadlocal gsl_vector *closest_pt = NULL; static threadlocal gsl_vector *candidate = NULL; static threadlocal gsl_vector *fix = NULL; int constraint_ct = constraint->matrix->size1; int bindlist[constraint_ct]; int i, bound = 0; /* For added efficiency, keep a scratch vector or two on hand. */ if (closest_pt==NULL || closest_pt->size != constraint->matrix->size2){ closest_pt = gsl_vector_calloc(beta->size); candidate = gsl_vector_alloc(beta->size); fix = gsl_vector_alloc(beta->size); closest_pt->data[0] = GSL_NEGINF; } /* Do any constraints bind?*/ memset(bindlist, 0, sizeof(int)*constraint_ct); for (i=0; i< constraint_ct; i++){ Apop_row_v(constraint, i, c); bound += bindlist[i] = binds(beta, apop_data_get(constraint, i, -1), c, margin); } if (!bound) return 0; //All constraints met. gsl_vector *base_beta = apop_vector_copy(beta); /* With only one constraint, it's easy. */ if (constraint->vector->size==1){ Apop_row_v(constraint, 0, c); find_nearest_point(base_beta, constraint->vector->data[0], c, beta); goto add_margin; } /* Finally, multiple constraints, at least one binding. For each surface, pick a candidate point. Check whether the point meets the other constraints. if not, translate to a new point that works. [Do this by maintaining a pseudopoint that translates by the necessary amount.] Once you have a candidate point, compare its distance to the current favorite; keep the best. */ for (i=0; i< constraint_ct; i++) if (bindlist[i]){ get_candiate(base_beta, constraint, i, candidate, margin); if(apop_vector_distance(base_beta, candidate) < apop_vector_distance(base_beta, closest_pt)) gsl_vector_memcpy(closest_pt, candidate); } gsl_vector_memcpy(beta, closest_pt); add_margin: for (i=0; i< constraint_ct; i++){ if(bindlist[i]){ Apop_row_v(constraint, i, c); gsl_vector_memcpy(fix, c); gsl_vector_scale(fix, magnitude(fix)); gsl_vector_scale(fix, margin); gsl_vector_add(beta, fix); } } long double out = apop_vector_distance(base_beta, beta); gsl_vector_free(base_beta); return out; }