apop_data * apop_bootstrap_cov_base(apop_data * data, apop_model *model, gsl_rng *rng, int iterations, char keep_boots, char ignore_nans, apop_data **boot_store){ #endif Get_vmsizes(data); //vsize, msize1, msize2 apop_model *e = apop_model_copy(model); apop_data *subset = apop_data_copy(data); apop_data *array_of_boots = NULL, *summary; //prevent and infinite regression of covariance calculation. Apop_model_add_group(e, apop_parts_wanted); //default wants for nothing. size_t i, nan_draws=0; apop_name *tmpnames = (data && data->names) ? data->names : NULL; //save on some copying below. if (data && data->names) data->names = NULL; int height = GSL_MAX(msize1, GSL_MAX(vsize, (data?(*data->textsize):0))); for (i=0; i<iterations && nan_draws < iterations; i++){ for (size_t j=0; j< height; j++){ //create the data set size_t randrow = gsl_rng_uniform_int(rng, height); apop_data_memcpy(Apop_r(subset, j), Apop_r(data, randrow)); } //get the parameter estimates. apop_model *est = apop_estimate(subset, e); gsl_vector *estp = apop_data_pack(est->parameters); if (!gsl_isnan(apop_sum(estp))){ if (i==0){ array_of_boots = apop_data_alloc(iterations, estp->size); apop_name_stack(array_of_boots->names, est->parameters->names, 'c', 'v'); apop_name_stack(array_of_boots->names, est->parameters->names, 'c', 'c'); apop_name_stack(array_of_boots->names, est->parameters->names, 'c', 'r'); } gsl_matrix_set_row(array_of_boots->matrix, i, estp); } else if (ignore_nans=='y'){ i--; nan_draws++; } apop_model_free(est); gsl_vector_free(estp); } if(data) data->names = tmpnames; apop_data_free(subset); apop_model_free(e); int set_error=0; Apop_stopif(i == 0 && nan_draws == iterations, apop_return_data_error(N), 1, "I ran into %i NaNs and no not-NaN estimations, and so stopped. " , iterations); Apop_stopif(nan_draws == iterations, set_error++; apop_matrix_realloc(array_of_boots->matrix, i, array_of_boots->matrix->size2), 1, "I ran into %i NaNs, and so stopped. 
Returning results based " "on %zu bootstrap iterations.", iterations, i); summary = apop_data_covariance(array_of_boots); if (boot_store) *boot_store = array_of_boots; else apop_data_free(array_of_boots); if (set_error) summary->error = 'N'; return summary; }
/** Append one list of names to another.

If the first list is empty, then this is a copy function.

\param n1 The first set of names (no default, must not be \c NULL)
\param nadd The second set of names, which will be appended after the first. (no default. If \c NULL, a no-op.)
\param type1 Either 'c', 'r', 't', or 'v' stating which list of \c n1 (columns, rows, text, or vector) receives the names. Not used when \c typeadd is 'v'. (default: 'r')
\param typeadd Either 'c', 'r', 't', or 'v' stating which list of \c nadd (columns, rows, text, or vector) the names are read from. If 'v', then \c type1 is ignored and the source vector name is added to the target as its vector name. Any other character produces a notification and nothing is done. (default: type1)
*/
APOP_VAR_HEAD void apop_name_stack(apop_name * n1, apop_name *nadd, char type1, char typeadd){
    apop_name * apop_varad_var(nadd, NULL);
    if (!nadd) return;
    apop_name * apop_varad_var(n1, NULL);
    Apop_stopif(!n1, return, 0, "Can't stack onto a NULL set of names (which n1 is).");
    char apop_varad_var(type1, 'r');
    char apop_varad_var(typeadd, type1);
APOP_VAR_ENDHEAD
    int i;
    //Snapshot the source counts before adding anything: necessary when stacking onto self.
    apop_name counts = (apop_name){.rowct=nadd->rowct, .textct = nadd->textct, .colct = nadd->colct};
    if (typeadd == 'v')
        apop_name_add(n1, nadd->vector, 'v');
    else if (typeadd == 'r')
        for (i=0; i< counts.rowct; i++)
            apop_name_add(n1, nadd->row[i], type1);
    else if (typeadd == 't')
        for (i=0; i< counts.textct; i++)
            apop_name_add(n1, nadd->text[i], type1);
    else if (typeadd == 'c')
        for (i=0; i< counts.colct; i++)
            apop_name_add(n1, nadd->col[i], type1);
    else Apop_notify(1, "'%c' sent to apop_name_stack, but the only "
                        "valid options are r t c v. Doing nothing.", typeadd);
}

/** Copy one \ref apop_name structure to another. That is, all data is duplicated.

Used internally by \ref apop_data_copy, but sometimes useful by itself. For example,
say that we have an \ref apop_data struct named \c d and a \ref gsl_matrix of the same
dimensions named \c m; we could give \c m the labels from \c d for printing:
\code
apop_data *wrapped = &(apop_data){.matrix=m, .names=apop_name_copy(d->names)};
apop_data_print(wrapped);
apop_name_free(wrapped->names); //wrapped itself is auto-allocated; do not free.
\endcode

\param in The input names. If \c NULL, the output is a freshly allocated set of names with nothing copied into it.
\return A \ref apop_name struct with copies of all input names, or \c NULL if the allocation failed.
*/
apop_name * apop_name_copy(apop_name *in){
    apop_name *out = apop_name_alloc();
    if (!out) return NULL;
    //Each of these is a no-op when in is NULL.
    apop_name_stack(out, in, 'v');
    apop_name_stack(out, in, 'c');
    apop_name_stack(out, in, 'r');
    apop_name_stack(out, in, 't');
    //Only duplicate a title that exists: passing NULL to "%s" is undefined
    //behavior. Otherwise out->title keeps whatever apop_name_alloc gave it.
    if (in && in->title) Asprintf(&out->title, "%s", in->title);
    return out;
}
/* Build a one-row data set from the "mean" column of apop_data_summarize(in).

   The column names of `in` are stacked onto the result, and the covariance
   of `in`, with every element multiplied by 1/sqrt(maxsize), is attached as
   a page named "<Covariance>". maxsize is set by Get_vmsizes(in).

   in       The data set to summarize; read here, not freed.
   Returns  The newly built data set, covariance page attached.
*/
static apop_data *colmeans(apop_data *in){
    Get_vmsizes(in); //maxsize
    apop_data *sums = apop_data_summarize(in);
    Apop_col_tv(sums, "mean", means);
    apop_data *out = apop_matrix_to_data(apop_vector_to_matrix(means, 'r'));
    apop_name_stack(out->names, in->names, 'c', 'c');
    apop_data *cov = apop_data_add_page(out, apop_data_covariance(in), "<Covariance>");
    // NOTE(review): the covariance of a sample mean is usually Sigma/n, not
    // Sigma/sqrt(n) -- confirm this scaling is intended.
    gsl_matrix_scale(cov->matrix, 1/sqrt(maxsize));
    // The summary table was only needed for its "mean" column; release it
    // rather than leaking it on every call. This assumes apop_vector_to_matrix
    // copied `means` instead of aliasing storage owned by `sums`.
    apop_data_free(sums);
    return out;
}