gsl_matrix *query(char *d, char *q, int no_plot){ apop_db_open(d); apop_data *result = apop_query_to_data("%s", q); apop_db_close(0); Apop_stopif(!result, exit(2), 0, "Your query returned a blank table. Quitting."); Apop_stopif(result->error, exit(2), 0, "Error running your query. Quitting."); if (no_plot){ apop_data_show(result); exit(0); } return result->matrix; }
/** Choose the stream that output will be written to.

  \param outfile  The requested output file name; \c "-" means \c stdout.
                  May be \c NULL.
  \param sf       If nonzero and \c outfile is given, write to that file (or to
                  \c stdout for \c "-"). Otherwise, open a pipe to gnuplot.
  \return An open, writable stream: \c stdout, a file opened with mode
          \c "w", or a pipe opened with \c popen. On failure to open, a
          message is issued and the program exits.
 */
FILE *open_output(char *outfile, int sf){
    //Check outfile for NULL before handing it to strcmp; a NULL name
    //falls through to the gnuplot pipe, like the sf==0 case.
    if (sf && outfile){
        if (!strcmp(outfile, "-")) return stdout;
        FILE *f = fopen(outfile, "w");
        //NOTE(review): exit status 0 on failure is kept from the original;
        //confirm whether a nonzero status is wanted here.
        Apop_stopif(!f, exit(0), 0, "Trouble opening %s.", outfile);
        return f;
    }

    FILE *f = popen("`which gnuplot` -persist", "w");
    Apop_stopif(!f, exit(0), 0, "Trouble opening %s.", "gnuplot");
    return f;
}
apop_data * apop_bootstrap_cov_base(apop_data * data, apop_model *model, gsl_rng *rng, int iterations, char keep_boots, char ignore_nans, apop_data **boot_store){ #endif Get_vmsizes(data); //vsize, msize1, msize2 apop_model *e = apop_model_copy(model); apop_data *subset = apop_data_copy(data); apop_data *array_of_boots = NULL, *summary; //prevent and infinite regression of covariance calculation. Apop_model_add_group(e, apop_parts_wanted); //default wants for nothing. size_t i, nan_draws=0; apop_name *tmpnames = (data && data->names) ? data->names : NULL; //save on some copying below. if (data && data->names) data->names = NULL; int height = GSL_MAX(msize1, GSL_MAX(vsize, (data?(*data->textsize):0))); for (i=0; i<iterations && nan_draws < iterations; i++){ for (size_t j=0; j< height; j++){ //create the data set size_t randrow = gsl_rng_uniform_int(rng, height); apop_data_memcpy(Apop_r(subset, j), Apop_r(data, randrow)); } //get the parameter estimates. apop_model *est = apop_estimate(subset, e); gsl_vector *estp = apop_data_pack(est->parameters); if (!gsl_isnan(apop_sum(estp))){ if (i==0){ array_of_boots = apop_data_alloc(iterations, estp->size); apop_name_stack(array_of_boots->names, est->parameters->names, 'c', 'v'); apop_name_stack(array_of_boots->names, est->parameters->names, 'c', 'c'); apop_name_stack(array_of_boots->names, est->parameters->names, 'c', 'r'); } gsl_matrix_set_row(array_of_boots->matrix, i, estp); } else if (ignore_nans=='y'){ i--; nan_draws++; } apop_model_free(est); gsl_vector_free(estp); } if(data) data->names = tmpnames; apop_data_free(subset); apop_model_free(e); int set_error=0; Apop_stopif(i == 0 && nan_draws == iterations, apop_return_data_error(N), 1, "I ran into %i NaNs and no not-NaN estimations, and so stopped. " , iterations); Apop_stopif(nan_draws == iterations, set_error++; apop_matrix_realloc(array_of_boots->matrix, i, array_of_boots->matrix->size2), 1, "I ran into %i NaNs, and so stopped. 
Returning results based " "on %zu bootstrap iterations.", iterations, i); summary = apop_data_covariance(array_of_boots); if (boot_store) *boot_store = array_of_boots; else apop_data_free(array_of_boots); if (set_error) summary->error = 'N'; return summary; }
/** Read a whole file into one string and append a terminating ";\n".

  The file is read in pieces of up to 999 characters with \c fgets and the
  pieces are concatenated in order.

  \param infile  The name of the file to read.
  \return A freshly allocated, NUL-terminated string holding the file's text
          followed by ";\n". The caller is responsible for freeing it.
          If the file can't be opened or memory runs out, a message is issued
          and the program exits.
 */
char *read_query(char *infile){
    char in[1000];
    size_t len = 0;             //strlen of q, tracked so each append is O(length of the piece)
    char *q = malloc(10);
    Apop_stopif(!q, exit(2), 0, "Out of memory while reading %s.", infile);
    q[0] = '\0';

    FILE *inf = fopen(infile, "r");
    //NOTE(review): exit status 0 on failure is kept from the original;
    //confirm whether a nonzero status is wanted here.
    Apop_stopif(!inf, exit(0), 0, "Trouble opening %s. Look into that.\n", infile);

    while (fgets(in, sizeof in, inf)){
        size_t piece = strlen(in);
        //+4 leaves room for the trailing ";\n" and the NUL written after the loop.
        char *grown = realloc(q, len + piece + 4);
        Apop_stopif(!grown, exit(2), 0, "Out of memory while reading %s.", infile);
        q = grown;
        //Append by copying to the end of q; sprintf(q, "%s%s", q, in) would
        //read and write the same buffer, which is undefined behavior.
        memcpy(q + len, in, piece + 1);
        len += piece;
    }
    memcpy(q + len, ";\n", 3);  //two characters plus the NUL
    fclose(inf);
    return q;
}
/** Estimate the covariance matrix of a model's parameters via the jackknife.

The jackknife, in outline: build a sequence of data sets, each of which is the
original with exactly one observation left out; re-estimate the model on each
shortened data set; then take the covariance matrix of the resulting parameter
estimates.

\li Jackknife or bootstrap? As a broad rule of thumb, the jackknife works best
on models that are closer to linear. The worse a linear approximation does (at
the given data), the worse the jackknife approximates the variance.

\param in     The data set. An \ref apop_data set where each row is a single data point.
\param model  An \ref apop_model, that will be used internally by \ref apop_estimate.

\exception out->error=='n'   \c NULL input data.
\return  An \c apop_data set whose matrix element is the estimated covariance matrix of the parameters.
\see apop_bootstrap_cov

For example:
\include jack.c
*/
apop_data * apop_jackknife_cov(apop_data *in, apop_model *model){
    Apop_stopif(!in, apop_return_data_error(n), 0, "The data input can't be NULL.");
    Get_vmsizes(in); //msize1, msize2, vsize
    apop_model *e = apop_model_copy(model);
    int nobs = GSL_MAX(msize1, GSL_MAX(vsize, in->textsize[0]));

    //Use the model's parameters if it already has them; otherwise estimate on the full data.
    apop_model *overall_est = e;
    if (!e->parameters)
        overall_est = apop_estimate(in, e);

    //n * (overall estimate), computed once and reused for every pseudovalue.
    gsl_vector *overall_params = apop_data_pack(overall_est->parameters);
    gsl_vector_scale(overall_params, nobs);
    gsl_vector *pseudoval = gsl_vector_alloc(overall_params->size);

    //Start with a copy of the original minus its first row.
    apop_data *subset = apop_data_copy(Apop_rs(in, 1, nobs-1));

    //Detach the names while looping, to save on some copying; restored below.
    apop_name *saved_names = in->names;
    in->names = NULL;

    apop_data *pseudovalues = apop_data_alloc(nobs, overall_params->size);
    for (int omitted = 0; omitted < nobs; omitted++){
        //Moving from "row omitted-1 left out" to "row omitted left out":
        //write row omitted-1 of the input over the same position in the short set.
        if (omitted > 0){
            int restore = omitted - 1;
            apop_data_memcpy(Apop_r(subset, restore), Apop_r(in, restore));
        }
        apop_model *est = apop_estimate(subset, e);
        gsl_vector *estp = apop_data_pack(est->parameters);

        //pseudovalue = n*overall - (n-1)*leave-one-out
        gsl_vector_memcpy(pseudoval, overall_params);
        gsl_vector_scale(estp, nobs-1);
        gsl_vector_sub(pseudoval, estp);
        gsl_matrix_set_row(pseudovalues->matrix, omitted, pseudoval);

        gsl_vector_free(estp);
        apop_model_free(est);
    }
    in->names = saved_names;

    apop_data *out = apop_data_covariance(pseudovalues);
    gsl_matrix_scale(out->matrix, 1.0/(nobs - 1.0));

    apop_data_free(pseudovalues);
    apop_data_free(subset);
    gsl_vector_free(pseudoval);
    gsl_vector_free(overall_params);
    if (e != overall_est)
        apop_model_free(overall_est);
    apop_model_free(e);
    return out;
}
/** Look for keys in the "keys" table that are probably misspellings of a known key.

  Each key returned by "select key from keys" is compared against every entry
  of \c ok_keys (a list terminated by an empty string) using
  \c levenshtein_distance. A key that matches some entry exactly is fine. A key
  whose nearest entry is at a distance between 1 and \c max_lev_distance is
  reported as a likely typo, along with that nearest entry.

  \param max_lev_distance  The largest edit distance that still counts as a typo
                           of a known key.
  \return The number of keys reported. Zero if the "keys" table doesn't exist
          or the query gives back nothing.
 */
int check_levenshtein_distances(int max_lev_distance){
    int typo_counter = 0;
    if (!apop_table_exists("keys")) return 0;

    apop_data *userkeys = apop_query_to_text("select key from keys");
    //NOTE(review): assumes a NULL return is possible for an empty result; the check is harmless otherwise.
    if (!userkeys) return 0;

    for (size_t i = 0; i < *userkeys->textsize; i++){
        int min_distance = 100;     //only candidates closer than this are tracked
        char *closest = NULL;       //reset per key, so a stale or unset pointer is never printed
        for (char **keyptr = ok_keys; strlen(*keyptr); keyptr++){
            int ld = levenshtein_distance(*keyptr, *userkeys->text[i]);
            if (ld == 0){           //exact match: this key is valid, stop looking.
                min_distance = 0;
                break;
            }
            if (ld < min_distance){
                min_distance = ld;
                closest = *keyptr;
            }
        }
        Apop_stopif(closest && min_distance > 0 && min_distance <= max_lev_distance,
                typo_counter++, 0,
                "You wrote %s for one of the keys in your spec file. Did you "
                "mean to write %s?", *userkeys->text[i], closest);
    }
    apop_data_free(userkeys);
    return typo_counter;
}