int main(int argc, char **argv){ char c, msg[1000]; int colnames = 'y', rownames = 0, tab_exists_check = 0; char **field_names = NULL; apop_data *field_name_data, *field_name_data_t; sprintf(msg, "%s [-d delimiters] text_file table_name dbname\n" "e.g.: %s -d\",|\" infile.txt a_table info.db\n" "If the input text file name is a single dash, -, then read from STDIN.\n" "Input must be plain ASCII or UTF-8.\n" "-d\t\tThe single-character delimiters to use, e.g., -d \" ,\" or -d \"\\t\" (which you \n" "\t\t\twill almost certainly have to write as -d \"\\\\t\"). Default: \"| ,\\t\", meaning \n" "\t\t\tthat any of a pipe, space, comma, or tab will delimit separate entries\n" "-nc\t\tData does not include column names\n" "-n regex\t\tCase-insensitive regular expression indicating Null values. Default: NaN \n" "-m\t\tUse a mysql database (default: SQLite)\n" "-f\t\tfixed width field ends: -f\"3,8,12,17\" (first char is one, not zero)\n" "-u\t\tmysql username\n" "-p\t\tmysql password\n" "-r\t\tData includes row names\n" "-v\t\tVerbose\n" "-N\t\tA comma-separated list of column names: -N\"apple,banana,carrot,durian\"\n" "-O\t\tIf table exists, erase it and write from scratch (i.e., Overwrite)\n" "-h\t\tPrint this help\n\n" , argv[0], argv[0]); int * field_list = NULL; if(argc<3){ printf("%s", msg); return 0; } while ((c = getopt (argc, argv, "n:d:f:hmp:ru:vN:O")) != -1) if (c=='n') { if (optarg[0]=='c') colnames='n'; else strcpy(apop_opts.db_nan, optarg); } else if (c=='N') { apop_regex(optarg, " *([^,]*[^ ]) *(,|$) *", &field_name_data); field_name_data_t = apop_data_transpose(field_name_data); field_names = field_name_data_t->text[0]; } else if (c=='d') strcpy(apop_opts.input_delimiters, optarg); else if (c=='f') field_list = break_down(optarg); else if (c=='h') printf("%s", msg); else if (c=='m') apop_opts.db_engine = 'm'; else if (c=='u') strcpy(apop_opts.db_user, optarg); else if (c=='p') strcpy(apop_opts.db_pass, optarg); else if (c=='r') rownames++; else if (c=='v') 
apop_opts.verbose=2; else if (c=='O') tab_exists_check++; apop_db_open(argv[optind + 2]); if (tab_exists_check) apop_table_exists(argv[optind+1],1); apop_query("begin;"); apop_text_to_db(argv[optind], argv[optind+1], rownames, colnames, field_names, .field_ends=field_list); apop_query("commit;"); }
/* Fill in the missing cells of the "cpi" table and store the result as
 * "cpi_clean".
 *
 * Steps: pull the table as a country-by-year crosstab of scores, fit a
 * multivariate normal to the listwise-deleted copy, run ML imputation on
 * the full crosstab with that model, and write the crosstab back out.
 */
int main(){
    apop_db_open("data-corruption.db");

    apop_data *scores = apop_db_to_crosstab("cpi", "country", "year", "score");
    apop_data *complete_rows = apop_data_listwise_delete(scores);

    /* The model is estimated on the listwise-deleted rows only. */
    apop_model *mvn = apop_estimate(complete_rows, apop_multivariate_normal);

    apop_ml_imputation(scores, mvn);
    apop_crosstab_to_db(scores, "cpi_clean", "country", "year", "score");
    return 0;
}
int main(int argc, char **argv){ char c, verbose=0, *delimiter, *outfile = NULL, msg[1000]; apop_data *m; sprintf(msg, "%s [opts] dbname table_name rows columns data\n\n" "-d\tdelimiter\t\tdefault= \"|,<space><tab>\"\n" "-a\tappend\t\t\tdefault= append\n" "-o\toverwrite\t\tdefault= append\n" "-v\tverbose: prints status info on stderr and raises apop_opts.verbose by one for each use (so use -v -v for extra-verbose)\n" "-f\tfile to dump to\t\tdefault=STDOUT\n", argv[0]); if(argc<5){ printf("%s", msg); return 0; } delimiter = malloc(5); strcpy(delimiter, ","); while ((c = getopt (argc, argv, "ad:f:ho")) != -1){ switch (c){ case 'a': apop_opts.output_append = 1; break; case 'd': strcpy(apop_opts.output_delimiter,optarg); break; case 'o': apop_opts.output_append = 0; break; case 'f': outfile = malloc(1000); sprintf(outfile, "%s", optarg); apop_opts.output_type = 'f'; break; case 'h': printf("%s", msg); return 0; case 'v': verbose++; apop_opts.verbose++; break; } } Apop_assert(optind+4 <= argc, "I need five arguments past the options: database, table, row col, column col, data col"); if (verbose){ fprintf(stderr, "database:%s\ntable: %s\nrow col: %s\ncol col:%s\ndata col:%s\n", argv[optind], argv[optind +1], argv[optind+2], argv[optind+3], argv[optind+4]); if (outfile) fprintf(stderr, "outfile: %s\n", outfile); else fprintf(stderr, "output to stdout\n"); if (apop_opts.output_append) fprintf(stderr, "appending to output\n"); else fprintf(stderr, "overwriting output\n"); } apop_db_open(argv[optind]); m = apop_db_to_crosstab(argv[optind +1], argv[optind+2], argv[optind+3], argv[optind+4]); apop_data_print(m, outfile); }
/* Open data-census.db and return the result of one fixed query.
 *
 * The query joins geography, income, demos and postcodes, keeps rows with
 * income.sumlevel '040', and selects the postcode (aliased as row_names),
 * m_per_100_f, population in millions, and median_age.
 *
 * Returns whatever apop_query_to_data returns for that query.
 */
apop_data *query_data(){
    apop_db_open("data-census.db");

    apop_data *census = apop_query_to_data(
            " select postcode as row_names, "
            " m_per_100_f, population/1e6 as population, median_age "
            " from geography, income,demos,postcodes "
            " where income.sumlevel= '040' "
            " and geography.geo_id = demos.geo_id "
            " and income.geo_name = postcodes.state "
            " and geography.geo_id = income.geo_id ");
    return census;
}
gsl_matrix *query(char *d, char *q, int no_plot){ apop_db_open(d); apop_data *result = apop_query_to_data("%s", q); apop_db_close(0); Apop_stopif(!result, exit(2), 0, "Your query returned a blank table. Quitting."); Apop_stopif(result->error, exit(2), 0, "Error running your query. Quitting."); if (no_plot){ apop_data_show(result); exit(0); } return result->matrix; }
int main(){ apop_db_open("data-climate.db"); apop_data *data = apop_query_to_data("select pcp from precip"); apop_data_pmf_compress(data); //creates a weights vector apop_vector_normalize(data->weights); apop_data_sort(data); apop_model *pmf = apop_estimate(data, apop_pmf); FILE *outfile = fopen("out.h", "w"); apop_model_print(pmf, outfile); apop_model *kernel = apop_model_set_parameters(apop_normal, 0., 0.1); apop_model *k = apop_model_copy(apop_kernel_density); Apop_settings_add_group(k, apop_kernel_density, .base_pmf=pmf, .kernel=kernel); plot(k, "out.k"); printf("plot 'out.h' with lines title 'data', 'out.k' with lines title 'smoothed'\n"); }
int main(){ int rep_ct = 10000; gsl_rng *r = apop_rng_alloc(0); apop_db_open("data-census.db"); gsl_vector *base_data = apop_query_to_vector("select in_per_capita from income where sumlevel+0.0 =40"); double RI = apop_query_to_float("select in_per_capita from income where sumlevel+0.0 =40 and geo_id2+0.0=44"); gsl_vector *boot_sample = gsl_vector_alloc(base_data->size); gsl_vector *replications = gsl_vector_alloc(rep_ct); for (int i=0; i< rep_ct; i++){ one_boot(base_data, r, boot_sample); gsl_vector_set(replications, i, apop_mean(boot_sample)); } double stderror = sqrt(apop_var(replications)); double mean = apop_mean(replications); printf("mean: %g; standard error: %g; (RI-mean)/stderr: %g; p value: %g\n", mean, stderror, (RI-mean)/stderror, 2*gsl_cdf_gaussian_Q(fabs(RI-mean), stderror)); }
/* Compare per-capita income between North Dakota and South Dakota with a
 * t statistic, and print the confidence with which the null is rejected.
 */
int main(){
    apop_db_open("data-census.db");

    gsl_vector *north = apop_query_to_vector("select in_per_capita from income "
            "where state= (select state from geography where name ='North Dakota')");
    gsl_vector *south = apop_query_to_vector("select in_per_capita from income "
            "where state= (select state from geography where name ='South Dakota')");

    /* Summary statistics for each state; counts are kept as doubles. */
    double north_n    = north->size;
    double north_mean = apop_vector_mean(north);
    double north_var  = apop_vector_var(north);
    double south_n    = south->size;
    double south_mean = apop_vector_mean(south);
    double south_var  = apop_vector_var(south);

    double gap    = fabs(north_mean - south_mean);
    double spread = sqrt(north_var/ (north_n-1) + south_var/(south_n-1));
    double stat   = gap / spread;

    /* Two-sided: subtract twice the upper-tail t probability from one. */
    double dof = north_n + south_n - 2;
    double confidence = 1 - (2 * gsl_cdf_tdist_Q(stat, dof));

    printf("Reject the null with %g%% confidence\n", confidence*100);
    return 0;
}
int main(){ char outfile[] = "scatter.gplot"; apop_db_open("data-metro.db"); apop_data *data = apop_query_to_data("select riders, year from riders where station like 'Silver%%' and riders>0"); apop_db_close(); //The regression destroys your data, so copy it first. apop_data *data_copy = apop_data_copy(data); //Run OLS, display results on terminal apop_model *est = apop_estimate(data, apop_OLS); apop_model_show(est); //Prep the file with a header, then call the function. FILE *f = fopen(outfile, "w"); fprintf(f,"set term postscript;\n set output \"scatter.eps\"\n set yrange [0:*]\n"); apop_plot_line_and_scatter(data_copy, est, .output_pipe=f); fclose(f); }
ykl_s make_yule(char const *zila, int y) { static gsl_matrix *indices; if (!indices) { indices = gsl_matrix_calloc(65,1); for (int i=0; i< 64; i++) gsl_matrix_set(indices, i,0, i); } apop_data *col = make_histo(zila, y); apop_data ww = (apop_data) { .weights=col->vector, .matrix=indices }; apop_data *d = apop_data_transpose(col); apop_data *exp = apop_data_rank_expand(d); apop_model *m = apop_estimate(exp, apop_yule); apop_model *n = apop_estimate(exp, apop_lognormal); ykl_s out = (ykl_s) { .yule=apop_data_get(m->parameters, .col=-1/*, .rowname="mu"*/), .ln=apop_data_get(n->parameters, .col=-1/*, .rowname="mu"*/), .lnstderr=sqrt(apop_data_get(n->parameters, .col=-1, .row=1/*, .rowname="mu"*/)), .kl = apop_kl_divergence(apop_estimate(&ww, apop_pmf), m), .lnkl = apop_kl_divergence(apop_estimate(&ww, apop_pmf), n), .mean = apop_matrix_mean(col->matrix) }; apop_data_free(d); apop_data_free(exp); apop_model_free(m); return out; } int main() { printf("zila|year|yule_p|kl_div|mu|ln_mu|ln_sigma|ln_kl\n"); apop_db_open("b.db"); apop_data *zilas = apop_query_to_text("select admname from ppl"); for (int i=0; i< *zilas->textsize; i++) for (int y=2001; y<= 2005; y++) { ykl_s ykl = make_yule(*zilas->text[i], y); printf("%20s| %i| %g| %g| %g| %g| %g|%g\n", *zilas->text[i], y, ykl.yule, ykl.kl, ykl.mean, ykl.ln, ykl.lnstderr, ykl.lnkl); } //apop_plot_histogram(m->data->weights, 64, .output_file="histo"); }
int main(int argc, char **argv){ char c, msg[1000]; int colnames = 1, rownames = 0, tab_exists_check = 0; sprintf(msg, "%s [-d delimiters] text_file table_name dbname\n" "e.g.: %s -d\",|\" infile.txt a_table info.db\n" "If the input text file name is a single dash, -, then read from STDIN.\n" "Input must be plain ASCII or UTF-8.\n" "-d\t\tThe single-character delimiters to use, e.g., -d \" ,\" or -d \"\\t\" (which you \n" "\t\t\twill almost certainly have to write as -d \"\\\\t\"). Default: \"| ,\\t\", meaning \n" "\t\t\tthat any of a pipe, space, comma, or tab will delimit separate entries\n" "-nc\t\tData does not include column names\n" "-n regex\t\tCase-insensitive regular expression indicating Null values. Default: NaN \n" "-m\t\tUse a mysql database (default: SQLite)\n" "-f\t\tfixed width field ends: -f\"3,8,12,17\" (first char is one, not zero)\n" "-u\t\tmysql username\n" "-p\t\tmysql password\n" "-r\t\tData includes row names\n" "-v\t\tVerbose\n" "-O\t\tIf table exists, erase it and write from scratch (i.e., Overwrite)\n" "-h\t\tPrint this help\n\n" , argv[0], argv[0]); int * field_list = NULL; if(argc<3){ printf("%s", msg); return 0; } while ((c = getopt (argc, argv, "n:d:f:hmp:ru:vO")) != -1){ switch (c){ case 'n': if (optarg[0]=='c') colnames --; else strcpy(apop_opts.db_nan, optarg); break; case 'd': strcpy(apop_opts.input_delimiters, optarg); break; case 'f': field_list = break_down(optarg); break; case 'h': printf("%s", msg); return 0; case 'm': apop_opts.db_engine = 'm'; break; case 'u': strcpy(apop_opts.db_user, optarg); break; case 'p': strcpy(apop_opts.db_pass, optarg); break; case 'r': rownames ++; break; case 'v': apop_opts.verbose ++; break; case 'O': tab_exists_check ++; break; } } apop_db_open(argv[optind + 2]); if (tab_exists_check) apop_table_exists(argv[optind+1],1); apop_query("begin;"); apop_text_to_db(argv[optind], argv[optind+1], rownames,colnames, NULL, .field_ends=field_list); apop_query("commit;"); }