int main(int argc, char **argv){ char c, msg[1000]; int colnames = 'y', rownames = 0, tab_exists_check = 0; char **field_names = NULL; apop_data *field_name_data, *field_name_data_t; sprintf(msg, "%s [-d delimiters] text_file table_name dbname\n" "e.g.: %s -d\",|\" infile.txt a_table info.db\n" "If the input text file name is a single dash, -, then read from STDIN.\n" "Input must be plain ASCII or UTF-8.\n" "-d\t\tThe single-character delimiters to use, e.g., -d \" ,\" or -d \"\\t\" (which you \n" "\t\t\twill almost certainly have to write as -d \"\\\\t\"). Default: \"| ,\\t\", meaning \n" "\t\t\tthat any of a pipe, space, comma, or tab will delimit separate entries\n" "-nc\t\tData does not include column names\n" "-n regex\t\tCase-insensitive regular expression indicating Null values. Default: NaN \n" "-m\t\tUse a mysql database (default: SQLite)\n" "-f\t\tfixed width field ends: -f\"3,8,12,17\" (first char is one, not zero)\n" "-u\t\tmysql username\n" "-p\t\tmysql password\n" "-r\t\tData includes row names\n" "-v\t\tVerbose\n" "-N\t\tA comma-separated list of column names: -N\"apple,banana,carrot,durian\"\n" "-O\t\tIf table exists, erase it and write from scratch (i.e., Overwrite)\n" "-h\t\tPrint this help\n\n" , argv[0], argv[0]); int * field_list = NULL; if(argc<3){ printf("%s", msg); return 0; } while ((c = getopt (argc, argv, "n:d:f:hmp:ru:vN:O")) != -1) if (c=='n') { if (optarg[0]=='c') colnames='n'; else strcpy(apop_opts.db_nan, optarg); } else if (c=='N') { apop_regex(optarg, " *([^,]*[^ ]) *(,|$) *", &field_name_data); field_name_data_t = apop_data_transpose(field_name_data); field_names = field_name_data_t->text[0]; } else if (c=='d') strcpy(apop_opts.input_delimiters, optarg); else if (c=='f') field_list = break_down(optarg); else if (c=='h') printf("%s", msg); else if (c=='m') apop_opts.db_engine = 'm'; else if (c=='u') strcpy(apop_opts.db_user, optarg); else if (c=='p') strcpy(apop_opts.db_pass, optarg); else if (c=='r') rownames++; else if (c=='v') 
apop_opts.verbose=2; else if (c=='O') tab_exists_check++; apop_db_open(argv[optind + 2]); if (tab_exists_check) apop_table_exists(argv[optind+1],1); apop_query("begin;"); apop_text_to_db(argv[optind], argv[optind+1], rownames, colnames, field_names, .field_ends=field_list); apop_query("commit;"); }
/* Command-line front end, as far as it is visible here.
 *
 * Builds the usage text with Asprintf, prints it and returns 0 when argc < 3,
 * then walks the options with getopt (option string "n:d:e:f:hmp:ru:vN:O").
 *
 *   -n  An argument starting with 'c' sets colnames to 'n'; any other argument
 *       is stored in apop_opts.nan_string (the optarg pointer itself is
 *       assigned, not a copy).
 *   -N  The argument is split with apop_regex. If the resulting data set is
 *       NULL, Apop_stopif reports the problem and main returns 1. Otherwise
 *       apop_data_transpose is called on it and field_names is pointed at its
 *       first text row.
 *
 * NOTE(review): the definition is cut off after the -N branch in this view;
 * the remaining option branches and the end of main are not shown.
 * NOTE(review): the return value of apop_data_transpose is discarded, so this
 * presumably relies on the transpose being done in place -- confirm against
 * the Apophenia version in use.
 */
int main(int argc, char **argv){ int c; char *msg; int colnames = 'y', rownames = 0, tab_exists_check = 0; char **field_names = NULL; Asprintf(&msg, "Usage: %s [-d delimiters] text_file table_name dbname\n" "\n" "If the input text file name is a single dash, -, then read from STDIN.\n" "Input must be plain ASCII or UTF-8.\n" " -d\t\tthe single-character delimiters to use, e.g., -d \" ,\" or -d \"\\t\" (which you \n" " \t\t\twill almost certainly have to write as -d \"\\\\t\") (default: \"|,\\t\", meaning \n" " \t\t\tthat any of a pipe, comma, or tab will delimit separate entries)\n" " -nc\t\tdata does not include column names\n" " -n regex\t\tcase-insensitive regular expression indicating Null values (default: NaN)\n" " -m\t\tuse a MySQL database (default: SQLite)\n" " -f\t\tfixed width field ends: -f\"3,8,12,17\" (first char is one, not zero)\n" " -u\t\tmysql username\n" " -p\t\tmysql password\n" " -r\t\tdata includes row names\n" " -v\t\tverbosity\n" " -N\t\ta comma-separated list of column names: -N\"apple,banana,carrot,durian\"\n" " -en\t\tif table exists, do nothing and exit\n" " -ed\t\tif table exists, retain the table, delete all data, refill with the new data (i.e., call 'delete * from your_table')\n" " -eo\t\tif table exists, overwrite the table from scratch (deleting the previous table entirely)\n" " -ea\t\tif table exists, append new data to the existing table\n" " -h\t\tdisplay this help and exit\n" "\n" , argv[0]); int * field_list = NULL; char if_exists = 'n'; if(argc<3){ printf("%s", msg); return 0; } while ((c = getopt (argc, argv, "n:d:e:f:hmp:ru:vN:O")) != -1) if (c=='n') { if (optarg[0]=='c') colnames='n'; else apop_opts.nan_string = optarg; } else if (c=='N') { apop_data *field_name_data; apop_regex(optarg, " *([^,]*[^ ]) *(,|$) *", &field_name_data); Apop_stopif(!field_name_data, return 1, 0, "'%s' should be a " "comma-delimited list of field names, but I had trouble " "parsing it as such.", optarg); apop_data_transpose(field_name_data); 
field_names = field_name_data->text[0]; }
ykl_s make_yule(char const *zila, int y) { static gsl_matrix *indices; if (!indices) { indices = gsl_matrix_calloc(65,1); for (int i=0; i< 64; i++) gsl_matrix_set(indices, i,0, i); } apop_data *col = make_histo(zila, y); apop_data ww = (apop_data) { .weights=col->vector, .matrix=indices }; apop_data *d = apop_data_transpose(col); apop_data *exp = apop_data_rank_expand(d); apop_model *m = apop_estimate(exp, apop_yule); apop_model *n = apop_estimate(exp, apop_lognormal); ykl_s out = (ykl_s) { .yule=apop_data_get(m->parameters, .col=-1/*, .rowname="mu"*/), .ln=apop_data_get(n->parameters, .col=-1/*, .rowname="mu"*/), .lnstderr=sqrt(apop_data_get(n->parameters, .col=-1, .row=1/*, .rowname="mu"*/)), .kl = apop_kl_divergence(apop_estimate(&ww, apop_pmf), m), .lnkl = apop_kl_divergence(apop_estimate(&ww, apop_pmf), n), .mean = apop_matrix_mean(col->matrix) }; apop_data_free(d); apop_data_free(exp); apop_model_free(m); return out; } int main() { printf("zila|year|yule_p|kl_div|mu|ln_mu|ln_sigma|ln_kl\n"); apop_db_open("b.db"); apop_data *zilas = apop_query_to_text("select admname from ppl"); for (int i=0; i< *zilas->textsize; i++) for (int y=2001; y<= 2005; y++) { ykl_s ykl = make_yule(*zilas->text[i], y); printf("%20s| %i| %g| %g| %g| %g| %g|%g\n", *zilas->text[i], y, ykl.yule, ykl.kl, ykl.mean, ykl.ln, ykl.lnstderr, ykl.lnkl); } //apop_plot_histogram(m->data->weights, 64, .output_file="histo"); }