int main(int argc, char **argv){ char c, msg[1000]; int colnames = 'y', rownames = 0, tab_exists_check = 0; char **field_names = NULL; apop_data *field_name_data, *field_name_data_t; sprintf(msg, "%s [-d delimiters] text_file table_name dbname\n" "e.g.: %s -d\",|\" infile.txt a_table info.db\n" "If the input text file name is a single dash, -, then read from STDIN.\n" "Input must be plain ASCII or UTF-8.\n" "-d\t\tThe single-character delimiters to use, e.g., -d \" ,\" or -d \"\\t\" (which you \n" "\t\t\twill almost certainly have to write as -d \"\\\\t\"). Default: \"| ,\\t\", meaning \n" "\t\t\tthat any of a pipe, space, comma, or tab will delimit separate entries\n" "-nc\t\tData does not include column names\n" "-n regex\t\tCase-insensitive regular expression indicating Null values. Default: NaN \n" "-m\t\tUse a mysql database (default: SQLite)\n" "-f\t\tfixed width field ends: -f\"3,8,12,17\" (first char is one, not zero)\n" "-u\t\tmysql username\n" "-p\t\tmysql password\n" "-r\t\tData includes row names\n" "-v\t\tVerbose\n" "-N\t\tA comma-separated list of column names: -N\"apple,banana,carrot,durian\"\n" "-O\t\tIf table exists, erase it and write from scratch (i.e., Overwrite)\n" "-h\t\tPrint this help\n\n" , argv[0], argv[0]); int * field_list = NULL; if(argc<3){ printf("%s", msg); return 0; } while ((c = getopt (argc, argv, "n:d:f:hmp:ru:vN:O")) != -1) if (c=='n') { if (optarg[0]=='c') colnames='n'; else strcpy(apop_opts.db_nan, optarg); } else if (c=='N') { apop_regex(optarg, " *([^,]*[^ ]) *(,|$) *", &field_name_data); field_name_data_t = apop_data_transpose(field_name_data); field_names = field_name_data_t->text[0]; } else if (c=='d') strcpy(apop_opts.input_delimiters, optarg); else if (c=='f') field_list = break_down(optarg); else if (c=='h') printf("%s", msg); else if (c=='m') apop_opts.db_engine = 'm'; else if (c=='u') strcpy(apop_opts.db_user, optarg); else if (c=='p') strcpy(apop_opts.db_pass, optarg); else if (c=='r') rownames++; else if (c=='v') 
apop_opts.verbose=2; else if (c=='O') tab_exists_check++; apop_db_open(argv[optind + 2]); if (tab_exists_check) apop_table_exists(argv[optind+1],1); apop_query("begin;"); apop_text_to_db(argv[optind], argv[optind+1], rownames, colnames, field_names, .field_ends=field_list); apop_query("commit;"); }
void check_out_impute(char **origin, char **destin, int *imputation_number, char **subset, char **filltabin){ char *filltab = (filltabin && *filltabin) ? *filltabin : "filled"; Tea_stopif(!origin || !*origin, return, 0, "NULL origin table, but I need that."); char *id_column= get_key_word(NULL, "id"); const char *dest = destin ? *destin : NULL; int use_rowids = 0; if (!id_column) { use_rowids++; id_column = strdup("rowid"); } sprintf(apop_opts.db_name_column, "%s", id_column); begin_transaction(); if (dest && strcmp(*origin, dest)){ apop_table_exists(dest, 'd'); apop_query("create table %s as select %s * from %s %s %s", dest, use_rowids ? "rowid as id_col, " : " ", *origin, (subset && *subset) ? "where" : " ", (subset && *subset) ? *subset : " " ); } else dest = *origin; create_index(dest, use_rowids ? "id_col" : id_column); Tea_stopif(!apop_table_exists(filltab), return , 0, "No table named '%s'; did you already doMImpute()?", filltab); apop_data *fills = apop_query_to_text("select %s, field, value from %s where (draw=%i or draw = -1)" , id_column, filltab, *imputation_number); Tea_stopif(!fills || fills->error, return, 0, "Expected fill-in table " "%s, but couldn't query it.", filltab); for(int i=0; i< *fills->textsize; i++){ _Bool is_null = !strcmp(fills->text[i][1], apop_opts.nan_string); char tick = is_null ? ' ' : '\''; apop_query("update %s set %s = %c%s%c " "where cast(%s as numeric) = %s", dest, fills->text[i][0], tick, is_null ? "NULL" : fills->text[i][1], tick, id_column, fills->names->row[i]); } commit_transaction(); apop_data_free(fills); free(id_column); }
//these work by checking that K-L divergence shrunk, and that individual margins are correct. void test_raking_further(){ apop_table_exists("rake_test", 'd'); apop_query("create table rake_test (first, second, weights);" "insert into rake_test values(1, 1, 10);" "insert into rake_test values(1, 2, 2);" "insert into rake_test values(2, 1, 15);" "insert into rake_test values(2, 2, 5);" ); //Synthetic data, starting at all ones. apop_data_print( apop_rake(.margin_table="rake_test", .count_col="weights", .contrasts=(char*[]){"first", "second"}, .contrast_ct=2),
void test_check_out_impute(){ apop_table_exists("testcheckoutbase", 'd'); apop_table_exists("testcheckoutfill", 'd'); apop_table_exists("testcheckoutbase_copy", 'd'); apop_table_exists("tcb", 'd'); apop_query("create table testcheckoutbase (id, a, b); " " insert into testcheckoutbase values(0, 3, 0./0.);" " insert into testcheckoutbase values(1, 0./0., 3);" " insert into testcheckoutbase values(2, 3, 3);" "create table testcheckoutfill (draw, value, id, field); " " insert into testcheckoutfill values(0, 3, 1, 'a');" " insert into testcheckoutfill values(0, 3, 0, 'b');" " insert into testcheckoutfill values(1, 9, 1, 'a');" " insert into testcheckoutfill values(1, 6, 0, 'b');" ); char *strings[] = {"testcheckoutbase", //0 "testcheckoutfill", //1 "testcheckoutbase_copy", //2 "tcb" //3 }; set_key_text("id", NULL, "id"); check_out_impute( strings+0, strings+2, (int[]){0}, NULL, strings+1);
void check_out_impute(char **origin, char **destin, int *imputation_number, char **subset, char **filltabin){ char *filltab = (filltabin && *filltabin) ? *filltabin : "filled"; Apop_stopif(!origin || !*origin, return, 0, "NULL origin table, but I need that."); char *id_column= get_key_word(NULL, "id"); const char *dest = destin ? *destin : NULL; int use_rowids = 0; if (!id_column) { use_rowids++; id_column = strdup("rowid"); } sprintf(apop_opts.db_name_column, "%s", id_column); if (dest){ apop_table_exists(dest, 'd'); apop_query("create table %s as select %s * from %s %s %s", dest, use_rowids ? "rowid as id_col, " : " ", *origin, (subset && *subset) ? "where" : " ", (subset && *subset) ? *subset : " " ); } else dest = *origin; has_sqlite3_index(dest, use_rowids ? "id_col" : id_column, 'y'); Apop_stopif(!apop_table_exists(filltab), return , 0, "No table named '%s'; did you already doMImpute()?", filltab); apop_data *fills = apop_query_to_text("select %s, field, value from %s where draw+0.0=%i" , id_column, filltab, *imputation_number); Apop_stopif(!fills || fills->error, return, 0, "Expected fill-in table " "%s, but couldn't query it.", filltab); begin_transaction(); if (fills) for(int i=0; i< *fills->textsize; i++) apop_query("update %s set %s = '%s' " "where %s = %s", dest, fills->text[i][0], fills->text[i][1], id_column, fills->names->row[i]); commit_transaction(); apop_data_free(fills); free(id_column); }
/** Send an \ref apop_data set to the destination selected by \c .output_type:
  a file, the database, or the screen.

  When the output type is \c 'd' the data is handed to \ref apop_data_to_db;
  for every other type the set is printed to the output pipe, followed by any
  further pages chained via the \c more element, which are printed in append mode.

\li See \ref apop_prep_output for more on how printing settings are set.
\li See \ref Legi for more details and examples.
\li See \ref sqlsec for notes on writing an \ref apop_data set to the database.
\li This function uses the \ref designated syntax for inputs.
\ingroup all_public
*/
APOP_VAR_HEAD void apop_data_print(const apop_data *data, Output_declares){
    const apop_data * apop_varad_var(data, NULL);
    Dispatch_output
APOP_VAR_ENDHEAD
    if (output_type != 'd'){
        // Stream output: this page first, then the chained ->more pages.
        apop_data_print_core(data, output_pipe, output_type);
        if (data && data->more){
            output_append = 'a';   // later pages are added after this one
            apop_data_print(data->more, Output_vars);
        }
        // Close the pipe only when an output name was given.
        if (output_name)
            fclose(output_pipe);
        return;
    }

    // Database output. In write mode, call apop_table_exists with 'd' first
    // (presumably to clear out an existing table of this name).
    if (output_append == 'w')
        apop_table_exists(output_name, 'd');
    apop_data_to_db(data, output_name, output_append);
}
int check_levenshtein_distances(int max_lev_distance){ int typo_counter=0; int min_distance; char *closest; if (!apop_table_exists("keys")) return 0; apop_data *userkeys = apop_query_to_text("select key from keys"); for (int i=0; i < *userkeys->textsize; i++){ min_distance = 100; for (char **keyptr=ok_keys; strlen(*keyptr); keyptr++){ int ld = levenshtein_distance(*keyptr, *userkeys->text[i]); if (ld < min_distance){ if(ld == 0) {min_distance=0; break;} min_distance=ld; closest = *keyptr; } } Apop_stopif(min_distance > 0 && min_distance <= max_lev_distance, typo_counter++ , 0, "You wrote %s for one of the keys in your spec file. Did you " "mean to write %s?", *userkeys->text[i], closest); } return typo_counter; }
int main(int argc, char **argv){ char c, msg[1000]; int colnames = 1, rownames = 0, tab_exists_check = 0; sprintf(msg, "%s [-d delimiters] text_file table_name dbname\n" "e.g.: %s -d\",|\" infile.txt a_table info.db\n" "If the input text file name is a single dash, -, then read from STDIN.\n" "Input must be plain ASCII or UTF-8.\n" "-d\t\tThe single-character delimiters to use, e.g., -d \" ,\" or -d \"\\t\" (which you \n" "\t\t\twill almost certainly have to write as -d \"\\\\t\"). Default: \"| ,\\t\", meaning \n" "\t\t\tthat any of a pipe, space, comma, or tab will delimit separate entries\n" "-nc\t\tData does not include column names\n" "-n regex\t\tCase-insensitive regular expression indicating Null values. Default: NaN \n" "-m\t\tUse a mysql database (default: SQLite)\n" "-f\t\tfixed width field ends: -f\"3,8,12,17\" (first char is one, not zero)\n" "-u\t\tmysql username\n" "-p\t\tmysql password\n" "-r\t\tData includes row names\n" "-v\t\tVerbose\n" "-O\t\tIf table exists, erase it and write from scratch (i.e., Overwrite)\n" "-h\t\tPrint this help\n\n" , argv[0], argv[0]); int * field_list = NULL; if(argc<3){ printf("%s", msg); return 0; } while ((c = getopt (argc, argv, "n:d:f:hmp:ru:vO")) != -1){ switch (c){ case 'n': if (optarg[0]=='c') colnames --; else strcpy(apop_opts.db_nan, optarg); break; case 'd': strcpy(apop_opts.input_delimiters, optarg); break; case 'f': field_list = break_down(optarg); break; case 'h': printf("%s", msg); return 0; case 'm': apop_opts.db_engine = 'm'; break; case 'u': strcpy(apop_opts.db_user, optarg); break; case 'p': strcpy(apop_opts.db_pass, optarg); break; case 'r': rownames ++; break; case 'v': apop_opts.verbose ++; break; case 'O': tab_exists_check ++; break; } } apop_db_open(argv[optind + 2]); if (tab_exists_check) apop_table_exists(argv[optind+1],1); apop_query("begin;"); apop_text_to_db(argv[optind], argv[optind+1], rownames,colnames, NULL, .field_ends=field_list); apop_query("commit;"); }