Example #1
0
File: checkout.c Project: b-k/tea
void check_out_impute(char **origin, char **destin, int *imputation_number, char **subset, char **filltabin){
    char *filltab = (filltabin && *filltabin) ? *filltabin : "filled";
    Tea_stopif(!origin || !*origin, return, 0, "NULL origin table, but I need that.");
    char *id_column= get_key_word(NULL, "id");
    const char *dest = destin ? *destin : NULL;
    int use_rowids = 0;
    if (!id_column) {
        use_rowids++;
        id_column = strdup("rowid");
    }
    sprintf(apop_opts.db_name_column, "%s",  id_column);
    begin_transaction();
    if (dest && strcmp(*origin, dest)){
        apop_table_exists(dest, 'd');
        apop_query("create table %s as select %s * from %s %s %s", 
                        dest, 
                        use_rowids ? "rowid as id_col, " : " ", *origin,
                        (subset && *subset) ? "where" : " ",
                        (subset && *subset) ? *subset : " "
                        );
    } else dest = *origin;
    create_index(dest, use_rowids ? "id_col" : id_column);
    Tea_stopif(!apop_table_exists(filltab), return , 0, "No table named '%s'; did you already doMImpute()?", filltab);
    apop_data *fills = apop_query_to_text("select %s, field, value from %s where (draw=%i or draw = -1)"
                                              , id_column, filltab, *imputation_number);
    Tea_stopif(!fills || fills->error, return, 0, "Expected fill-in table "
                "%s, but couldn't query it.", filltab);
    for(int i=0; i< *fills->textsize; i++){
        _Bool is_null = !strcmp(fills->text[i][1], apop_opts.nan_string);
        char tick = is_null ? ' ' : '\'';
        apop_query("update %s set %s = %c%s%c "
                   "where cast(%s as numeric) = %s", 
                      dest, fills->text[i][0], 
                      tick, is_null ? "NULL" : fills->text[i][1], tick,
                      id_column, fills->names->row[i]);
    }
    commit_transaction();
    apop_data_free(fills);
    free(id_column);
}
Example #2
0
ykl_s make_yule(char const *zila, int y) {
    static gsl_matrix *indices;
    if (!indices) {
        indices = gsl_matrix_calloc(65,1);
        for (int i=0; i< 64; i++) gsl_matrix_set(indices, i,0, i);
    }
    apop_data *col = make_histo(zila, y);
    apop_data ww = (apop_data) {
        .weights=col->vector, .matrix=indices
    };
    apop_data *d = apop_data_transpose(col);
    apop_data *exp = apop_data_rank_expand(d);
    apop_model *m = apop_estimate(exp, apop_yule);
    apop_model *n = apop_estimate(exp, apop_lognormal);
    ykl_s out = (ykl_s) {
        .yule=apop_data_get(m->parameters, .col=-1/*, .rowname="mu"*/),
         .ln=apop_data_get(n->parameters, .col=-1/*, .rowname="mu"*/),
          .lnstderr=sqrt(apop_data_get(n->parameters, .col=-1, .row=1/*, .rowname="mu"*/)),
           .kl = apop_kl_divergence(apop_estimate(&ww, apop_pmf), m),
            .lnkl = apop_kl_divergence(apop_estimate(&ww, apop_pmf), n),
             .mean = apop_matrix_mean(col->matrix)
    };
    apop_data_free(d);
    apop_data_free(exp);
    apop_model_free(m);
    return out;
}

/** Print one pipe-delimited line of Yule/lognormal fit statistics for every
 *  admname in the ppl table of b.db, for each year 2001 through 2005.
 *
 * \return 0 on success; 1 if the database cannot be opened or the list of
 *         names cannot be read.
 */
int main() {
    printf("zila|year|yule_p|kl_div|mu|ln_mu|ln_sigma|ln_kl\n");
    if (apop_db_open("b.db")) {
        fprintf(stderr, "Could not open database b.db.\n");
        return 1;
    }
    apop_data *zilas = apop_query_to_text("select admname from ppl");
    // A NULL result would otherwise be dereferenced in the loop condition.
    if (!zilas) {
        fprintf(stderr, "No names returned from table ppl.\n");
        return 1;
    }
    for (int i=0; i< *zilas->textsize; i++)
        for (int y=2001; y<= 2005; y++) {
            ykl_s ykl = make_yule(*zilas->text[i], y);
            printf("%20s| %i| %g| %g| %g| %g| %g|%g\n", *zilas->text[i], y, ykl.yule, ykl.kl, ykl.mean, ykl.ln, ykl.lnstderr, ykl.lnkl);
        }
    //apop_plot_histogram(m->data->weights, 64, .output_file="histo");
    apop_data_free(zilas);
    return 0;
}
Example #3
0
/** Look for probable typos among the keys stored in the keys table.
 *
 * Each stored key is compared against every entry of ok_keys (a list
 * terminated by an empty string). If the nearest entry is not an exact match
 * but lies within max_lev_distance edits, a warning naming that entry is
 * issued and the key is counted.
 *
 * \param max_lev_distance Largest Levenshtein distance still treated as a typo.
 * \return Number of keys flagged as likely typos; 0 if there is no keys table
 *         or the query gives no result.
 */
int check_levenshtein_distances(int max_lev_distance){
    if (!apop_table_exists("keys")) return 0;
    apop_data *userkeys = apop_query_to_text("select key from keys");
    if (!userkeys) return 0;    // nothing to check.

    int typo_counter=0;
    for (int i=0; i < *userkeys->textsize; i++){
        int min_distance = 100;     // distances of 100 or more are never considered.
        // Reset per key, so a suggestion can never be stale or uninitialized.
        char *closest = NULL;
        for (char **keyptr=ok_keys; strlen(*keyptr); keyptr++){
            int ld = levenshtein_distance(*keyptr, *userkeys->text[i]);

            if (ld < min_distance){
                if(ld == 0) {min_distance=0; break;}    // exact match: a valid key.
                min_distance=ld;
                closest = *keyptr;    
            }
        }
        Apop_stopif(closest && min_distance > 0 && min_distance <= max_lev_distance, typo_counter++ , 0, 
                            "You wrote %s for one of the keys in your spec file. Did you "
                            "mean to write %s?", *userkeys->text[i], closest);
    }
    apop_data_free(userkeys);
    return typo_counter;
}
Example #4
0
void check_out_impute(char **origin, char **destin, int *imputation_number, char **subset, char **filltabin){
    char *filltab = (filltabin && *filltabin) ? *filltabin : "filled";
    Apop_stopif(!origin || !*origin, return, 0, "NULL origin table, but I need that.");
    char *id_column= get_key_word(NULL, "id");
    const char *dest = destin ? *destin : NULL;
    int use_rowids = 0;
    if (!id_column) {
        use_rowids++;
        id_column = strdup("rowid");
    }
    sprintf(apop_opts.db_name_column, "%s",  id_column);
    if (dest){
        apop_table_exists(dest, 'd');
        apop_query("create table %s as select %s * from %s %s %s", 
                        dest, 
                        use_rowids ? "rowid as id_col, " : " ", *origin,
                        (subset && *subset) ? "where" : " ",
                        (subset && *subset) ? *subset : " "
                        );
    } else dest = *origin;
    has_sqlite3_index(dest, use_rowids ? "id_col" : id_column, 'y');
    Apop_stopif(!apop_table_exists(filltab), return , 0, "No table named '%s'; did you already doMImpute()?", filltab);
    apop_data *fills = apop_query_to_text("select %s, field, value from %s where draw+0.0=%i"
                                              , id_column, filltab, *imputation_number);
    Apop_stopif(!fills || fills->error, return, 0, "Expected fill-in table "
                "%s, but couldn't query it.", filltab);
    begin_transaction();
    if (fills)
        for(int i=0; i< *fills->textsize; i++)
            apop_query("update %s set %s = '%s' "
                       "where %s = %s", 
                          dest, fills->text[i][0], fills->text[i][1], 
                          id_column, fills->names->row[i]);
    commit_transaction();
    apop_data_free(fills);
    free(id_column);
}
Example #5
0
/** This function creates a series of spec files with paste in macros used
  * instead of normal keys. The tests will ensure that the correct keys are 
  * getting written to the keys table by running read_spec() and then using 
  * apop functions to verify that the keys are indeed in the spec file
  */
void pastein_tests(){

char *spec1;
asprintf(&spec1, "1.spec");

char *spec2;
asprintf(&spec2, "2.spec");

char *spec3;
asprintf(&spec3, "3.spec");
        
char *spec4;
asprintf(&spec4, "4.spec");

char *spec5;
asprintf(&spec5, "5.spec");
        
    /* Standard test here: creating a macro with a few sub keys and calling it on its own
     * in the impute key. If something goes wrong here it's because there's something
     * fundamentally wrong with the paste in macro (because there's only one so there's
     * nothing too complex going on).
     */
     write_a_file(spec1,
     "\n"
     "database: demo.db\n"
     "verbose: 2\n"
     "catagesex{\n"
     "  min group size: 3\n"
     "  draw count: 3\n"
     "  seed: 2332\n"
     "  categories {\n"
     "      CATAGE\n"
     "      SEX\n"
     "  }\n"
     "}\n"
     "\n"
     "input {\n"
     "    input file: dc_pums_08.csv\n"
     "    output table: dc \n "
     "    overwrite: y \n "
     "} \n "
     " \n"
     "fields { \n"
     "SCHL: int 0-24 \n"
     "WAGP: real\n"
     "\n}"
     "impute{\n"
     "  input table: viewdc\n"
     "  output table: imputes\n"
     "  paste in: catagesex\n"
     "  method: hot deck\n"
     "  output vars: SCHL\n"
     "}\n"
     "impute{\n"
     "  input table: viewdc\n"
     "  output table: imputes\n"
     "  paste in: catagesex\n"
     "  method: hot deck\n"
     "  output vars: WAGP\n"
     "}\n"
     );

     /* Creating test here that uses two macros that are used concurrently but that do not
      * call each other. tables{...} and catagesex{...} are each used in impute{...} but
      * they do not "paste each other in". This will be tested in spec 3.
      */
     write_a_file(spec2,
     "\n"
     "database: demo.db\n"
     "verbose: 2\n"
     "catagesex{\n"
     "  min group size: 3\n"
     "  draw count: 3\n"
     "  seed: 2332\n"
     "  categories {\n"
     "      CATAGE\n"
     "      SEX\n"
     "  }\n"
     "}\n"
     "tables{\n"
     "  input table: viewdc\n"
     "  output table: impuTable\n" //To account for analysts who like camel case
     "}\n"
     "\n"
     "input {\n"
     "    paste in: tables\n"
     "    input file: dc_pums_08.csv\n"
     "    output table: dc \n "
     "    overwrite: y \n "
     "} \n "
     " \n"
     "fields { \n"
     "SCHL: int 0-24 \n"
     "WAGP: real\n"
     "\n}"
     "impute{\n"
     "  paste in: tables\n"
     "  paste in: catagesex\n"
     "  method: hot deck\n"
     "  output vars: SCHL\n"
     "}\n"
     "impute{\n"
     "  paste in: tables\n"
     "  paste in: catagesex\n"
     "  method: hot deck\n"
     "  output vars: WAGP\n"
     "}\n"
     );

    /* More complicated test that tests the ability of a macro to use another macro in its
     * own definition. For instance, it tests something along the lines of
     * catagesex{paste in: impute stuff \n paste in: categories}
     */
     write_a_file(spec3,
     "\n"
     "database: demo.db\n"
     "verbose: 2\n"
     "imputestuff{\n"
     "  min group size: 3\n"
     "  draw count: 3\n"
     "  seed: 2332\n"
     "}\n"
     "categoriesstuff {\n"
     "  categories{\n"
     "      CATAGE\n"
     "      SEX\n"
     "  }\n"
     "}\n"
     "catagesex{\n"
     "  paste in: imputestuff\n"
     "  paste in: categoriesstuff\n"
     "}\n"
     "\n"
     "input {\n"
     "    input file: dc_pums_08.csv\n"
     "    output table: dc \n "
     "    overwrite: y \n "
     "} \n "
     " \n"
     "fields { \n"
     "SCHL: int 0-24 \n"
     "WAGP: real\n"
     "\n}"
     "impute{\n"
     "  input table: viewdc\n"
     "  output table: imputes\n"
     "  paste in: catagesex\n"
     "  method: hot deck\n"
     "  output vars: SCHL\n"
     "}\n"
     "impute{\n"
     "  input table: viewdc\n"
     "  output table: imputes\n"
     "  paste in: catagesex\n"
     "  method: hot deck\n"
     "  output vars: WAGP\n"
     "}\n"
     );

    /* Tests whether it's possible to create a macro that comprises the entire spec file
     * (which, of course, is then pasted in on its own). This includes other macros that
     * are written within the overarching macro itself. Possibly overkill? But I think
     * it's worth it to test given that different analysts might include big portions of
     * the spec file separately and could decide to use a macro to do so.
     */
     write_a_file(spec4,
     "\n"
     "database: demo.db\n"
     "wholeSpec{\n"
     "verbose: 2\n"
     "catagesex{\n"
     "  min group size: 3\n"
     "  draw count: 3\n"
     "  seed: 2332\n"
     "  categories {\n"
     "      CATAGE\n"
     "      SEX\n"
     "  }\n"
     "}\n"
     "\n"
     "input {\n"
     "    input file: dc_pums_08.csv\n"
     "    output table: dc \n "
     "    overwrite: y \n "
     "} \n "
     " \n"
     "fields { \n"
     "SCHL: int 0-24 \n"
     "WAGP: real\n"
     "\n}"
     "impute{\n"
     "  input table: viewdc\n"
     "  output table: imputes\n"
     "  paste in: catagesex\n"
     "  method: hot deck\n"
     "  output vars: SCHL\n"
     "}\n"
     "impute{\n"
     "  input table: viewdc\n"
     "  output table: imputes\n"
     "  paste in: catagesex\n"
     "  method: hot deck\n"
     "  output vars: WAGP\n"
     "}\n"
     "}\n"
     "paste in: wholeSpec\n"
     );

char *db_dummy;

     char *imp_min_grp, *imp_drw_cnt, *imp_seed, *imp_categories;

     read_spec(&spec1, &db_dummy);
     asprintf(&imp_min_grp, "impute/min group size");
     asprintf(&imp_drw_cnt, "impute/draw count");
     asprintf(&imp_seed, "impute/seed");
     asprintf(&imp_categories, "impute/categories");

     apop_data *spec1_keys1 = apop_query_to_text("select * from keys where key like "
             "'impute/m%%'");
     printf("spec1_keys1->text[0][0] is given by: %s.\n", spec1_keys1->text[0][0]);
     assert(!strcmp(imp_min_grp, spec1_keys1->text[0][0]));


     apop_data *spec1_keys2 = apop_query_to_text("select * from keys where key like "
             "'impute/d%%'");
     printf("spec1_keys2->text[0][0] is given by: %s.\n", spec1_keys2->text[0][0]);
     assert(!strcmp(imp_drw_cnt, spec1_keys2->text[0][0]));


     apop_data *spec1_keys3 = apop_query_to_text("select * from keys where key like "
             "'impute/s%%'");
     printf("spec1_keys3->text[0][0] is given by: %s.\n", spec1_keys3->text[0][0]);
     assert(!strcmp(imp_seed, spec1_keys3->text[0][0]));

     
     apop_data *spec1_keys4 = apop_query_to_text("select * from keys where key like "
             "'impute/c%%'");
     printf("spec1_keys4->text[0][0] is given by: %s.\n", spec1_keys4->text[0][0]);
     assert(!strcmp(imp_categories, spec1_keys4->text[0][0]));
     
     apop_data_free(spec1_keys1);
     apop_data_free(spec1_keys2);
     apop_data_free(spec1_keys3);
     apop_data_free(spec1_keys4);
      

     read_spec(&spec2, &db_dummy);
     char *inpt_inpt_table;
     char *inpt_otpt_table;

     asprintf(&inpt_inpt_table, "input/input table");
     asprintf(&inpt_otpt_table, "input/output table");

     apop_data *spec2_keys1 = apop_query_to_text("select * from keys where key like "
             "'impute/m%%'");
     printf("spec2_keys1->text[0][0] is given by: %s.\n", spec2_keys1->text[0][0]);
     assert(!strcmp(imp_min_grp, spec2_keys1->text[0][0]));


     apop_data *spec2_keys2 = apop_query_to_text("select * from keys where key like "
             "'impute/d%%'");
     printf("spec2_keys2->text[0][0] is given by: %s.\n", spec2_keys2->text[0][0]);
     assert(!strcmp(imp_drw_cnt, spec2_keys2->text[0][0]));


     apop_data *spec2_keys3 = apop_query_to_text("select * from keys where key like "
             "'impute/s%%'");
     printf("spec2_keys3->text[0][0] is given by: %s.\n", spec2_keys3->text[0][0]);
     assert(!strcmp(imp_seed, spec2_keys3->text[0][0]));

     
     apop_data *spec2_keys4 = apop_query_to_text("select * from keys where key like "
             "'impute/c%%'");
     printf("spec2_keys4->text[0][0] is given by: %s.\n", spec2_keys4->text[0][0]);
     assert(!strcmp(imp_categories, spec2_keys4->text[0][0]));
     
     apop_data *spec2_keys5 = apop_query_to_text("select * from keys where key like "
             "'input/input t%%'");
     printf("spec2_keys5->text[0][0] is given by: %s.\n", spec2_keys5->text[0][0]);
     assert(!strcmp(inpt_inpt_table, spec2_keys5->text[0][0]));


     apop_data *spec2_keys6 = apop_query_to_text("select * from keys where key like "
             "'input/output t%%'");
     printf("spec2_keys6->text[0][0] is given by: %s.\n", spec2_keys6->text[0][0]);
     assert(!strcmp(inpt_otpt_table, spec2_keys6->text[0][0]));

     apop_data_free(spec2_keys1);
     apop_data_free(spec2_keys2);
     apop_data_free(spec2_keys3);
     apop_data_free(spec2_keys4);
     apop_data_free(spec2_keys5);
     apop_data_free(spec2_keys6);

     read_spec(&spec3, &db_dummy);
     
     apop_data *spec3_keys1 = apop_query_to_text("select * from keys where key like "
             "'impute/m%%'");
     printf("spec3_keys1->text[0][0] is given by: %s.\n", spec3_keys1->text[0][0]);
     assert(!strcmp(imp_min_grp, spec3_keys1->text[0][0]));


     apop_data *spec3_keys2 = apop_query_to_text("select * from keys where key like "
             "'impute/d%%'");
     printf("spec3_keys2->text[0][0] is given by: %s.\n", spec3_keys2->text[0][0]);
     assert(!strcmp(imp_drw_cnt, spec3_keys2->text[0][0]));


     apop_data *spec3_keys3 = apop_query_to_text("select * from keys where key like "
             "'impute/s%%'");
     printf("spec3_keys3->text[0][0] is given by: %s.\n", spec3_keys3->text[0][0]);
     assert(!strcmp(imp_seed, spec3_keys3->text[0][0]));

     
     apop_data *spec3_keys4 = apop_query_to_text("select * from keys where key like "
             "'impute/c%%'");
     printf("spec3_keys4->text[0][0] is given by: %s.\n", spec3_keys4->text[0][0]);
     assert(!strcmp(imp_categories, spec3_keys4->text[0][0]));

     apop_data_free(spec3_keys1);
     apop_data_free(spec3_keys2);
     apop_data_free(spec3_keys3);
     apop_data_free(spec3_keys4);

     /* This is spec file that tests whether paste in works for pasting in the entire spec
      * file (without the database -- pasting in database has not been tested yet). spec 4
      * paste in stuff is tested by just testing for an assortment of keys.
      */
     read_spec(&spec4, &db_dummy);


     /* DV - ATTENTION:
      * This test is failing right now so I've put in an if statement below to exit when
      * there's no impute key to avoid a segfault in the testing. We need to fix the bug
      * that is preventing paste in from allowing an entire spec file (minus the database)
      * to be pasted in.
      */


     apop_data *spec4_keys1 = apop_query_to_text("select * from keys where key like "
             "'impute/m%%'");

     if(get_key_word("impute", NULL) == NULL) return;
     printf("spec4_keys1->text[0][0] is given by: %s.\n", spec4_keys1->text[0][0]);
     assert(!strcmp(imp_min_grp, spec4_keys1->text[0][0]));


     apop_data *spec4_keys2 = apop_query_to_text("select * from keys where key like "
             "'impute/d%%'");
     printf("spec4_keys2->text[0][0] is given by: %s.\n", spec4_keys2->text[0][0]);
     assert(!strcmp(imp_drw_cnt, spec4_keys2->text[0][0]));


     apop_data *spec4_keys3 = apop_query_to_text("select * from keys where key like "
             "'impute/s%%'");
     printf("spec4_keys3->text[0][0] is given by: %s.\n", spec4_keys3->text[0][0]);
     assert(!strcmp(imp_seed, spec4_keys3->text[0][0]));

     
     apop_data *spec4_keys4 = apop_query_to_text("select * from keys where key like "
             "'impute/c%%'");
     printf("spec4_keys4->text[0][0] is given by: %s.\n", spec4_keys4->text[0][0]);
     assert(!strcmp(imp_categories, spec4_keys4->text[0][0]));
     
     apop_data *spec4_keys5 = apop_query_to_text("select * from keys where key like "
             "'input/input t%%'");
     printf("spec4_keys5->text[0][0] is given by: %s.\n", spec4_keys5->text[0][0]);
     assert(!strcmp(inpt_inpt_table, spec4_keys5->text[0][0]));


     apop_data *spec4_keys6 = apop_query_to_text("select * from keys where key like "
             "'input/output t%%'");
     printf("spec4_keys6->text[0][0] is given by: %s.\n", spec4_keys6->text[0][0]);
     assert(!strcmp(inpt_otpt_table, spec4_keys6->text[0][0]));

     apop_data_free(spec4_keys1);
     apop_data_free(spec4_keys2);
     apop_data_free(spec4_keys3);
     apop_data_free(spec4_keys4);
     apop_data_free(spec4_keys5);
     apop_data_free(spec4_keys6);

     free(imp_min_grp);
     free(imp_drw_cnt);
     free(imp_seed);
     free(imp_categories);
     free(inpt_inpt_table);
     free(inpt_otpt_table);
     free(spec1);
     free(spec2);
     free(spec3);
     free(spec4);
     free(spec5);

     printf("Reached end of test.\n");

}