Esempio n. 1
0
/* Command-line loader: parses the options listed in the usage text, then
 * passes the text file, table name and collected settings to
 * apop_text_to_db() between a "begin;" and a "commit;" query.
 *
 * Positional arguments after the options: text_file table_name dbname.
 *
 * Returns 0 after a normal run, after -h, or when fewer than three argv
 * entries were given (usage is printed); returns 1 when the positional
 * arguments are missing after option parsing or the -N list cannot be parsed.
 */
int main(int argc, char **argv){
    int c;  /* getopt() returns int; a plain char may never compare equal to -1 */
    char msg[1000];
    int colnames = 'y',
        rownames = 0,
        tab_exists_check = 0;
    char **field_names = NULL;
    apop_data *field_name_data = NULL, *field_name_data_t = NULL;
    int *field_list = NULL;

    /* Bounded formatting: the fixed text is already close to the buffer size,
       so a long argv[0] (inserted twice) must be truncated, not overflowed. */
    snprintf(msg, sizeof msg, "%s [-d delimiters] text_file table_name dbname\n"
                "e.g.: %s -d\",|\" infile.txt a_table info.db\n"
"If the input text file name is a single dash, -, then read from STDIN.\n"
"Input must be plain ASCII or UTF-8.\n"
"-d\t\tThe single-character delimiters to use, e.g., -d \" ,\" or -d \"\\t\" (which you \n"
"\t\t\twill almost certainly have to write as -d \"\\\\t\"). Default: \"| ,\\t\", meaning \n"
"\t\t\tthat any of a pipe, space, comma, or tab will delimit separate entries\n"
"-nc\t\tData does not include column names\n"
"-n regex\t\tCase-insensitive regular expression indicating Null values. Default: NaN \n"
"-m\t\tUse a mysql database (default: SQLite)\n"
"-f\t\tfixed width field ends: -f\"3,8,12,17\" (first char is one, not zero)\n"
"-u\t\tmysql username\n"
"-p\t\tmysql password\n"
"-r\t\tData includes row names\n"
"-v\t\tVerbose\n"
"-N\t\tA comma-separated list of column names: -N\"apple,banana,carrot,durian\"\n"
"-O\t\tIf table exists, erase it and write from scratch (i.e., Overwrite)\n"
"-h\t\tPrint this help\n\n"
, argv[0], argv[0]);

    if (argc < 3){
        printf("%s", msg);
        return 0;
    }

    /* NOTE(review): the strcpy() calls below copy user input into apop_opts
       fields whose capacity is not visible from here -- confirm the field
       sizes and bound these copies. */
    while ((c = getopt(argc, argv, "n:d:f:hmp:ru:vN:O")) != -1) {
        if (c == 'n') {
            /* "-nc" arrives as option n with argument "c". */
            if (optarg[0] == 'c')
                colnames = 'n';
            else
                strcpy(apop_opts.db_nan, optarg);
        }
        else if (c == 'N') {
            apop_regex(optarg, " *([^,]*[^ ]) *(,|$) *", &field_name_data);
            if (field_name_data)
                field_name_data_t = apop_data_transpose(field_name_data);
            if (!field_name_data_t || !field_name_data_t->text) {
                fprintf(stderr, "Could not parse the -N column name list.\n");
                return 1;
            }
            field_names = field_name_data_t->text[0];
        }
        else if (c == 'd') strcpy(apop_opts.input_delimiters, optarg);
        else if (c == 'f') field_list = break_down(optarg);
        else if (c == 'h') {
            /* Help is informational only: do not go on to touch the database. */
            printf("%s", msg);
            return 0;
        }
        else if (c == 'm') apop_opts.db_engine = 'm';
        else if (c == 'u') strcpy(apop_opts.db_user, optarg);
        else if (c == 'p') strcpy(apop_opts.db_pass, optarg);
        else if (c == 'r') rownames++;
        else if (c == 'v') apop_opts.verbose = 2;
        else if (c == 'O') tab_exists_check++;
    }

    /* argv[optind], argv[optind+1] and argv[optind+2] are all read below, so
       three positional arguments must remain after the options. */
    if (optind + 3 > argc) {
        fprintf(stderr, "%s", msg);
        return 1;
    }

    apop_db_open(argv[optind + 2]);
    if (tab_exists_check) apop_table_exists(argv[optind + 1], 1);
    apop_query("begin;");
    apop_text_to_db(argv[optind], argv[optind + 1], rownames, colnames, field_names, .field_ends=field_list);
    apop_query("commit;");
    return 0;
}
Esempio n. 2
0
/* Reads the "cpi" table from data-corruption.db as a country-by-year crosstab
 * of "score", fits a multivariate normal to the listwise-deleted copy, runs
 * apop_ml_imputation() on the original crosstab with that model, and writes
 * the result back to the table "cpi_clean".
 * (Presumably the listwise deletion drops rows with missing cells and the
 * imputation fills them in -- confirm against the library documentation.) */
int main(void){
    apop_db_open("data-corruption.db");

    apop_data *with_gaps = apop_db_to_crosstab("cpi", "country", "year", "score");
    apop_data *complete_rows = apop_data_listwise_delete(with_gaps);

    apop_model *fitted_mvn = apop_estimate(complete_rows, apop_multivariate_normal);
    apop_ml_imputation(with_gaps, fitted_mvn);

    apop_crosstab_to_db(with_gaps, "cpi_clean", "country", "year", "score");
    return 0;
}
Esempio n. 3
0
/* Command-line tool: opens a database, pulls a table out as a crosstab with
 * apop_db_to_crosstab(), and prints it with apop_data_print() either to the
 * file given by -f or, when no file is given, wherever apop_data_print()
 * sends output for a NULL name.
 *
 * Positional arguments after the options:
 *     dbname table_name row_column column_column data_column
 *
 * Returns 0 after a normal run, after -h, or when fewer than five argv
 * entries were given (usage is printed). */
int main(int argc, char **argv){
    int c;              /* getopt() returns int; a plain char may never equal -1 */
    int verbose = 0;
    char *outfile = NULL;
    char msg[1000];
    apop_data *m;

    /* Bounded formatting so a long argv[0] cannot overrun msg. */
    snprintf(msg, sizeof msg, "%s [opts] dbname table_name rows columns data\n\n"
            "-d\tdelimiter\t\tdefault= \"|,<space><tab>\"\n"
            "-a\tappend\t\t\tdefault= append\n"
            "-o\toverwrite\t\tdefault= append\n"
            "-v\tverbose: prints status info on stderr and raises apop_opts.verbose by one for each use (so use -v -v for extra-verbose)\n"
            "-f\tfile to dump to\t\tdefault=STDOUT\n", argv[0]);

    if (argc < 5){
        printf("%s", msg);
        return 0;
    }

    /* 'v' must be in the option string, otherwise getopt() reports -v as an
       unknown option and the verbose case below can never run. */
    while ((c = getopt(argc, argv, "ad:f:hov")) != -1){
        switch (c){
          case 'a':
              apop_opts.output_append = 1;
              break;
          case 'd':
              /* NOTE(review): unbounded copy; the capacity of
                 apop_opts.output_delimiter is not visible here -- confirm. */
              strcpy(apop_opts.output_delimiter, optarg);
              break;
          case 'o':
              apop_opts.output_append = 0;
              break;
          case 'f':
              /* optarg points into argv, which outlives this function's use
                 of it, so no copy (and no fixed-size buffer) is needed. */
              outfile = optarg;
              apop_opts.output_type = 'f';
              break;
          case 'h':
              printf("%s", msg);
              return 0;
          case 'v':
              verbose++;
              apop_opts.verbose++;
              break;
          default:
              /* Unknown option: getopt() has already reported it. */
              break;
        }
    }

    /* argv[optind] .. argv[optind+4] are all read below: five arguments. */
    Apop_assert(optind+5 <= argc, "I need five arguments past the options: database, table, row col, column col, data col");
    if (verbose){
        fprintf(stderr, "database:%s\ntable: %s\nrow col: %s\ncol col:%s\ndata col:%s\n",
            argv[optind], argv[optind+1], argv[optind+2], argv[optind+3], argv[optind+4]);
        if (outfile) fprintf(stderr, "outfile: %s\n", outfile);
        else         fprintf(stderr, "output to stdout\n");
        if (apop_opts.output_append) fprintf(stderr, "appending to output\n");
        else                         fprintf(stderr, "overwriting output\n");
    }
    apop_db_open(argv[optind]);
    m = apop_db_to_crosstab(argv[optind+1], argv[optind+2], argv[optind+3], argv[optind+4]);
    apop_data_print(m, outfile);
    return 0;
}
Esempio n. 4
0
/* Opens data-census.db and returns the result of a single query joining the
 * geography, income, demos and postcodes tables: one row per postcode (used
 * as row_names) with m_per_100_f, population in millions, and median_age,
 * restricted to income.sumlevel '040'.
 * The returned pointer is whatever apop_query_to_data() produced; it is not
 * checked here. */
apop_data *query_data(){
    apop_data *census_rows;

    apop_db_open("data-census.db");
    census_rows = apop_query_to_data(
            " select postcode as row_names, "
            " m_per_100_f, population/1e6 as population, median_age "
            " from geography, income,demos,postcodes "
            " where income.sumlevel= '040' "
            " and geography.geo_id = demos.geo_id  "
            " and income.geo_name = postcodes.state "
            " and geography.geo_id = income.geo_id ");
    return census_rows;
}
Esempio n. 5
0
gsl_matrix *query(char *d, char *q, int no_plot){
	apop_db_open(d);
    apop_data *result = apop_query_to_data("%s", q);
	apop_db_close(0);
    Apop_stopif(!result, exit(2), 0, "Your query returned a blank table. Quitting.");
    Apop_stopif(result->error, exit(2), 0, "Error running your query. Quitting.");
    if (no_plot){
        apop_data_show(result);
        exit(0);
    }
    return result->matrix;
}
Esempio n. 6
0
/* Builds a PMF from the pcp column of the precip table, prints that model to
 * the file "out.h", sets up a kernel-density model on top of the PMF with a
 * Normal(0, 0.1) kernel, hands it to plot() with the name "out.k", and prints
 * a gnuplot command referring to both files.
 *
 * Returns 1 if the query gives no data or "out.h" cannot be opened or written. */
int main(){
    apop_db_open("data-climate.db");

    apop_data *data = apop_query_to_data("select pcp from precip");
    if (!data){
        /* data->weights is dereferenced below, so stop on an empty result. */
        fprintf(stderr, "The precip query returned no data.\n");
        return 1;
    }
    apop_data_pmf_compress(data); //creates a weights vector
    apop_vector_normalize(data->weights);
    apop_data_sort(data);
    apop_model *pmf = apop_estimate(data, apop_pmf);

    FILE *outfile = fopen("out.h", "w");
    if (!outfile){
        perror("out.h");
        return 1;
    }
    apop_model_print(pmf, outfile);
    /* Close explicitly so the file is complete (and write errors are seen)
       before the plot command that refers to it is printed. */
    if (fclose(outfile) != 0){
        perror("out.h");
        return 1;
    }

    apop_model *kernel = apop_model_set_parameters(apop_normal, 0., 0.1);
    apop_model *k = apop_model_copy(apop_kernel_density);
    Apop_settings_add_group(k, apop_kernel_density, .base_pmf=pmf, .kernel=kernel);
    plot(k, "out.k");
    printf("plot 'out.h' with lines title 'data', 'out.k' with lines title 'smoothed'\n");
    return 0;
}
Esempio n. 7
0
/* Bootstrap of the mean of in_per_capita over the rows with sumlevel 40:
 * draws 10000 resamples with one_boot(), records each resample's mean, and
 * reports the mean and standard deviation of those means together with how
 * far the single value for geo_id2 44 (RI) lies from that mean. */
int main(){
    const int draw_ct = 10000;
    gsl_rng *rng = apop_rng_alloc(0);

    apop_db_open("data-census.db");
    gsl_vector *incomes = apop_query_to_vector("select in_per_capita from income where sumlevel+0.0 =40");
    double RI = apop_query_to_float("select in_per_capita from income where sumlevel+0.0 =40 and geo_id2+0.0=44");

    gsl_vector *resample   = gsl_vector_alloc(incomes->size);
    gsl_vector *boot_means = gsl_vector_alloc(draw_ct);

    int draw = 0;
    while (draw < draw_ct){
        /* one_boot() fills resample from incomes using rng. */
        one_boot(incomes, rng, resample);
        gsl_vector_set(boot_means, draw, apop_mean(resample));
        draw++;
    }

    double spread = sqrt(apop_var(boot_means));
    double center = apop_mean(boot_means);
    printf("mean: %g; standard error: %g; (RI-mean)/stderr: %g; p value: %g\n",
       center, spread, (RI-center)/spread, 2*gsl_cdf_gaussian_Q(fabs(RI-center), spread));
}
Esempio n. 8
0
/* Two-sample comparison of in_per_capita between North Dakota and South
 * Dakota: computes a t-type statistic from the two groups' means and
 * variances and prints the two-sided confidence level derived from the
 * t distribution with (n_north + n_south - 2) degrees of freedom. */
int main(){
    apop_db_open("data-census.db");

    gsl_vector *north = apop_query_to_vector("select in_per_capita from income "
            "where state= (select state from geography where name ='North Dakota')");
    gsl_vector *south = apop_query_to_vector("select in_per_capita from income  "
            "where state= (select state from geography where name ='South Dakota')");

    double north_ct   = north->size;
    double north_mean = apop_vector_mean(north);
    double north_var  = apop_vector_var(north);
    double south_ct   = south->size;
    double south_mean = apop_vector_mean(south);
    double south_var  = apop_vector_var(south);

    /* NOTE(review): each variance is divided by (count-1); if apop_vector_var
       already normalizes by (count-1), confirm this divisor is intended. */
    double stat = fabs(north_mean - south_mean)
                    / sqrt(north_var/(north_ct-1) + south_var/(south_ct-1));
    double upper_tail = gsl_cdf_tdist_Q(stat, north_ct + south_ct -2);
    double confidence = 1 - (2 * upper_tail);

    printf("Reject the null with %g%% confidence\n", confidence*100);
}
Esempio n. 9
0
/* Queries riders and year for stations matching 'Silver%' from data-metro.db,
 * fits OLS to the result, shows the fitted model on the terminal, and writes
 * a gnuplot script ("scatter.gplot") consisting of a short header followed by
 * whatever apop_plot_line_and_scatter() emits for the data and the fit.
 *
 * Returns 1 if the query gives no data or the script file cannot be written. */
int main(){
    char outfile[] = "scatter.gplot";

    apop_db_open("data-metro.db");
    /* The query goes through a printf-style call, hence the doubled %%. */
    apop_data *data = apop_query_to_data("select riders, year from riders where station like 'Silver%%' and riders>0");
    apop_db_close();
    if (!data){
        fprintf(stderr, "The riders query returned no data.\n");
        return 1;
    }

    //The regression destroys your data, so copy it first.
    apop_data *data_copy = apop_data_copy(data);

    //Run OLS, display results on terminal
    apop_model *est = apop_estimate(data, apop_OLS);
    apop_model_show(est);

    //Prep the file with a header, then call the function.
    FILE *f = fopen(outfile, "w");
    if (!f){
        /* Writing to a NULL stream would crash; report the reason instead. */
        perror(outfile);
        return 1;
    }
    fprintf(f,"set term postscript;\n set output \"scatter.eps\"\n set yrange [0:*]\n");
    apop_plot_line_and_scatter(data_copy, est, .output_pipe=f);
    if (fclose(f) != 0){
        perror(outfile);
        return 1;
    }
    return 0;
}
Esempio n. 10
0
ykl_s make_yule(char const *zila, int y) {
    static gsl_matrix *indices;
    if (!indices) {
        indices = gsl_matrix_calloc(65,1);
        for (int i=0; i< 64; i++) gsl_matrix_set(indices, i,0, i);
    }
    apop_data *col = make_histo(zila, y);
    apop_data ww = (apop_data) {
        .weights=col->vector, .matrix=indices
    };
    apop_data *d = apop_data_transpose(col);
    apop_data *exp = apop_data_rank_expand(d);
    apop_model *m = apop_estimate(exp, apop_yule);
    apop_model *n = apop_estimate(exp, apop_lognormal);
    ykl_s out = (ykl_s) {
        .yule=apop_data_get(m->parameters, .col=-1/*, .rowname="mu"*/),
         .ln=apop_data_get(n->parameters, .col=-1/*, .rowname="mu"*/),
          .lnstderr=sqrt(apop_data_get(n->parameters, .col=-1, .row=1/*, .rowname="mu"*/)),
           .kl = apop_kl_divergence(apop_estimate(&ww, apop_pmf), m),
            .lnkl = apop_kl_divergence(apop_estimate(&ww, apop_pmf), n),
             .mean = apop_matrix_mean(col->matrix)
    };
    apop_data_free(d);
    apop_data_free(exp);
    apop_model_free(m);
    return out;
}

/* Prints a pipe-delimited table: a header line, then one line per
 * (admname, year) pair for every admname in the ppl table of b.db and every
 * year from 2001 through 2005, with the values make_yule() computes. */
int main() {
    printf("zila|year|yule_p|kl_div|mu|ln_mu|ln_sigma|ln_kl\n");
    apop_db_open("b.db");
    apop_data *zilas = apop_query_to_text("select admname from ppl");

    for (int row = 0; row < zilas->textsize[0]; row++) {
        /* First text column of this row holds the name. */
        char *name = zilas->text[row][0];
        for (int year = 2001; year <= 2005; year++) {
            ykl_s fit = make_yule(name, year);
            printf("%20s| %i| %g| %g| %g| %g| %g|%g\n", name, year, fit.yule, fit.kl, fit.mean, fit.ln, fit.lnstderr, fit.lnkl);
        }
    }
}
Esempio n. 11
0
/* Command-line loader: parses the options listed in the usage text, then
 * passes the text file, table name and collected settings to
 * apop_text_to_db() between a "begin;" and a "commit;" query.
 *
 * Positional arguments after the options: text_file table_name dbname.
 *
 * Returns 0 after a normal run, after -h, or when fewer than three argv
 * entries were given (usage is printed); returns 1 when the positional
 * arguments are missing after option parsing.
 */
int main(int argc, char **argv){
    int c;  /* getopt() returns int; a plain char may never compare equal to -1 */
    char msg[1000];
    int colnames = 1,
        rownames = 0,
        tab_exists_check = 0;
    int *field_list = NULL;

    /* Bounded formatting: the fixed text is already close to the buffer size,
       so a long argv[0] (inserted twice) must be truncated, not overflowed. */
    snprintf(msg, sizeof msg, "%s [-d delimiters] text_file table_name dbname\n"
                "e.g.: %s -d\",|\" infile.txt a_table info.db\n"
"If the input text file name is a single dash, -, then read from STDIN.\n"
"Input must be plain ASCII or UTF-8.\n"
"-d\t\tThe single-character delimiters to use, e.g., -d \" ,\" or -d \"\\t\" (which you \n"
"\t\t\twill almost certainly have to write as -d \"\\\\t\"). Default: \"| ,\\t\", meaning \n"
"\t\t\tthat any of a pipe, space, comma, or tab will delimit separate entries\n"
"-nc\t\tData does not include column names\n"
"-n regex\t\tCase-insensitive regular expression indicating Null values. Default: NaN \n"
"-m\t\tUse a mysql database (default: SQLite)\n"
"-f\t\tfixed width field ends: -f\"3,8,12,17\" (first char is one, not zero)\n"
"-u\t\tmysql username\n"
"-p\t\tmysql password\n"
"-r\t\tData includes row names\n"
"-v\t\tVerbose\n"
"-O\t\tIf table exists, erase it and write from scratch (i.e., Overwrite)\n"
"-h\t\tPrint this help\n\n"
, argv[0], argv[0]);

    if (argc < 3){
        printf("%s", msg);
        return 0;
    }

    /* NOTE(review): the strcpy() calls below copy user input into apop_opts
       fields whose capacity is not visible from here -- confirm the field
       sizes and bound these copies. */
    while ((c = getopt(argc, argv, "n:d:f:hmp:ru:vO")) != -1){
        switch (c){
          case 'n':
              /* "-nc" arrives as option n with argument "c". Assign rather
                 than decrement so a repeated -nc cannot go negative. */
              if (optarg[0] == 'c')
                  colnames = 0;
              else
                  strcpy(apop_opts.db_nan, optarg);
              break;
          case 'd':
              strcpy(apop_opts.input_delimiters, optarg);
              break;
          case 'f':
              field_list = break_down(optarg);
              break;
          case 'h':
              printf("%s", msg);
              return 0;
          case 'm':
              apop_opts.db_engine = 'm';
              break;
          case 'u':
              strcpy(apop_opts.db_user, optarg);
              break;
          case 'p':
              strcpy(apop_opts.db_pass, optarg);
              break;
          case 'r':
              rownames++;
              break;
          case 'v':
              apop_opts.verbose++;
              break;
          case 'O':
              tab_exists_check++;
              break;
          default:
              /* Unknown option: getopt() has already reported it. */
              break;
        }
    }

    /* argv[optind], argv[optind+1] and argv[optind+2] are all read below, so
       three positional arguments must remain after the options. */
    if (optind + 3 > argc) {
        fprintf(stderr, "%s", msg);
        return 1;
    }

    apop_db_open(argv[optind + 2]);
    if (tab_exists_check) apop_table_exists(argv[optind + 1], 1);
    apop_query("begin;");
    apop_text_to_db(argv[optind], argv[optind + 1], rownames, colnames, NULL, .field_ends=field_list);
    apop_query("commit;");
    return 0;
}