Ejemplo n.º 1
0
/** Test the goodness-of-fit between two \ref apop_pmf models. 

If you send two histograms, I assume that the histograms are synced: for PMFs,
you've used \ref apop_data_to_bins to generate two histograms using the same binspec,
or you've used \ref apop_data_pmf_compress to guarantee that each observation value
appears exactly once in each data set.

In any case, you are confident that all values in the \c observed set appear in the \c
expected set with nonzero weight; otherwise this will return a \f$\chi^2\f$ statistic
of \c GSL_POSINF, indicating that it is impossible for the \c observed data to have
been drawn from the \c expected distribution.

\li If an observation row has weight zero, I skip it. if <tt>apop_opts.verbose >=1 </tt> I will show a warning.

  \ingroup histograms
*/
apop_data *apop_histograms_test_goodness_of_fit(apop_model *observed, apop_model *expected){
    int df = observed->data->weights->size;
    double diff = 0;
    for (int i=0; i< observed->data->weights->size; i++){
        Apop_data_row(observed->data, i, one_obs);
        double obs_val = gsl_vector_get(observed->data->weights, i);
        double exp_val = apop_p(one_obs, expected);
        if (exp_val == 0){
            diff = GSL_POSINF; 
            break;
        }
        if (obs_val==0){
            Apop_notify(1, "element %i of the observed data has weight zero. Skipping it.", i);
            df --;
        } else 
            diff += gsl_pow_2(obs_val - exp_val)/exp_val;
    }
    //Data gathered. Now output
    apop_data   *out    = apop_data_alloc();
    double      toptail = gsl_cdf_chisq_Q(diff, df-1);
    sprintf(out->names->title, "Goodness-of-fit test via Chi-squared statistic");
    apop_data_add_named_elmt(out, "Chi squared statistic", diff);
    apop_data_add_named_elmt(out, "df", df-1);
    apop_data_add_named_elmt(out, "p value",  toptail); 
    apop_data_add_named_elmt(out, "confidence", 1 - toptail);
    return out;
}
Ejemplo n.º 2
0
/** Join together a list or array of strings, with optional separators between the strings.

\param strings  An \ref apop_data set with a grid of text to be combined into a single string
\param between  The text to put in between the rows of the table, such as ", ". (Default is a single space: " ")
\param before   The text to put at the head of the string. For the query example, this would be <tt>.before="select "</tt>. (Default: NULL)
\param after   The text to put at the tail of the string. For the query example, <tt>.after=" from data_table"</tt>. (Default: NULL)
\param between_cols The text to insert between columns of text. See below for an example (Default is set to equal <tt>.between</tt>)
\param prune If you don't want to use the entire text set, you can provide a function to indicate which elements should be pruned out. Some examples:

\code
//Just use column 3
int is_not_col_3(apop_data *indata, int row, int col, void *ignore){
    return col!=3;
}

//Jump over blanks as if they don't exist.
int is_blank(apop_data *indata, int row, int col, void *ignore){
    return strlen(indata->text[row][col])==0;
}
\endcode

\param prune_parameter A void pointer to pass to your \c prune function.

\return A single string with the elements of the \c strings table joined as per your specification. Allocated by the function, to be freed by you if desired.

\li If the table of strings is \c NULL or has no text, I will print only the <tt>.before</tt> and <tt>.after</tt> parts with nothing in between.
\li if <tt> apop_opts.verbose >=2</tt>, then print the pasted text to stderr.
\li This function uses the \ref designated syntax for inputs.

The sample snippet generates the SQL for a query using a list of column names (where
the query begins with <tt>select </tt>, ends with <tt>from datatab</tt>, and has commas
in between each element), re-processes the same list to produce the head of an HTML
table, then produces the body of the table with the query result (pasting the
<tt>tr</tt>s and <tt> td</tt>s into the right places).

\include sql_to_html.c
*/
APOP_VAR_HEAD char *apop_text_paste(apop_data *strings, char *between, char *before, char *after, char *between_cols, apop_fn_riip prune, void *prune_parameter){
    apop_data *apop_varad_var(strings, NULL);
    char *apop_varad_var(between, " ");
    char *apop_varad_var(before, NULL);
    char *apop_varad_var(after, NULL);
    char *apop_varad_var(between_cols, between);
    apop_fn_riip apop_varad_var(prune, NULL);
    void *apop_varad_var(prune_parameter, NULL);
APOP_VAR_ENDHEAD
    char *prior_line=NULL, *oneline=NULL, *out = before ? strdup(before) : NULL;
    for (int i=0; i< strings->textsize[0]; i++){
        free(oneline); oneline = NULL;
        for (int j=0; j< strings->textsize[1]; j++){
            if (prune && !prune(strings, i, j, prune_parameter)) continue;
            apop_tack_on(&oneline, strings->text[i][j]);
            if (j <strings->textsize[1]-1)  apop_tack_on(&oneline, between_cols);
        }
        apop_tack_on(&out, prior_line);
        if (prior_line && oneline) apop_tack_on(&out, between);
        free(prior_line);
        prior_line=oneline ? strdup(oneline): NULL;
        //if (i <strings->textsize[0]-1)  apop_tack_on(&out, between);
        //if (oneline)  apop_tack_on(&out, oneline);
    }
    apop_tack_on(&out, oneline); //the final one never got a chance to be prior_line
    apop_tack_on(&out, after);
    Apop_notify(2, "%s", out);
    return out;
}
Ejemplo n.º 3
0
static int apop_mysql_db_open(char const *in){
    Apop_assert(in, "MySQL needs a non-NULL db name.");
    mysql_db = mysql_init (NULL);
    Apop_assert(mysql_db, "mysql_init() failed (probably out of memory)");
    /* connect to server */
    if (!mysql_real_connect (mysql_db, opt_host_name, apop_opts.db_user, apop_opts.db_pass,
            in, opt_port_num, opt_socket_name, CLIENT_MULTI_STATEMENTS+opt_flags)) {
                Apop_notify(0, "mysql_real_connect() to %s failed", in);
                mysql_close (mysql_db);
                return 1;
            }
    return 0;
}
Ejemplo n.º 4
0
/** Append one list of names to another.

If the first list is empty, then this is a copy function.

\param  n1      The first set of names (no default, must not be \c NULL)
\param  nadd      The second set of names, which will be appended after the first. (no default. If \c NULL, a no-op.)
\param type1     Either 'c', 'r', 't', or 'v' stating whether you are merging the
columns, rows, text, or vector. If 'v', then ignore \c typeadd and just overwrite the
target vector name with the source name. (default: 'r')
\param typeadd     Either 'c', 'r', 't', or 'v' stating whether you are merging the columns, rows, or text. If 'v', then overwrite the target with the source vector name. (default: type1)
*/
APOP_VAR_HEAD void  apop_name_stack(apop_name * n1, apop_name *nadd, char type1, char typeadd){
    apop_name * apop_varad_var(nadd, NULL); 
    if (!nadd) return;
    apop_name * apop_varad_var(n1, NULL);
    Apop_stopif(!n1, return, 0, "Can't stack onto a NULL set of names (which n1 is).");
    char apop_varad_var(type1, 'r');
    char apop_varad_var(typeadd, type1);
APOP_VAR_ENDHEAD
    int i;
    apop_name counts = (apop_name){.rowct=nadd->rowct, .textct = nadd->textct, .colct = nadd->colct};//Necessary when stacking onto self.;
    if (typeadd == 'v')
        apop_name_add(n1, nadd->vector, 'v');
    else if (typeadd == 'r')
        for (i=0; i< counts.rowct; i++)
            apop_name_add(n1, nadd->row[i], type1);
    else if (typeadd == 't')
        for (i=0; i< counts.textct; i++)
            apop_name_add(n1, nadd->text[i], type1);
    else if (typeadd == 'c')
        for (i=0; i< counts.colct; i++)
            apop_name_add(n1, nadd->col[i], type1);
    else Apop_notify(1, "'%c' sent to apop_name_stack, but the only "
                        "valid options are r t c v. Doing nothing.", typeadd);
}

/** Copy one \ref apop_name structure to another. That is, all data is duplicated.

Used internally by \ref apop_data_copy, but sometimes useful by itself. For example,
say that we have an \ref apop_data struct named \c d and a \ref gsl_matrix of the same
dimensions named \c m; we could give \c m the labels from \c d for printing:
\code
apop_data *wrapped = &(apop_data){.matrix=m, .names=apop_name_copy(d)};
apop_data_print(wrapped);
apop_name_free(wrapped->names); //wrapped itself is auto-allocated; do not free.
\endcode
 
\param in The input names
\return   A \ref apop_name struct with copies of all input names.
*/
apop_name * apop_name_copy(apop_name *in){
    apop_name *out = apop_name_alloc();
    apop_name_stack(out, in, 'v');
    apop_name_stack(out, in, 'c');
    apop_name_stack(out, in, 'r');
    apop_name_stack(out, in, 't');
    Asprintf(&out->title, "%s", in->title);
    return out;
}
Ejemplo n.º 5
0
void apop_gsl_error(const char *reason, const char *file, int line, int gsl_errno){
    Apop_notify(1, "%s: %s", file, reason);
    Apop_maybe_abort(1);
}