コード例 #1
0
/** If there is an NaN anywhere in the row of data (including the matrix, the vector, the weights, and the text) then delete the row from the data set.

\li If every row has an NaN, then this returns \c NULL. In that case the function
    returns before touching the input, so \c d is left unchanged even when \c inplace = 'y'.
\li If \c apop_opts.db_nan is not \c NULL, then I will use that as a (POSIX extended,
    case-insensitive) regular expression to check the text elements for bad data as well.
\li If \c inplace = 'y' (or 'Y'), the flagged rows are removed from \c d itself via
    \c apop_data_rm_rows and \c d is returned. Otherwise \c d is duplicated with
    \c apop_data_copy, the rows are removed from the duplicate, and the original data
    set is left unmodified.
\li The number of rows considered is the larger of the numeric row count (the vector's
    length if there is a vector, else the matrix's row count) and the text row count.
    Weights beyond that row count are ignored.
\li I only look at the first page of data (i.e. the \c more element is ignored).
\li This function uses the \ref designated syntax for inputs.

    \param d    The data, with NaNs
    \param inplace If \c 'y', prune the pointer-to-\ref apop_data that
    you sent in. If \c 'n' (the default), leave the
    set alone and return a new data set.
    \return     A (potentially shorter) copy of the data set, without
    NaNs. If <tt>inplace=='y'</tt>, redundant with the input. If the entire data set is
    cleared out, if \c d is \c NULL or has no vector, matrix, or text, or if the
    scratch array cannot be allocated, then this will be \c NULL.
*/
APOP_VAR_HEAD apop_data * apop_data_listwise_delete(apop_data *d, char inplace){
    apop_data * apop_varad_var(d, NULL);
    if (!d) return NULL;
    char apop_varad_var(inplace, 'n');
APOP_VAR_ENDHEAD
    Get_vmsizes(d) //defines firstcol, vsize, wsize, msize1, msize2.
    apop_assert_c(msize1 || vsize || d->textsize[0], NULL, 0, 
            "You sent to apop_data_listwise_delete a data set with NULL matrix, NULL vector, and no text. "
            "Confused, it is returning NULL.");

    //Row counts. NOTE(review): this assumes the vector and matrix have the same
    //number of rows when both are present -- confirm against callers.
    int numeric_rows = vsize ? vsize : msize1;
    int len = GSL_MAX(numeric_rows, d->textsize[0]);

    //marked[i] != 0 means row i is to be dropped. calloc starts every row unmarked.
    int *marked = calloc(len, sizeof *marked);
    apop_assert_c(marked, NULL, 0, "Out of memory allocating the row markers; returning NULL.");

    //Numeric pass: flag any row with an NaN between columns firstcol and msize2-1.
    //(Presumably firstcol is -1 when a vector is present, so the vector is included.)
    for (int i=0; i< numeric_rows; i++)
        for (int j=firstcol; j <msize2; j++)
            if (gsl_isnan(apop_data_get(d, i, j))){
                marked[i] = 1;
                break;
            }

    //Weights pass. The extra i < len bound keeps a weights vector that is longer
    //than the data from writing past the end of marked[].
    for (int i=0; i< wsize && i < len; i++)
        if (gsl_isnan(gsl_vector_get(d->weights, i)))
            marked[i] = 1;

    //Text pass: only if the user supplied a pattern describing what a bad entry looks like.
    if (d->textsize[0] && apop_opts.db_nan){
        regex_t    rex;
        int compiled_ok = !regcomp(&rex, apop_opts.db_nan, REG_EXTENDED +  REG_ICASE + REG_NOSUB);
        //NOTE(review): if apop_assert returns rather than halting, marked leaks here -- confirm.
        apop_assert(compiled_ok, "apop_opts.db_nan needs to be a regular expression that "
                                "I can use to check the text element of your data set for "
                                "NaNs, But compiling %s into a regex failed. Or, set "
                                "apop_opts.db_nan=NULL to bypass text checking.", apop_opts.db_nan);
        for(int i=0; i< d->textsize[0]; i++){
            if (marked[i]) continue;    //already slated for removal; skip the regex work.
            for(int j=0; j< d->textsize[1]; j++)
                if (!regexec(&rex, d->text[i][j], 0, 0, 0)){ //regexec returns 0 on a match.
                    marked[i] = 1;
                    break;
                }
        }
        regfree(&rex);
    }

    //If no row survives, there is nothing to return.
    int not_empty = 0;
    for (int i=0; i< len; i++)
        if (!marked[i]){
            not_empty = 1;
            break;
        }
    if (!not_empty){
        free(marked);
        return NULL;
    }

    apop_data *out = (inplace=='y'|| inplace=='Y') ? d : apop_data_copy(d);
    apop_data_rm_rows(out, marked);
    free(marked);
    return out;
}
コード例 #2
0
ファイル: apop_smoothing.c プロジェクト: rlowrance/Apophenia
/** Return a new vector that is the moving average of the input vector.

 Each output element is the unweighted mean of a window of <tt>2*(bandwidth/2)+1</tt>
 consecutive input elements, so an even \c bandwidth behaves like the next odd
 number up. Only complete windows are produced: the output has
 <tt>v->size - 2*(bandwidth/2)</tt> elements, and output element \c i is the mean of
 input elements \c i through <tt>i + 2*(bandwidth/2)</tt>.

 \param v The input vector, unsmoothed
 \param bandwidth The number of elements to be smoothed. Must be >=1.
 \return A newly allocated vector holding the smoothed values (release it with
 \c gsl_vector_free), or \c NULL if \c v is \c NULL, if \c v is too short to hold
 the window, or if the output vector could not be allocated.
 */
gsl_vector *apop_vector_moving_average(gsl_vector *v, size_t bandwidth) {
    apop_assert_c(v,  NULL, 0, "You asked me to smooth a NULL vector; returning NULL.\n");
    apop_assert_s(bandwidth, "Bandwidth must be >=1.\n");
    size_t halfspan = bandwidth/2;
    size_t window   = 2*halfspan + 1;
    //v->size is unsigned, so without this check the subtraction below would wrap
    //around to an enormous length for a too-short input.
    apop_assert_c(v->size >= 2*halfspan, NULL, 0,
            "The vector is shorter than the smoothing bandwidth; returning NULL.\n");
    gsl_vector *vout = gsl_vector_calloc(v->size - 2*halfspan);
    apop_assert_c(vout, NULL, 0, "Couldn't allocate the smoothed vector; returning NULL.\n");
    for (size_t i=0; i < vout->size; i++) {
        //Window for output i covers input elements i .. i+2*halfspan.
        double sum = 0;
        for (size_t k=0; k < window; k++)
            sum += gsl_vector_get(v, i + k);
        gsl_vector_set(vout, i, sum / window);
    }
    return vout;
}
コード例 #3
0
ファイル: apop_smoothing.c プロジェクト: rlowrance/Apophenia
/** Return a new histogram that is the moving average of the input histogram.

 The bin contents of the input are smoothed with \ref apop_vector_moving_average and
 written into the bins of a copy of the model. The first and last
 <tt>bandwidth/2</tt> bins have no complete window around them and are set to zero.

 \param m A histogram, in \c apop_model form.
 \param bandwidth The number of elements to be smoothed. Must be >=1.
 \return A copy of \c m (made with \c apop_model_copy) whose histogram bins hold the
 smoothed values, or \c NULL if \c m is not a histogram model, if the histogram has
 fewer bins than the smoothing window needs, or if smoothing failed.
 */
apop_model *apop_histogram_moving_average(apop_model *m, size_t bandwidth) {
    apop_assert_c(m && !strcmp(m->name, "Histogram"), NULL, 0, "The first argument needs to be an apop_histogram model.");
    apop_assert_s(bandwidth, "bandwidth must be an integer >=1.");
    size_t halfspan  = bandwidth/2;
    gsl_histogram *h = Apop_settings_get(m, apop_histogram, pdf);
    //Reject too-small histograms up front; the smoother cannot form a window from them.
    apop_assert_c(h->n >= 2*halfspan, NULL, 0,
            "The histogram has fewer bins than the smoothing bandwidth; returning NULL.");

    //Do the smoothing before copying the model, so a failure here leaves nothing to clean up
    //but the temporary vector.
    gsl_vector *bins     = apop_array_to_vector(h->bin, h->n);
    gsl_vector *smoothed = apop_vector_moving_average(bins, bandwidth);
    if (!smoothed) {
        if (bins) gsl_vector_free(bins);
        return NULL;
    }

    //NOTE(review): presumably the copy owns its own histogram, so writing to hout
    //leaves the input model's bins alone -- confirm against apop_model_copy.
    apop_model *out     = apop_model_copy(*m);
    gsl_histogram *hout = Apop_settings_get(out, apop_histogram, pdf);
    for (size_t i=0; i < h->n; i++) {
        //smoothed[k] is centered on input bin k+halfspan; bins outside that range are edges.
        int is_edge = i < halfspan || i >= smoothed->size + halfspan;
        hout->bin[i] = is_edge ? 0 : gsl_vector_get(smoothed, i - halfspan);
    }
    gsl_vector_free(bins);
    gsl_vector_free(smoothed);
    return out;
}