Beispiel #1
0
void R_set_response(SEXP learnsample, SEXP y) {

    double *v, *t, *j, *dy, *p;
    int i, n;
    
    n = LENGTH(y);
    dy = REAL(y);
    
    if (LENGTH(R_get_response(learnsample)) != n)
        error("lengths of arguments don't match");
    
    v = REAL(VECTOR_ELT(GET_SLOT(GET_SLOT(learnsample, PL2_responsesSym), 
                                 PL2_variablesSym), 0));
    t = REAL(VECTOR_ELT(GET_SLOT(GET_SLOT(learnsample, PL2_responsesSym), 
                                 PL2_transformationsSym), 0));
    j = REAL(get_test_trafo(GET_SLOT(learnsample, PL2_responsesSym)));
    p = REAL(get_predict_trafo(GET_SLOT(learnsample, PL2_responsesSym)));
    
    for (i = 0; i < n; i++) {
        v[i] = dy[i];
        t[i] = dy[i];
        j[i] = dy[i];
        p[i] = dy[i];
    }
}
Beispiel #2
0
SEXP ctree_memory (SEXP object, SEXP MP_INV) {

    SEXP ans, weights, splitstatistics, dontuse, dontusetmp, varmemory;
    int q, p, nobs, ninputs;

    q = ncol(get_test_trafo(GET_SLOT(object, PL2_responsesSym)));

    ninputs = get_ninputs(object);
    nobs = get_nobs(object);

    ans = PROTECT(NEW_OBJECT(MAKE_CLASS("TreeFitMemory")));
    SET_SLOT(ans, PL2_expcovinfSym, PROTECT(new_ExpectCovarInfluence(q)));
    SET_SLOT(ans, PL2_expcovinfssSym, PROTECT(new_ExpectCovarInfluence(1)));
    SET_SLOT(ans, PL2_linexpcov2sampleSym, PROTECT(new_LinStatExpectCovar(1, q)));

    SET_SLOT(ans, PL2_weightsSym, weights = PROTECT(allocVector(REALSXP, nobs)));
    for (int i = 0; i < nobs; i++)
        REAL(weights)[i] = 0.0;
    SET_SLOT(ans, PL2_splitstatisticsSym, splitstatistics = PROTECT(allocVector(REALSXP, nobs)));
    for (int i = 0; i < nobs; i++)
        REAL(splitstatistics)[i] = 0.0;
    SET_SLOT(ans, PL2_dontuseSym, dontuse = PROTECT(allocVector(LGLSXP, ninputs)));
    for (int i = 0; i < ninputs; i++)
        LOGICAL(dontuse)[i] = 0.0;
    SET_SLOT(ans, PL2_dontusetmpSym, dontusetmp = PROTECT(allocVector(LGLSXP, ninputs)));
    for (int i = 0; i < ninputs; i++)
        LOGICAL(dontusetmp)[i] = 0.0;


    varmemory = PROTECT(allocVector(VECSXP, ninputs));

    for (int i = 0; i < ninputs; i++) {

        p = ncol(get_transformation(GET_SLOT(object, PL2_inputsSym), i + 1));

        if (LOGICAL(MP_INV)[0]) {
            SET_VECTOR_ELT(varmemory, i, new_LinStatExpectCovarMPinv(p, q));
        } else {
            SET_VECTOR_ELT(varmemory, i, new_LinStatExpectCovar(p, q));
        }
    }

    SET_SLOT(ans, PL2_varmemorySym, varmemory);

    UNPROTECT(9);
    return(ans);
}
Beispiel #3
0
void C_Node(SEXP node, SEXP learnsample, SEXP weights,
            SEXP fitmem, SEXP controls, int TERMINAL, int depth) {

    int nobs, ninputs, jselect, q, j, k, i;
    double mincriterion, sweights, *dprediction;
    double *teststat, *pvalue, smax, cutpoint = 0.0, maxstat = 0.0;
    double *standstat, *splitstat;
    SEXP responses, inputs, x, expcovinf, linexpcov;
    SEXP varctrl, splitctrl, gtctrl, tgctrl, split, testy, predy;
    double *dxtransf, *thisweights;
    int *itable;

    nobs = get_nobs(learnsample);
    ninputs = get_ninputs(learnsample);
    varctrl = get_varctrl(controls);
    splitctrl = get_splitctrl(controls);
    gtctrl = get_gtctrl(controls);
    tgctrl = get_tgctrl(controls);
    mincriterion = get_mincriterion(gtctrl);
    responses = GET_SLOT(learnsample, PL2_responsesSym);
    inputs = GET_SLOT(learnsample, PL2_inputsSym);
    testy = get_test_trafo(responses);
    predy = get_predict_trafo(responses);
    q = ncol(testy);

    /* <FIXME> we compute C_GlobalTest even for TERMINAL nodes! </FIXME> */

    /* compute the test statistics and the node criteria for each input */
    C_GlobalTest(learnsample, weights, fitmem, varctrl,
                 gtctrl, get_minsplit(splitctrl),
                 REAL(S3get_teststat(node)), REAL(S3get_criterion(node)), depth);

    /* sum of weights: C_GlobalTest did nothing if sweights < mincriterion */
    sweights = REAL(GET_SLOT(GET_SLOT(fitmem, PL2_expcovinfSym),
                             PL2_sumweightsSym))[0];
    REAL(VECTOR_ELT(node, S3_SUMWEIGHTS))[0] = sweights;

    /* compute the prediction of this node */
    dprediction = REAL(S3get_prediction(node));

    /* <FIXME> feed raw numeric values OR dummy encoded factors as y
       Problem: what happens for survival times ? */
    C_prediction(REAL(predy), nobs, ncol(predy), REAL(weights),
                 sweights, dprediction);
    /* </FIXME> */

    teststat = REAL(S3get_teststat(node));
    pvalue = REAL(S3get_criterion(node));

    /* try the two out of ninputs best inputs variables */
    /* <FIXME> be more flexible and add a parameter controlling
               the number of inputs tried </FIXME> */
    for (j = 0; j < 2; j++) {

        smax = C_max(pvalue, ninputs);
        REAL(S3get_maxcriterion(node))[0] = smax;

        /* if the global null hypothesis was rejected */
        if (smax > mincriterion && !TERMINAL) {

            /* the input variable with largest association to the response */
            jselect = C_whichmax(pvalue, teststat, ninputs) + 1;

            /* get the raw numeric values or the codings of a factor */
            x = get_variable(inputs, jselect);
            if (has_missings(inputs, jselect)) {
                expcovinf = GET_SLOT(get_varmemory(fitmem, jselect),
                                     PL2_expcovinfSym);
                thisweights = C_tempweights(jselect, weights, fitmem, inputs);
            } else {
                expcovinf = GET_SLOT(fitmem, PL2_expcovinfSym);
                thisweights = REAL(weights);
            }

            /* <FIXME> handle ordered factors separatly??? </FIXME> */
            if (!is_nominal(inputs, jselect)) {

                /* search for a split in a ordered variable x */
                split = S3get_primarysplit(node);

                /* check if the n-vector of splitstatistics
                   should be returned for each primary split */
                if (get_savesplitstats(tgctrl)) {
                    C_init_orderedsplit(split, nobs);
                    splitstat = REAL(S3get_splitstatistics(split));
                } else {
                    C_init_orderedsplit(split, 0);
                    splitstat = REAL(get_splitstatistics(fitmem));
                }

                C_split(REAL(x), 1, REAL(testy), q, thisweights, nobs,
                        INTEGER(get_ordering(inputs, jselect)), splitctrl,
                        GET_SLOT(fitmem, PL2_linexpcov2sampleSym),
                        expcovinf, REAL(S3get_splitpoint(split)), &maxstat,
                        splitstat);
                S3set_variableID(split, jselect);
            } else {

                /* search of a set of levels (split) in a numeric variable x */
                split = S3get_primarysplit(node);

                /* check if the n-vector of splitstatistics
                   should be returned for each primary split */
                if (get_savesplitstats(tgctrl)) {
                    C_init_nominalsplit(split,
                                        LENGTH(get_levels(inputs, jselect)),
                                        nobs);
                    splitstat = REAL(S3get_splitstatistics(split));
                } else {
                    C_init_nominalsplit(split,
                                        LENGTH(get_levels(inputs, jselect)),
                                        0);
                    splitstat = REAL(get_splitstatistics(fitmem));
                }

                linexpcov = get_varmemory(fitmem, jselect);
                standstat = Calloc(get_dimension(linexpcov), double);
                C_standardize(REAL(GET_SLOT(linexpcov,
                                            PL2_linearstatisticSym)),
                              REAL(GET_SLOT(linexpcov, PL2_expectationSym)),
                              REAL(GET_SLOT(linexpcov, PL2_covarianceSym)),
                              get_dimension(linexpcov), get_tol(splitctrl),
                              standstat);

                C_splitcategorical(INTEGER(x),
                                   LENGTH(get_levels(inputs, jselect)),
                                   REAL(testy), q, thisweights,
                                   nobs, standstat, splitctrl,
                                   GET_SLOT(fitmem, PL2_linexpcov2sampleSym),
                                   expcovinf, &cutpoint,
                                   INTEGER(S3get_splitpoint(split)),
                                   &maxstat, splitstat);

                /* compute which levels of a factor are available in this node
                   (for printing) later on. A real `table' for this node would
                   induce too much overhead here. Maybe later. */

                itable = INTEGER(S3get_table(split));
                dxtransf = REAL(get_transformation(inputs, jselect));
                for (k = 0; k < LENGTH(get_levels(inputs, jselect)); k++) {
                    itable[k] = 0;
                    for (i = 0; i < nobs; i++) {
                        if (dxtransf[k * nobs + i] * thisweights[i] > 0) {
                            itable[k] = 1;
                            continue;
                        }
                    }
                }

                Free(standstat);
            }
            if (maxstat == 0) {
                if (j == 1) {
                    S3set_nodeterminal(node);
                } else {
                    /* do not look at jselect in next iteration */
                    pvalue[jselect - 1] = R_NegInf;
                }
            } else {
                S3set_variableID(split, jselect);
                break;
            }
        } else {