void R_set_response(SEXP learnsample, SEXP y) { double *v, *t, *j, *dy, *p; int i, n; n = LENGTH(y); dy = REAL(y); if (LENGTH(R_get_response(learnsample)) != n) error("lengths of arguments don't match"); v = REAL(VECTOR_ELT(GET_SLOT(GET_SLOT(learnsample, PL2_responsesSym), PL2_variablesSym), 0)); t = REAL(VECTOR_ELT(GET_SLOT(GET_SLOT(learnsample, PL2_responsesSym), PL2_transformationsSym), 0)); j = REAL(get_test_trafo(GET_SLOT(learnsample, PL2_responsesSym))); p = REAL(get_predict_trafo(GET_SLOT(learnsample, PL2_responsesSym))); for (i = 0; i < n; i++) { v[i] = dy[i]; t[i] = dy[i]; j[i] = dy[i]; p[i] = dy[i]; } }
SEXP ctree_memory (SEXP object, SEXP MP_INV) { SEXP ans, weights, splitstatistics, dontuse, dontusetmp, varmemory; int q, p, nobs, ninputs; q = ncol(get_test_trafo(GET_SLOT(object, PL2_responsesSym))); ninputs = get_ninputs(object); nobs = get_nobs(object); ans = PROTECT(NEW_OBJECT(MAKE_CLASS("TreeFitMemory"))); SET_SLOT(ans, PL2_expcovinfSym, PROTECT(new_ExpectCovarInfluence(q))); SET_SLOT(ans, PL2_expcovinfssSym, PROTECT(new_ExpectCovarInfluence(1))); SET_SLOT(ans, PL2_linexpcov2sampleSym, PROTECT(new_LinStatExpectCovar(1, q))); SET_SLOT(ans, PL2_weightsSym, weights = PROTECT(allocVector(REALSXP, nobs))); for (int i = 0; i < nobs; i++) REAL(weights)[i] = 0.0; SET_SLOT(ans, PL2_splitstatisticsSym, splitstatistics = PROTECT(allocVector(REALSXP, nobs))); for (int i = 0; i < nobs; i++) REAL(splitstatistics)[i] = 0.0; SET_SLOT(ans, PL2_dontuseSym, dontuse = PROTECT(allocVector(LGLSXP, ninputs))); for (int i = 0; i < ninputs; i++) LOGICAL(dontuse)[i] = 0.0; SET_SLOT(ans, PL2_dontusetmpSym, dontusetmp = PROTECT(allocVector(LGLSXP, ninputs))); for (int i = 0; i < ninputs; i++) LOGICAL(dontusetmp)[i] = 0.0; varmemory = PROTECT(allocVector(VECSXP, ninputs)); for (int i = 0; i < ninputs; i++) { p = ncol(get_transformation(GET_SLOT(object, PL2_inputsSym), i + 1)); if (LOGICAL(MP_INV)[0]) { SET_VECTOR_ELT(varmemory, i, new_LinStatExpectCovarMPinv(p, q)); } else { SET_VECTOR_ELT(varmemory, i, new_LinStatExpectCovar(p, q)); } } SET_SLOT(ans, PL2_varmemorySym, varmemory); UNPROTECT(9); return(ans); }
void C_Node(SEXP node, SEXP learnsample, SEXP weights, SEXP fitmem, SEXP controls, int TERMINAL, int depth) { int nobs, ninputs, jselect, q, j, k, i; double mincriterion, sweights, *dprediction; double *teststat, *pvalue, smax, cutpoint = 0.0, maxstat = 0.0; double *standstat, *splitstat; SEXP responses, inputs, x, expcovinf, linexpcov; SEXP varctrl, splitctrl, gtctrl, tgctrl, split, testy, predy; double *dxtransf, *thisweights; int *itable; nobs = get_nobs(learnsample); ninputs = get_ninputs(learnsample); varctrl = get_varctrl(controls); splitctrl = get_splitctrl(controls); gtctrl = get_gtctrl(controls); tgctrl = get_tgctrl(controls); mincriterion = get_mincriterion(gtctrl); responses = GET_SLOT(learnsample, PL2_responsesSym); inputs = GET_SLOT(learnsample, PL2_inputsSym); testy = get_test_trafo(responses); predy = get_predict_trafo(responses); q = ncol(testy); /* <FIXME> we compute C_GlobalTest even for TERMINAL nodes! </FIXME> */ /* compute the test statistics and the node criteria for each input */ C_GlobalTest(learnsample, weights, fitmem, varctrl, gtctrl, get_minsplit(splitctrl), REAL(S3get_teststat(node)), REAL(S3get_criterion(node)), depth); /* sum of weights: C_GlobalTest did nothing if sweights < mincriterion */ sweights = REAL(GET_SLOT(GET_SLOT(fitmem, PL2_expcovinfSym), PL2_sumweightsSym))[0]; REAL(VECTOR_ELT(node, S3_SUMWEIGHTS))[0] = sweights; /* compute the prediction of this node */ dprediction = REAL(S3get_prediction(node)); /* <FIXME> feed raw numeric values OR dummy encoded factors as y Problem: what happens for survival times ? */ C_prediction(REAL(predy), nobs, ncol(predy), REAL(weights), sweights, dprediction); /* </FIXME> */ teststat = REAL(S3get_teststat(node)); pvalue = REAL(S3get_criterion(node)); /* try the two out of ninputs best inputs variables */ /* <FIXME> be more flexible and add a parameter controlling the number of inputs tried </FIXME> */ for (j = 0; j < 2; j++) { smax = C_max(pvalue, ninputs); REAL(S3get_maxcriterion(node))[0] = smax; /* if the global null hypothesis was rejected */ if (smax > mincriterion && !TERMINAL) { /* the input variable with largest association to the response */ jselect = C_whichmax(pvalue, teststat, ninputs) + 1; /* get the raw numeric values or the codings of a factor */ x = get_variable(inputs, jselect); if (has_missings(inputs, jselect)) { expcovinf = GET_SLOT(get_varmemory(fitmem, jselect), PL2_expcovinfSym); thisweights = C_tempweights(jselect, weights, fitmem, inputs); } else { expcovinf = GET_SLOT(fitmem, PL2_expcovinfSym); thisweights = REAL(weights); } /* <FIXME> handle ordered factors separatly??? </FIXME> */ if (!is_nominal(inputs, jselect)) { /* search for a split in a ordered variable x */ split = S3get_primarysplit(node); /* check if the n-vector of splitstatistics should be returned for each primary split */ if (get_savesplitstats(tgctrl)) { C_init_orderedsplit(split, nobs); splitstat = REAL(S3get_splitstatistics(split)); } else { C_init_orderedsplit(split, 0); splitstat = REAL(get_splitstatistics(fitmem)); } C_split(REAL(x), 1, REAL(testy), q, thisweights, nobs, INTEGER(get_ordering(inputs, jselect)), splitctrl, GET_SLOT(fitmem, PL2_linexpcov2sampleSym), expcovinf, REAL(S3get_splitpoint(split)), &maxstat, splitstat); S3set_variableID(split, jselect); } else { /* search of a set of levels (split) in a numeric variable x */ split = S3get_primarysplit(node); /* check if the n-vector of splitstatistics should be returned for each primary split */ if (get_savesplitstats(tgctrl)) { C_init_nominalsplit(split, LENGTH(get_levels(inputs, jselect)), nobs); splitstat = REAL(S3get_splitstatistics(split)); } else { C_init_nominalsplit(split, LENGTH(get_levels(inputs, jselect)), 0); splitstat = REAL(get_splitstatistics(fitmem)); } linexpcov = get_varmemory(fitmem, jselect); standstat = Calloc(get_dimension(linexpcov), double); C_standardize(REAL(GET_SLOT(linexpcov, PL2_linearstatisticSym)), REAL(GET_SLOT(linexpcov, PL2_expectationSym)), REAL(GET_SLOT(linexpcov, PL2_covarianceSym)), get_dimension(linexpcov), get_tol(splitctrl), standstat); C_splitcategorical(INTEGER(x), LENGTH(get_levels(inputs, jselect)), REAL(testy), q, thisweights, nobs, standstat, splitctrl, GET_SLOT(fitmem, PL2_linexpcov2sampleSym), expcovinf, &cutpoint, INTEGER(S3get_splitpoint(split)), &maxstat, splitstat); /* compute which levels of a factor are available in this node (for printing) later on. A real `table' for this node would induce too much overhead here. Maybe later. */ itable = INTEGER(S3get_table(split)); dxtransf = REAL(get_transformation(inputs, jselect)); for (k = 0; k < LENGTH(get_levels(inputs, jselect)); k++) { itable[k] = 0; for (i = 0; i < nobs; i++) { if (dxtransf[k * nobs + i] * thisweights[i] > 0) { itable[k] = 1; continue; } } } Free(standstat); } if (maxstat == 0) { if (j == 1) { S3set_nodeterminal(node); } else { /* do not look at jselect in next iteration */ pvalue[jselect - 1] = R_NegInf; } } else { S3set_variableID(split, jselect); break; } } else {