/* return the skeleton of a DAG/PDAG. */ SEXP dag2ug(SEXP bn, SEXP moral, SEXP debug) { int i = 0, j = 0, k = 0, nnodes = 0, narcs = 0, row = 0; int debuglevel = isTRUE(debug), *moralize = LOGICAL(moral); int *nparents = NULL, *nnbr = NULL; SEXP node_data, current, nodes, result, temp; /* get the nodes' data. */ node_data = getListElement(bn, "nodes"); nnodes = length(node_data); PROTECT(nodes = getAttrib(node_data, R_NamesSymbol)); /* allocate and initialize parents' and neighbours' counters. */ nnbr = Calloc1D(nnodes, sizeof(int)); if (*moralize > 0) nparents = Calloc1D(nnodes, sizeof(int)); /* first pass: count neighbours, parents and resulting arcs. */ for (i = 0; i < nnodes; i++) { /* get the number of neighbours. */ current = VECTOR_ELT(node_data, i); nnbr[i] = length(getListElement(current, "nbr")); /* update the number of arcs to be returned. */ if (*moralize > 0) { /* get also the number of parents, needed to account for the arcs added * for their moralization. */ nparents[i] = length(getListElement(current, "parents")); narcs += nnbr[i] + nparents[i] * (nparents[i] - 1); }/*THEN*/ else { narcs += nnbr[i]; }/*ELSE*/ if (debuglevel > 0) { if (*moralize > 0) { Rprintf("* scanning node %s, found %d neighbours and %d parents.\n", NODE(i), nnbr[i], nparents[i]); Rprintf(" > adding %d arcs, for a total of %d.\n", nnbr[i] + nparents[i] * (nparents[i] - 1), narcs); }/*THEN*/ else { Rprintf("* scanning node %s, found %d neighbours.\n", NODE(i), nnbr[i]); Rprintf(" > adding %d arcs, for a total of %d.\n", nnbr[i], narcs); }/*ELSE*/ }/*THEN*/ }/*FOR*/ /* allocate the return value. */ PROTECT(result = allocMatrix(STRSXP, narcs, 2)); /* allocate and set the column names. */ setDimNames(result, R_NilValue, mkStringVec(2, "from", "to")); /* second pass: fill the return value. */ for (i = 0; i < nnodes; i++) { /* get to the current node. */ current = VECTOR_ELT(node_data, i); /* get the neighbours. */ temp = getListElement(current, "nbr"); for (j = 0; j < nnbr[i]; j++) { SET_STRING_ELT(result, CMC(row, 0, narcs), STRING_ELT(nodes, i)); SET_STRING_ELT(result, CMC(row, 1, narcs), STRING_ELT(temp, j)); row++; }/*FOR*/ /* if we are not creating a moral graph we are done with this node. */ if (*moralize == 0) continue; /* get the parents. */ temp = getListElement(current, "parents"); for (j = 0; j < nparents[i]; j++) { for (k = j+1; k < nparents[i]; k++) { SET_STRING_ELT(result, CMC(row, 0, narcs), STRING_ELT(temp, k)); SET_STRING_ELT(result, CMC(row, 1, narcs), STRING_ELT(temp, j)); row++; SET_STRING_ELT(result, CMC(row, 0, narcs), STRING_ELT(temp, j)); SET_STRING_ELT(result, CMC(row, 1, narcs), STRING_ELT(temp, k)); row++; }/*FOR*/ }/*FOR*/ }/*FOR*/ Free1D(nnbr); if (*moralize > 0) { /* be really sure not to return duplicate arcs in moral graphs when * shielded parents are present (the "shielding" nodes are counted * twice). */ result = c_unique_arcs(result, nodes, FALSE); Free1D(nparents); }/*THEN*/ UNPROTECT(2); return result; }/*DAG2UG*/

void bmerge_r(int xlowIn, int xuppIn, int ilowIn, int iuppIn, int col, int lowmax, int uppmax) // col is >0 and <=ncol-1 if this range of [xlow,xupp] and [ilow,iupp] match up to but not including that column // lowmax=1 if xlowIn is the lower bound of this group (needed for roll) // uppmax=1 if xuppIn is the upper bound of this group (needed for roll) { int xlow=xlowIn, xupp=xuppIn, ilow=ilowIn, iupp=iuppIn, j, k, ir, lir, tmp; SEXP class; ir = lir = ilow + (iupp-ilow)/2; // lir = logical i row. if (o) ir = o[lir]-1; // ir = the actual i row if i were ordered ic = VECTOR_ELT(i,icols[col]-1); // ic = i column xc = VECTOR_ELT(x,xcols[col]-1); // xc = x column // it was checked in bmerge() that the types are equal switch (TYPEOF(xc)) { case LGLSXP : case INTSXP : // including factors ival.i = INTEGER(ic)[ir]; while(xlow < xupp-1) { mid = xlow + (xupp-xlow)/2; // Same as (xlow+xupp)/2 but without risk of overflow xval.i = INTEGER(xc)[XIND(mid)]; if (xval.i<ival.i) { // relies on NA_INTEGER == INT_MIN, tested in init.c xlow=mid; } else if (xval.i>ival.i) { // TO DO: is *(&xlow, &xupp)[0|1]=mid more efficient than branch? xupp=mid; } else { // xval.i == ival.i including NA_INTEGER==NA_INTEGER // branch mid to find start and end of this group in this column // TO DO?: not if mult=first|last and col<ncol-1 tmplow = mid; tmpupp = mid; while(tmplow<xupp-1) { mid = tmplow + (xupp-tmplow)/2; xval.i = INTEGER(xc)[XIND(mid)]; if (xval.i == ival.i) tmplow=mid; else xupp=mid; } while(xlow<tmpupp-1) { mid = xlow + (tmpupp-xlow)/2; xval.i = INTEGER(xc)[XIND(mid)]; if (xval.i == ival.i) tmpupp=mid; else xlow=mid; } // xlow and xupp now surround the group in xc, we only need this range for the next column break; } } tmplow = lir; tmpupp = lir; while(tmplow<iupp-1) { // TO DO: could double up from lir rather than halving from iupp mid = tmplow + (iupp-tmplow)/2; xval.i = INTEGER(ic)[ o ? o[mid]-1 : mid ]; // reuse xval to search in i if (xval.i == ival.i) tmplow=mid; else iupp=mid; } while(ilow<tmpupp-1) { mid = ilow + (tmpupp-ilow)/2; xval.i = INTEGER(ic)[ o ? o[mid]-1 : mid ]; if (xval.i == ival.i) tmpupp=mid; else ilow=mid; } // ilow and iupp now surround the group in ic, too break; case STRSXP : ival.s = ENC2UTF8(STRING_ELT(ic,ir)); while(xlow < xupp-1) { mid = xlow + (xupp-xlow)/2; xval.s = ENC2UTF8(STRING_ELT(xc, XIND(mid))); tmp = StrCmp(xval.s, ival.s); // uses pointer equality first, NA_STRING are allowed and joined to, then uses strcmp on CHAR(). if (tmp == 0) { // TO DO: deal with mixed encodings and locale optionally tmplow = mid; tmpupp = mid; while(tmplow<xupp-1) { mid = tmplow + (xupp-tmplow)/2; xval.s = ENC2UTF8(STRING_ELT(xc, XIND(mid))); if (ival.s == xval.s) tmplow=mid; else xupp=mid; // the == here handles encodings as well. Marked non-utf8 encodings are converted to utf-8 using ENC2UTF8. } while(xlow<tmpupp-1) { mid = xlow + (tmpupp-xlow)/2; xval.s = ENC2UTF8(STRING_ELT(xc, XIND(mid))); if (ival.s == xval.s) tmpupp=mid; else xlow=mid; // see above re == } break; } else if (tmp < 0) { xlow=mid; } else { xupp=mid; } } tmplow = lir; tmpupp = lir; while(tmplow<iupp-1) { mid = tmplow + (iupp-tmplow)/2; xval.s = ENC2UTF8(STRING_ELT(ic, o ? o[mid]-1 : mid)); if (xval.s == ival.s) tmplow=mid; else iupp=mid; // see above re == } while(ilow<tmpupp-1) { mid = ilow + (tmpupp-ilow)/2; xval.s = ENC2UTF8(STRING_ELT(ic, o ? o[mid]-1 : mid)); if (xval.s == ival.s) tmpupp=mid; else ilow=mid; // see above re == } break; case REALSXP : class = getAttrib(xc, R_ClassSymbol); twiddle = (isString(class) && STRING_ELT(class, 0)==char_integer64) ? &i64twiddle : &dtwiddle; ival.ll = twiddle(DATAPTR(ic), ir, 1); while(xlow < xupp-1) { mid = xlow + (xupp-xlow)/2; xval.ll = twiddle(DATAPTR(xc), XIND(mid), 1); if (xval.ll<ival.ll) { xlow=mid; } else if (xval.ll>ival.ll) { xupp=mid; } else { // xval.ll == ival.ll) tmplow = mid; tmpupp = mid; while(tmplow<xupp-1) { mid = tmplow + (xupp-tmplow)/2; xval.ll = twiddle(DATAPTR(xc), XIND(mid), 1); if (xval.ll == ival.ll) tmplow=mid; else xupp=mid; } while(xlow<tmpupp-1) { mid = xlow + (tmpupp-xlow)/2; xval.ll = twiddle(DATAPTR(xc), XIND(mid), 1); if (xval.ll == ival.ll) tmpupp=mid; else xlow=mid; } break; } } tmplow = lir; tmpupp = lir; while(tmplow<iupp-1) { mid = tmplow + (iupp-tmplow)/2; xval.ll = twiddle(DATAPTR(ic), o ? o[mid]-1 : mid, 1 ); if (xval.ll == ival.ll) tmplow=mid; else iupp=mid; } while(ilow<tmpupp-1) { mid = ilow + (tmpupp-ilow)/2; xval.ll = twiddle(DATAPTR(ic), o ? o[mid]-1 : mid, 1 ); if (xval.ll == ival.ll) tmpupp=mid; else ilow=mid; } break; default: error("Type '%s' not supported as key column", type2char(TYPEOF(xc))); } if (xlow<xupp-1) { // if value found, low and upp surround it, unlike standard binary search where low falls on it if (col<ncol-1) bmerge_r(xlow, xupp, ilow, iupp, col+1, 1, 1); // final two 1's are lowmax and uppmax else { int len = xupp-xlow-1; if (len>1) allLen1[0] = FALSE; for (j=ilow+1; j<iupp; j++) { // usually iterates once only for j=ir k = o ? o[j]-1 : j; retFirst[k] = xlow+2; // extra +1 for 1-based indexing at R level retLength[k]= len; } } } else if (roll!=0.0 && col==ncol-1) { // runs once per i row (not each search test), so not hugely time critical if (xlow != xupp-1 || xlow<xlowIn || xupp>xuppIn) error("Internal error: xlow!=xupp-1 || xlow<xlowIn || xupp>xuppIn"); if (rollToNearest) { // value of roll ignored currently when nearest if ( (!lowmax || xlow>xlowIn) && (!uppmax || xupp<xuppIn) ) { if ( ( TYPEOF(ic)==REALSXP && REAL(ic)[ir]-REAL(xc)[XIND(xlow)] <= REAL(xc)[XIND(xupp)]-REAL(ic)[ir] ) || ( TYPEOF(ic)<=INTSXP && INTEGER(ic)[ir]-INTEGER(xc)[XIND(xlow)] <= INTEGER(xc)[XIND(xupp)]-INTEGER(ic)[ir] )) { retFirst[ir] = xlow+1; retLength[ir] = 1; } else { retFirst[ir] = xupp+1; retLength[ir] = 1; } } else if (uppmax && xupp==xuppIn && rollends[1]) { retFirst[ir] = xlow+1; retLength[ir] = 1; } else if (lowmax && xlow==xlowIn && rollends[0]) { retFirst[ir] = xupp+1; retLength[ir] = 1; } } else { // fix for #1405 if (TYPEOF(ic) == REALSXP) { xvalupp.ll = REAL(xc)[XIND(xupp)]; xvallow.ll = REAL(xc)[XIND(xlow)]; ival.ll = REAL(ic)[ir]; } if ( ( (roll>0.0 && (!lowmax || xlow>xlowIn) && (xupp<xuppIn || !uppmax || rollends[1])) || (roll<0.0 && xupp==xuppIn && uppmax && rollends[1]) ) && ( (TYPEOF(ic)==REALSXP && ((double)(ival.ll-xvallow.ll)-rollabs<1e-6 || (double)(ival.ll-xvallow.ll) == rollabs)) // #1007 fix || (TYPEOF(ic)<=INTSXP && (double)(INTEGER(ic)[ir]-INTEGER(xc)[XIND(xlow)])-rollabs<1e-6 ) || (TYPEOF(ic)==STRSXP) )) { retFirst[ir] = xlow+1; retLength[ir] = 1; } else if ( ( (roll<0.0 && (!uppmax || xupp<xuppIn) && (xlow>xlowIn || !lowmax || rollends[0])) || (roll>0.0 && xlow==xlowIn && lowmax && rollends[0]) ) && ( (TYPEOF(ic)==REALSXP && ((double)(xvalupp.ll-ival.ll)-rollabs<1e-6 || (double)(xvalupp.ll-ival.ll) == rollabs)) // 1007 fix || (TYPEOF(ic)<=INTSXP && (double)(INTEGER(xc)[XIND(xupp)]-INTEGER(ic)[ir])-rollabs<1e-6 ) || (TYPEOF(ic)==STRSXP) )) { retFirst[ir] = xupp+1; // == xlow+2 retLength[ir] = 1; } } if (iupp-ilow > 2 && retFirst[ir]!=NA_INTEGER) { // >=2 equal values in the last column being rolling to the same point. for (j=ilow+1; j<iupp; j++) { // will rewrite retFirst[ir] to itself, but that's ok if (o) k=o[j]-1; else k=j; retFirst[k] = retFirst[ir]; retLength[k]= retLength[ir]; } } } if (ilow>ilowIn && (xlow>xlowIn || (roll!=0.0 && col==ncol-1))) bmerge_r(xlowIn, xlow+1, ilowIn, ilow+1, col, lowmax, uppmax && xlow+1==xuppIn); if (iupp<iuppIn && (xupp<xuppIn || (roll!=0.0 && col==ncol-1))) bmerge_r(xupp-1, xuppIn, iupp-1, iuppIn, col, lowmax && xupp-1==xlowIn, uppmax); }

SEXP bmerge(SEXP iArg, SEXP xArg, SEXP icolsArg, SEXP xcolsArg, SEXP isorted, SEXP rollarg, SEXP rollendsArg, SEXP nomatch, SEXP retFirstArg, SEXP retLengthArg, SEXP allLen1Arg) { int xN, iN, protecti=0; roll = 0.0; nearest = FALSE; enc_warn = TRUE; if (isString(rollarg)) { if (strcmp(CHAR(STRING_ELT(rollarg,0)),"nearest") != 0) error("roll is character but not 'nearest'"); roll=1.0; nearest=TRUE; // the 1.0 here is just any non-0.0, so roll!=0.0 can be used later } else { if (!isReal(rollarg)) error("Internal error: roll is not character or double"); roll = REAL(rollarg)[0]; // more common case (rolling forwards or backwards) or no roll when 0.0 } rollabs = fabs(roll); i = iArg; x = xArg; // set globals so bmerge_r can see them. if (!isInteger(icolsArg)) error("Internal error: icols is not integer vector"); if (!isInteger(xcolsArg)) error("Internal error: xcols is not integer vector"); if (LENGTH(icolsArg) > LENGTH(xcolsArg)) error("Internal error: length(icols) [%d] > length(xcols) [%d]", LENGTH(icolsArg), LENGTH(xcolsArg)); icols = INTEGER(icolsArg); xcols = INTEGER(xcolsArg); xN = LENGTH(VECTOR_ELT(x,0)); iN = LENGTH(VECTOR_ELT(i,0)); ncol = LENGTH(icolsArg); // there may be more sorted columns in x than involved in the join for(int col=0; col<ncol; col++) { if (icols[col]==NA_INTEGER) error("Internal error. icols[%d] is NA", col); if (xcols[col]==NA_INTEGER) error("Internal error. xcols[%d] is NA", col); if (icols[col]>LENGTH(i) || icols[col]<1) error("icols[%d]=%d outside range [1,length(i)=%d]", col, icols[col], LENGTH(i)); if (xcols[col]>LENGTH(x) || xcols[col]<1) error("xcols[%d]=%d outside range [1,length(x)=%d]", col, xcols[col], LENGTH(x)); int it = TYPEOF(VECTOR_ELT(i, icols[col]-1)); int xt = TYPEOF(VECTOR_ELT(x, xcols[col]-1)); if (it != xt) error("typeof x.%s (%s) != typeof i.%s (%s)", CHAR(STRING_ELT(getAttrib(x,R_NamesSymbol),xcols[col]-1)), type2char(xt), CHAR(STRING_ELT(getAttrib(i,R_NamesSymbol),icols[col]-1)), type2char(it)); } if (!isInteger(retFirstArg) || LENGTH(retFirstArg)!=iN) error("retFirst must be integer vector the same length as nrow(i)"); retFirst = INTEGER(retFirstArg); if (!isInteger(retLengthArg) || LENGTH(retLengthArg)!=iN) error("retLength must be integer vector the same length as nrow(i)"); retLength = INTEGER(retLengthArg); if (!isLogical(allLen1Arg) || LENGTH(allLen1Arg) != 1) error("allLen1 must be a length 1 logical vector"); allLen1 = LOGICAL(allLen1Arg); if (!isLogical(rollendsArg) || LENGTH(rollendsArg) != 2) error("rollends must be a length 2 logical vector"); rollends = LOGICAL(rollendsArg); if (nearest && TYPEOF(VECTOR_ELT(i, icols[ncol-1]-1))==STRSXP) error("roll='nearest' can't be applied to a character column, yet."); for (int j=0; j<iN; j++) { // defaults need to populated here as bmerge_r may well not touch many locations, say if the last row of i is before the first row of x. retFirst[j] = INTEGER(nomatch)[0]; // default to no match for NA goto below // retLength[j] = 0; // TO DO: do this to save the branch below and later branches at R level to set .N to 0 retLength[j] = INTEGER(nomatch)[0]==0 ? 0 : 1; } allLen1[0] = TRUE; // All-0 and All-NA are considered all length 1 according to R code currently. Really, it means any(length>1). o = NULL; if (!LOGICAL(isorted)[0]) { SEXP order = PROTECT(vec_init(length(icolsArg), ScalarInteger(1))); // rep(1, length(icolsArg)) SEXP oSxp = PROTECT(forder(i, icolsArg, ScalarLogical(FALSE), ScalarLogical(TRUE), order, ScalarLogical(FALSE))); protecti += 2; if (!LENGTH(oSxp)) o = NULL; else o = INTEGER(oSxp); } if (iN) bmerge_r(-1,xN,-1,iN,0,1,1); UNPROTECT(protecti); return(R_NilValue); }

/* C-level interface to unique_arcs. */ SEXP c_unique_arcs(SEXP arcs, SEXP nodes, int warnlevel) { int i = 0, j = 0, k = 0, nrows = 0, uniq_rows = 0, n = length(nodes); int *checklist = NULL; SEXP result, try, node, dup; if (isNull(arcs)) { /* use NULL as a special jolly value which returns all possible arcs * given the specified node ordering. */ nrows = n * (n - 1)/2; /* allocate the return value. */ PROTECT(result = allocMatrix(STRSXP, nrows, 2)); /* fill in the nodes' labels. */ for (i = 0; i < n; i++) { node = STRING_ELT(nodes, i); for (j = i + 1; j < n; j++) { SET_STRING_ELT(result, CMC(k, 0, nrows), node); SET_STRING_ELT(result, CMC(k, 1, nrows), STRING_ELT(nodes, j)); k++; }/*FOR*/ }/*FOR*/ }/*THEN*/ else if (length(arcs) == 0) { /* the arc set is empty, nothing to do. */ return arcs; }/*THEN*/ else { /* there really is a non-empty arc set, process it. */ nrows = length(arcs)/2; /* match the node labels in the arc set. */ PROTECT(try = arc_hash(arcs, nodes, FALSE, FALSE)); /* check which are duplicated. */ PROTECT(dup = duplicated(try, FALSE)); checklist = INTEGER(dup); /* count how many are not. */ for (i = 0; i < nrows; i++) if (checklist[i] == 0) uniq_rows++; /* if there is no duplicate arc simply return the original arc set. */ if (uniq_rows == nrows) { UNPROTECT(2); return arcs; }/*THEN*/ else { /* warn the user if told to do so. */ if (warnlevel > 0) warning("removed %d duplicate arcs.", nrows - uniq_rows); /* allocate and initialize the return value. */ PROTECT(result = allocMatrix(STRSXP, uniq_rows, 2)); /* store the correct arcs in the return value. */ for (i = 0, k = 0; i < nrows; i++) { if (checklist[i] == 0) { SET_STRING_ELT(result, k, STRING_ELT(arcs, i)); SET_STRING_ELT(result, k + uniq_rows, STRING_ELT(arcs, i + nrows)); k++; }/*THEN*/ }/*FOR*/ }/*ELSE*/ }/*ELSE*/ /* allocate, initialize and set the column names. */ finalize_arcs(result); if (uniq_rows == 0) UNPROTECT(1); else UNPROTECT(3); return result; }/*C_UNIQUE_ARCS*/

/* check neighbourhood sets and markov blankets for consistency.. */ SEXP bn_recovery(SEXP bn, SEXP strict, SEXP mb, SEXP debug, SEXP filter) { int i = 0, j = 0, k = 0, n = 0, counter = 0; short int *checklist = NULL, err = 0; int *debuglevel = NULL, *checkmb = NULL, *nbrfilter = NULL; SEXP temp, temp2, nodes, elnames = NULL, fixed; /* get the names of the nodes. */ nodes = getAttrib(bn, R_NamesSymbol); n = LENGTH(nodes); /* allocate and initialize the checklist. */ checklist = allocstatus(UPTRI_MATRIX(n)); /* dereference the debug, mb and filter parameters. */ debuglevel = LOGICAL(debug); checkmb = LOGICAL(mb); nbrfilter = INTEGER(filter); if (*debuglevel > 0) { Rprintf("----------------------------------------------------------------\n"); if (*checkmb) Rprintf("* checking consistency of markov blankets.\n"); else Rprintf("* checking consistency of neighbourhood sets.\n"); }/*THEN*/ /* scan the structure to determine the number of arcs. */ for (i = 0; i < n; i++) { if (*debuglevel > 0) Rprintf(" > checking node %s.\n", NODE(i)); /* get the entry for the (neighbours|elements of the markov blanket) of the node.*/ temp = getListElement(bn, (char *)NODE(i)); if (!(*checkmb)) temp = getListElement(temp, "nbr"); /* check each element of the array and identify which variable it corresponds to. */ for (j = 0; j < LENGTH(temp); j++) { for (k = 0; k < n; k++) { /* increment the right element of checklist. */ if (!strcmp(NODE(k), (char *)CHAR(STRING_ELT(temp, j)))) checklist[UPTRI(i + 1, k + 1, n)]++; }/*FOR*/ }/*FOR*/ }/*FOR*/ /* if A is a neighbour of B, B is a neighbour of A; therefore each entry in * the checklist array must be equal to either zero (if the corresponding * nodes are not neighbours) or two (if the corresponding nodes are neighbours). * Any other value (typically one) is caused by an incorrect (i.e. asymmetric) * neighbourhood structure. The same logic holds for the markov blankets. */ for (i = 0; i < n; i++) for (j = i; j < n; j++) { if ((checklist[UPTRI(i + 1, j + 1, n)] != 0) && (checklist[UPTRI(i + 1, j + 1, n)] != 2)) { if (*debuglevel > 0) { if (*checkmb) Rprintf("@ asymmetry in the markov blankets for %s and %s.\n", NODE(i), NODE(j)); else Rprintf("@ asymmetry in the neighbourhood sets for %s and %s.\n", NODE(i), NODE(j)); }/*THEN*/ err = 1; }/*THEN*/ }/*FOR*/ /* no need to go on if the (neighbourhood sets|markov blankets) are symmetric; * otherwise throw either an error or a warning according to the value of the * strict parameter. */ if (!err) { return bn; }/*THEN*/ else if (isTRUE(strict)) { if (*checkmb) error("markov blankets are not symmetric.\n"); else error("neighbourhood sets are not symmetric.\n"); }/*THEN*/ else { if (*checkmb) warning("markov blankets are not symmetric.\n"); else warning("neighbourhood sets are not symmetric.\n"); }/*ELSE*/ /* build a correct structure to return. */ PROTECT(fixed = allocVector(VECSXP, n)); setAttrib(fixed, R_NamesSymbol, nodes); if (!(*checkmb)) { /* allocate colnames. */ PROTECT(elnames = allocVector(STRSXP, 2)); SET_STRING_ELT(elnames, 0, mkChar("mb")); SET_STRING_ELT(elnames, 1, mkChar("nbr")); }/*THEN*/ for (i = 0; i < n; i++) { if (!(*checkmb)) { /* allocate the "mb" and "nbr" elements of the node. */ PROTECT(temp = allocVector(VECSXP, 2)); SET_VECTOR_ELT(fixed, i, temp); setAttrib(temp, R_NamesSymbol, elnames); /* copy the "mb" part from the old structure. */ temp2 = getListElement(bn, (char *)NODE(i)); temp2 = getListElement(temp2, "mb"); SET_VECTOR_ELT(temp, 0, temp2); }/*THEN*/ /* fix the neighbourhoods with an AND or an OR filter. * AND means that both neighbours have to see each other * to be neighbours, OR means that one neighbour at least * as to see the other one for both to be neighbours. */ switch(*nbrfilter) { case AND_FILTER: /* rescan the checklist. */ for (j = 0; j < n; j++) if (checklist[UPTRI(i + 1, j + 1, n)] == 2) if (i != j) counter++; /* allocate and fill the "nbr" element. */ PROTECT(temp2 = allocVector(STRSXP, counter)); for (j = 0; j < n; j++) if (checklist[UPTRI(i + 1, j + 1, n)] == 2) if (i != j) SET_STRING_ELT(temp2, --counter, STRING_ELT(nodes, j)); break; case OR_FILTER: /* rescan the checklist. */ for (j = 0; j < n; j++) if (checklist[UPTRI(i + 1, j + 1, n)] >= 1) if (i != j) counter++; /* allocate and fill the "nbr" element. */ PROTECT(temp2 = allocVector(STRSXP, counter)); for (j = 0; j < n; j++) if (checklist[UPTRI(i + 1, j + 1, n)] >= 1) if (i != j) SET_STRING_ELT(temp2, --counter, STRING_ELT(nodes, j)); break; } if (*checkmb) { SET_VECTOR_ELT(fixed, i, temp2); UNPROTECT(1); }/*THEN*/ else { SET_VECTOR_ELT(temp, 1, temp2); UNPROTECT(2); }/*ELSE*/ }/*FOR*/ if (*checkmb) UNPROTECT(1); else UNPROTECT(2); return fixed; }/*BN_RECOVERY*/

std::string RChar2String(SEXP str) { return string(CHAR(STRING_ELT(str, 0))); }

SEXP extract_col (SEXP x, SEXP j, SEXP drop, SEXP first_, SEXP last_) { SEXP result, index, new_index; int nrs, nrsx, i, ii, jj, first, last; nrsx = nrows(x); first = asInteger(first_)-1; last = asInteger(last_)-1; /* nrs = offset_end - offset_start - 1; */ nrs = last - first + 1; PROTECT(result = allocVector(TYPEOF(x), nrs * length(j))); switch(TYPEOF(x)) { case REALSXP: for(i=0; i<length(j); i++) { /* Rprintf("j + i*nrs + first=%i\n", (int)(INTEGER(j)[i]-1 + i*nrs + first)); Rprintf("i=%i, j=%i, nrs=%i, first=%i\n", i, INTEGER(j)[i]-1, nrs, first); */ if(INTEGER(j)[i] == NA_INTEGER) { for(ii=0; ii < nrs; ii++) { REAL(result)[(i*nrs) + ii] = NA_REAL; } } else { memcpy(&(REAL(result)[i*nrs]), &(REAL(x)[(INTEGER(j)[i]-1)*nrsx + first]), nrs*sizeof(double)); } } break; case INTSXP: for(i=0; i<length(j); i++) { if(INTEGER(j)[i] == NA_INTEGER) { for(ii=0; ii < nrs; ii++) { INTEGER(result)[(i*nrs) + ii] = NA_INTEGER; } } else { memcpy(&(INTEGER(result)[i*nrs]), &(INTEGER(x)[(INTEGER(j)[i]-1)*nrsx + first]), nrs*sizeof(int)); } } break; case LGLSXP: for(i=0; i<length(j); i++) { if(INTEGER(j)[i] == NA_INTEGER) { for(ii=0; ii < nrs; ii++) { LOGICAL(result)[(i*nrs) + ii] = NA_LOGICAL; } } else { memcpy(&(LOGICAL(result)[i*nrs]), &(LOGICAL(x)[(INTEGER(j)[i]-1)*nrsx + first]), nrs*sizeof(int)); } } break; case CPLXSXP: for(i=0; i<length(j); i++) { if(INTEGER(j)[i] == NA_INTEGER) { for(ii=0; ii < nrs; ii++) { COMPLEX(result)[(i*nrs) + ii].r = NA_REAL; COMPLEX(result)[(i*nrs) + ii].i = NA_REAL; } } else { memcpy(&(COMPLEX(result)[i*nrs]), &(COMPLEX(x)[(INTEGER(j)[i]-1)*nrsx + first]), nrs*sizeof(Rcomplex)); } } break; case RAWSXP: for(i=0; i<length(j); i++) { if(INTEGER(j)[i] == NA_INTEGER) { for(ii=0; ii < nrs; ii++) { RAW(result)[(i*nrs) + ii] = 0; } } else { memcpy(&(RAW(result)[i*nrs]), &(RAW(x)[(INTEGER(j)[i]-1)*nrsx + first]), nrs*sizeof(Rbyte)); } } break; case STRSXP: for(jj=0; jj<length(j); jj++) { if(INTEGER(j)[jj] == NA_INTEGER) { for(i=0; i< nrs; i++) SET_STRING_ELT(result, i+jj*nrs, NA_STRING); } else { for(i=0; i< nrs; i++) SET_STRING_ELT(result, i+jj*nrs, STRING_ELT(x, i+(INTEGER(j)[jj]-1)*nrsx+first)); } } break; default: error("unsupported type"); } if(nrs != nrows(x)) { copyAttributes(x, result); /* subset index */ index = getAttrib(x, install("index")); PROTECT(new_index = allocVector(TYPEOF(index), nrs)); if(TYPEOF(index) == REALSXP) { memcpy(REAL(new_index), &(REAL(index)[first]), nrs*sizeof(double)); } else { /* INTSXP */ memcpy(INTEGER(new_index), &(INTEGER(index)[first]), nrs*sizeof(int)); } copyMostAttrib(index, new_index); setAttrib(result, install("index"), new_index); UNPROTECT(1); } else { copyMostAttrib(x, result); /* need an xts/zoo equal that skips 'index' */ } if(!asLogical(drop)) { /* keep dimension and dimnames */ SEXP dim; PROTECT(dim = allocVector(INTSXP, 2)); INTEGER(dim)[0] = nrs; INTEGER(dim)[1] = length(j); setAttrib(result, R_DimSymbol, dim); UNPROTECT(1); SEXP dimnames, currentnames, newnames; PROTECT(dimnames = allocVector(VECSXP, 2)); PROTECT(newnames = allocVector(STRSXP, length(j))); currentnames = getAttrib(x, R_DimNamesSymbol); if(!isNull(currentnames)) { SET_VECTOR_ELT(dimnames, 0, VECTOR_ELT(currentnames,0)); if(!isNull(VECTOR_ELT(currentnames,1))) { /* if colnames isn't NULL set */ for(i=0; i<length(j); i++) { SET_STRING_ELT(newnames, i, STRING_ELT(VECTOR_ELT(currentnames,1), INTEGER(j)[i]-1)); } SET_VECTOR_ELT(dimnames, 1, newnames); } else { /* else set to NULL */ SET_VECTOR_ELT(dimnames, 1, R_NilValue); } setAttrib(result, R_DimNamesSymbol, dimnames); } UNPROTECT(2); } UNPROTECT(1); return result; }

SEXP gbm ( SEXP radY, // outcome or response SEXP radOffset, // offset for f(x), NA for no offset SEXP radX, SEXP raiXOrder, SEXP radWeight, SEXP radMisc, // other row specific data (eg failure time), NA=no Misc SEXP rcRows, SEXP rcCols, SEXP racVarClasses, SEXP ralMonotoneVar, SEXP rszFamily, SEXP rcTrees, SEXP rcDepth, // interaction depth SEXP rcMinObsInNode, SEXP rdShrinkage, SEXP rdBagFraction, SEXP rcTrain, SEXP radFOld, SEXP rcCatSplitsOld, SEXP rcTreesOld, SEXP rfVerbose ) { unsigned long hr = 0; SEXP rAns = NULL; SEXP rNewTree = NULL; SEXP riSplitVar = NULL; SEXP rdSplitPoint = NULL; SEXP riLeftNode = NULL; SEXP riRightNode = NULL; SEXP riMissingNode = NULL; SEXP rdErrorReduction = NULL; SEXP rdWeight = NULL; SEXP rdPred = NULL; SEXP rdInitF = NULL; SEXP radF = NULL; SEXP radTrainError = NULL; SEXP radValidError = NULL; SEXP radOOBagImprove = NULL; SEXP rSetOfTrees = NULL; SEXP rSetSplitCodes = NULL; SEXP rSplitCode = NULL; VEC_VEC_CATEGORIES vecSplitCodes; int i = 0; int iT = 0; int cTrees = INTEGER(rcTrees)[0]; const int cResultComponents = 7; // rdInitF, radF, radTrainError, radValidError, radOOBagImprove // rSetOfTrees, rSetSplitCodes const int cTreeComponents = 8; // riSplitVar, rdSplitPoint, riLeftNode, // riRightNode, riMissingNode, rdErrorReduction, rdWeight, rdPred int cNodes = 0; int cTrain = INTEGER(rcTrain)[0]; double dTrainError = 0.0; double dValidError = 0.0; double dOOBagImprove = 0.0; CGBM *pGBM = NULL; CDataset *pData = NULL; CDistribution *pDist = NULL; // set up the dataset pData = new CDataset(); if(pData==NULL) { hr = GBM_OUTOFMEMORY; goto Error; } // initialize R's random number generator GetRNGstate(); // initialize some things hr = gbm_setup(REAL(radY), REAL(radOffset), REAL(radX), INTEGER(raiXOrder), REAL(radWeight), REAL(radMisc), INTEGER(rcRows)[0], INTEGER(rcCols)[0], INTEGER(racVarClasses), INTEGER(ralMonotoneVar), CHAR(STRING_ELT(rszFamily,0)), INTEGER(rcTrees)[0], INTEGER(rcDepth)[0], INTEGER(rcMinObsInNode)[0], REAL(rdShrinkage)[0], REAL(rdBagFraction)[0], INTEGER(rcTrain)[0], pData, pDist); if(GBM_FAILED(hr)) { goto Error; } // allocate the GBM pGBM = new CGBM(); if(pGBM==NULL) { hr = GBM_OUTOFMEMORY; goto Error; } // initialize the GBM hr = pGBM->Initialize(pData, pDist, REAL(rdShrinkage)[0], cTrain, REAL(rdBagFraction)[0], INTEGER(rcDepth)[0], INTEGER(rcMinObsInNode)[0]); if(GBM_FAILED(hr)) { goto Error; } // allocate the main return object PROTECT(rAns = allocVector(VECSXP, cResultComponents)); // allocate the initial value PROTECT(rdInitF = allocVector(REALSXP, 1)); SET_VECTOR_ELT(rAns,0,rdInitF); UNPROTECT(1); // rdInitF // allocate the predictions PROTECT(radF = allocVector(REALSXP, pData->cRows)); SET_VECTOR_ELT(rAns,1,radF); UNPROTECT(1); // radF if(ISNA(REAL(radFOld)[0])) // check for old predictions { // set the initial value of F as a constant hr = pDist->InitF(pData->adY, pData->adMisc, pData->adOffset, pData->adWeight, REAL(rdInitF)[0], cTrain); for(i=0; i < pData->cRows; i++) { REAL(radF)[i] = REAL(rdInitF)[0]; } } else { for(i=0; i < pData->cRows; i++) { REAL(radF)[i] = REAL(radFOld)[i]; } } // allocate space for the performance measures PROTECT(radTrainError = allocVector(REALSXP, cTrees)); PROTECT(radValidError = allocVector(REALSXP, cTrees)); PROTECT(radOOBagImprove = allocVector(REALSXP, cTrees)); SET_VECTOR_ELT(rAns,2,radTrainError); SET_VECTOR_ELT(rAns,3,radValidError); SET_VECTOR_ELT(rAns,4,radOOBagImprove); UNPROTECT(3); // radTrainError , radValidError, radOOBagImprove // allocate the component for the tree structures PROTECT(rSetOfTrees = allocVector(VECSXP, cTrees)); SET_VECTOR_ELT(rAns,5,rSetOfTrees); UNPROTECT(1); // rSetOfTrees if(INTEGER(rfVerbose)[0]) { Rprintf("Iter TrainDeviance ValidDeviance StepSize Improve\n"); } for(iT=0; iT<cTrees; iT++) { hr = pGBM->iterate(REAL(radF), dTrainError,dValidError,dOOBagImprove, cNodes); if(GBM_FAILED(hr)) { goto Error; } // store the performance measures REAL(radTrainError)[iT] = dTrainError; REAL(radValidError)[iT] = dValidError; REAL(radOOBagImprove)[iT] = dOOBagImprove; // allocate the new tree component for the R list structure PROTECT(rNewTree = allocVector(VECSXP, cTreeComponents)); // riNodeID,riSplitVar,rdSplitPoint,riLeftNode, // riRightNode,riMissingNode,rdErrorReduction,rdWeight PROTECT(riSplitVar = allocVector(INTSXP, cNodes)); PROTECT(rdSplitPoint = allocVector(REALSXP, cNodes)); PROTECT(riLeftNode = allocVector(INTSXP, cNodes)); PROTECT(riRightNode = allocVector(INTSXP, cNodes)); PROTECT(riMissingNode = allocVector(INTSXP, cNodes)); PROTECT(rdErrorReduction = allocVector(REALSXP, cNodes)); PROTECT(rdWeight = allocVector(REALSXP, cNodes)); PROTECT(rdPred = allocVector(REALSXP, cNodes)); SET_VECTOR_ELT(rNewTree,0,riSplitVar); SET_VECTOR_ELT(rNewTree,1,rdSplitPoint); SET_VECTOR_ELT(rNewTree,2,riLeftNode); SET_VECTOR_ELT(rNewTree,3,riRightNode); SET_VECTOR_ELT(rNewTree,4,riMissingNode); SET_VECTOR_ELT(rNewTree,5,rdErrorReduction); SET_VECTOR_ELT(rNewTree,6,rdWeight); SET_VECTOR_ELT(rNewTree,7,rdPred); UNPROTECT(cTreeComponents); SET_VECTOR_ELT(rSetOfTrees,iT,rNewTree); UNPROTECT(1); // rNewTree hr = gbm_transfer_to_R(pGBM, vecSplitCodes, INTEGER(riSplitVar), REAL(rdSplitPoint), INTEGER(riLeftNode), INTEGER(riRightNode), INTEGER(riMissingNode), REAL(rdErrorReduction), REAL(rdWeight), REAL(rdPred), INTEGER(rcCatSplitsOld)[0]); if((iT <= 9) || ((iT+1+INTEGER(rcTreesOld)[0])/100 == (iT+1+INTEGER(rcTreesOld)[0])/100.0) || (iT==cTrees-1)) { R_CheckUserInterrupt(); if(INTEGER(rfVerbose)[0]) { Rprintf("%6d %13.4f %15.4f %10.4f %9.4f\n", iT+1+INTEGER(rcTreesOld)[0], REAL(radTrainError)[iT], REAL(radValidError)[iT], REAL(rdShrinkage)[0], REAL(radOOBagImprove)[iT]); } } } if(INTEGER(rfVerbose)[0]) Rprintf("\n"); // transfer categorical splits to R PROTECT(rSetSplitCodes = allocVector(VECSXP, vecSplitCodes.size())); SET_VECTOR_ELT(rAns,6,rSetSplitCodes); UNPROTECT(1); // rSetSplitCodes for(i=0; i<(int)vecSplitCodes.size(); i++) { PROTECT(rSplitCode = allocVector(INTSXP, size_of_vector(vecSplitCodes,i))); SET_VECTOR_ELT(rSetSplitCodes,i,rSplitCode); UNPROTECT(1); // rSplitCode hr = gbm_transfer_catsplits_to_R(i, vecSplitCodes, INTEGER(rSplitCode)); } // dump random number generator seed #ifdef NOISY_DEBUG Rprintf("PutRNGstate\n"); #endif PutRNGstate(); Cleanup: UNPROTECT(1); // rAns #ifdef NOISY_DEBUG Rprintf("destructing\n"); #endif if(pGBM != NULL) { delete pGBM; pGBM = NULL; } if(pDist != NULL) { delete pDist; pDist = NULL; } if(pData != NULL) { delete pData; pData = NULL; } return rAns; Error: goto Cleanup; }

/** Matrix products of dense triangular Matrices * * @param a triangular matrix of class "dtrMatrix" * @param b ( ditto ) * @param right logical, if true, compute b %*% a, else a %*% b * @param trans logical, if true, "transpose a", i.e., use t(a), otherwise a * * @return the matrix product, one of a %*% b, t(a) %*% b, b %*% a, or b %*% t(a) * depending on (right, trans) = (F, F) (F, T) (T, F) (T, T) */ SEXP dtrMatrix_dtrMatrix_mm(SEXP a, SEXP b, SEXP right, SEXP trans) { /* called from "%*%" : (x,y, FALSE,FALSE), crossprod() : (x,y, FALSE, TRUE) , and tcrossprod(): (y,x, TRUE , TRUE) * - * TWO cases : (1) result is triangular <=> uplo's "match" (i.e., non-equal iff trans) * === (2) result is "general" */ SEXP val,/* = in case (2): PROTECT(dup_mMatrix_as_dgeMatrix(b)); */ d_a = GET_SLOT(a, Matrix_DimSym), uplo_a = GET_SLOT(a, Matrix_uploSym), diag_a = GET_SLOT(a, Matrix_diagSym), uplo_b = GET_SLOT(b, Matrix_uploSym), diag_b = GET_SLOT(b, Matrix_diagSym); int rt = asLogical(right); int tr = asLogical(trans); int *adims = INTEGER(d_a), n = adims[0]; double *valx = (double *) NULL /*Wall*/; const char *uplo_a_ch = CHAR(STRING_ELT(uplo_a, 0)), /* = uplo_P(a) */ *diag_a_ch = CHAR(STRING_ELT(diag_a, 0)), /* = diag_P(a) */ *uplo_b_ch = CHAR(STRING_ELT(uplo_b, 0)), /* = uplo_P(b) */ *diag_b_ch = CHAR(STRING_ELT(diag_b, 0)); /* = diag_P(b) */ Rboolean same_uplo = (*uplo_a_ch == *uplo_b_ch), matching_uplo = tr ? (!same_uplo) : same_uplo, uDiag_b = /* -Wall: */ FALSE; if (INTEGER(GET_SLOT(b, Matrix_DimSym))[0] != n) /* validity checking already "assures" square matrices ... */ error(_("\"dtrMatrix\" objects in '%*%' must have matching (square) dimension")); if(matching_uplo) { /* ==> result is triangular -- "dtrMatrix" ! * val := dup_mMatrix_as_dtrMatrix(b) : */ int sz = n * n; val = PROTECT(NEW_OBJECT(MAKE_CLASS("dtrMatrix"))); SET_SLOT(val, Matrix_uploSym, duplicate(uplo_b)); SET_SLOT(val, Matrix_DimSym, duplicate(d_a)); SET_DimNames(val, b); valx = REAL(ALLOC_SLOT(val, Matrix_xSym, REALSXP, sz)); Memcpy(valx, REAL(GET_SLOT(b, Matrix_xSym)), sz); if((uDiag_b = (*diag_b_ch == 'U'))) { /* unit-diagonal b - may contain garbage in diagonal */ for (int i = 0; i < n; i++) valx[i * (n+1)] = 1.; } } else { /* different "uplo" ==> result is "dgeMatrix" ! */ val = PROTECT(dup_mMatrix_as_dgeMatrix(b)); SEXP dn_a = GET_SLOT( a , Matrix_DimNamesSym), dn = GET_SLOT(val, Matrix_DimNamesSym); /* matrix product a %*% b, t(a) %*% b, b %*% a, or b %*% t(a) * (right, trans) = (F, F) (F, T) (T, F) (T, T) * set:from_a = 0:0 0:1 1:1 1:0 */ SET_VECTOR_ELT(dn, rt ? 1 : 0, VECTOR_ELT(dn_a, (rt+tr) % 2)); } if (n >= 1) { double alpha = 1.; /* Level 3 BLAS - DTRMM(): Compute one of the matrix multiplication operations * B := alpha*op( A )*B ["L"], or B := alpha*B*op( A ) ["R"], * where trans_A determines op(A):= A "N"one or * op(A):= t(A) "T"ransposed */ F77_CALL(dtrmm)(rt ? "R" : "L", uplo_a_ch, /*trans_A = */ tr ? "T" : "N", diag_a_ch, &n, &n, &alpha, REAL(GET_SLOT(a, Matrix_xSym)), adims, REAL(GET_SLOT(val, Matrix_xSym)), &n); } if(matching_uplo) { make_d_matrix_triangular(valx, tr ? b : a); /* set "other triangle" to 0 */ if(*diag_a_ch == 'U' && uDiag_b) /* result remains uni-diagonal */ SET_SLOT(val, Matrix_diagSym, duplicate(diag_a)); } UNPROTECT(1); return val; }

SEXP r_strptr(SEXP x) { return R_MakeExternalPtr( (void*) CHAR(STRING_ELT(x, 0)), R_NilValue, x ); }

static SEXP scanVector(SEXPTYPE type, int maxitems, int maxlines, int flush, SEXP stripwhite, int blskip, LocalData *d) { SEXP ans, bns; int blocksize, c, i, n, linesread, nprev,strip, bch; char *buffer; R_StringBuffer strBuf = {NULL, 0, MAXELTSIZE}; if (maxitems > 0) blocksize = maxitems; else blocksize = SCAN_BLOCKSIZE; R_AllocStringBuffer(0, &strBuf); PROTECT(ans = allocVector(type, blocksize)); nprev = 0; n = 0; linesread = 0; bch = 1; if (d->ttyflag) sprintf(ConsolePrompt, "1: "); strip = asLogical(stripwhite); for (;;) { if(n % 10000 == 9999) R_CheckUserInterrupt(); if (bch == R_EOF) { if (d->ttyflag) R_ClearerrConsole(); break; } else if (bch == '\n') { linesread++; if (linesread == maxlines) break; if (d->ttyflag) sprintf(ConsolePrompt, "%d: ", n + 1); nprev = n; } if (n == blocksize) { /* enlarge the vector*/ bns = ans; if(blocksize > INT_MAX/2) error(_("too many items")); blocksize = 2 * blocksize; ans = allocVector(type, blocksize); UNPROTECT(1); PROTECT(ans); copyVector(ans, bns); } buffer = fillBuffer(type, strip, &bch, d, &strBuf); if (nprev == n && strlen(buffer)==0 && ((blskip && bch =='\n') || bch == R_EOF)) { if (d->ttyflag || bch == R_EOF) break; } else { extractItem(buffer, ans, n, d); if (++n == maxitems) { if (d->ttyflag && bch != '\n') { /* MBCS-safe */ while ((c = scanchar(FALSE, d)) != '\n') ; } break; } } if (flush && (bch != '\n') && (bch != R_EOF)) { /* MBCS-safe */ while ((c = scanchar(FALSE, d)) != '\n' && (c != R_EOF)); bch = c; } } if (!d->quiet) REprintf("Read %d item%s\n", n, (n == 1) ? "" : "s"); if (d->ttyflag) ConsolePrompt[0] = '\0'; if (n == 0) { UNPROTECT(1); R_FreeStringBuffer(&strBuf); return allocVector(type,0); } if (n == maxitems) { UNPROTECT(1); R_FreeStringBuffer(&strBuf); return ans; } bns = allocVector(type, n); switch (type) { case LGLSXP: case INTSXP: for (i = 0; i < n; i++) INTEGER(bns)[i] = INTEGER(ans)[i]; break; case REALSXP: for (i = 0; i < n; i++) REAL(bns)[i] = REAL(ans)[i]; break; case CPLXSXP: for (i = 0; i < n; i++) COMPLEX(bns)[i] = COMPLEX(ans)[i]; break; case STRSXP: for (i = 0; i < n; i++) SET_STRING_ELT(bns, i, STRING_ELT(ans, i)); break; case RAWSXP: for (i = 0; i < n; i++) RAW(bns)[i] = RAW(ans)[i]; break; default: UNIMPLEMENTED_TYPEt("scanVector", type); } UNPROTECT(1); R_FreeStringBuffer(&strBuf); return bns; }

/** * Add a note for an object * * @param repo S4 class git_repository * @param sha The sha string of object * @param commit S4 class git_commit * @param message Content of the note to add * @param ref Canonical name of the reference to use * @param author Signature of the notes note author * @param committer Signature of the notes note committer * @param force Overwrite existing note * @return S4 class git_note */ SEXP git2r_note_create( SEXP repo, SEXP sha, SEXP message, SEXP ref, SEXP author, SEXP committer, SEXP force) { int err; SEXP result = R_NilValue; int overwrite = 0; git_oid note_oid; git_oid object_oid; git_signature *sig_author = NULL; git_signature *sig_committer = NULL; git_repository *repository = NULL; if (git2r_arg_check_sha(sha)) git2r_error(git2r_err_sha_arg, __func__, "sha"); if (git2r_arg_check_string(message)) git2r_error(git2r_err_string_arg, __func__, "message"); if (git2r_arg_check_string(ref)) git2r_error(git2r_err_string_arg, __func__, "ref"); if (git2r_arg_check_signature(author)) git2r_error(git2r_err_signature_arg, __func__, "author"); if (git2r_arg_check_signature(committer)) git2r_error(git2r_err_signature_arg, __func__, "committer"); if (git2r_arg_check_logical(force)) git2r_error(git2r_err_logical_arg, __func__, "force"); repository = git2r_repository_open(repo); if (!repository) git2r_error(git2r_err_invalid_repository, __func__, NULL); err = git2r_signature_from_arg(&sig_author, author); if (GIT_OK != err) goto cleanup; err = git2r_signature_from_arg(&sig_committer, committer); if (GIT_OK != err) goto cleanup; err = git_oid_fromstr(&object_oid, CHAR(STRING_ELT(sha, 0))); if (GIT_OK != err) goto cleanup; if (LOGICAL(force)[0]) overwrite = 1; err = git_note_create( ¬e_oid, repository, CHAR(STRING_ELT(ref, 0)), sig_author, sig_committer, &object_oid, CHAR(STRING_ELT(message, 0)), overwrite); if (GIT_OK != err) goto cleanup; PROTECT(result = NEW_OBJECT(MAKE_CLASS("git_note"))); err = git2r_note_init(¬e_oid, &object_oid, repository, CHAR(STRING_ELT(ref, 0)), repo, result); cleanup: if (sig_author) git_signature_free(sig_author); if (sig_committer) git_signature_free(sig_committer); if (repository) git_repository_free(repository); if (R_NilValue != result) UNPROTECT(1); if (GIT_OK != err) git2r_error(git2r_err_from_libgit2, __func__, giterr_last()->message); return result; }

/* backend to compute the cached values fro a single node. */ static SEXP cache_node_structure(int cur, SEXP nodes, int *amat, int nrows, int *status, int debuglevel) { int i = 0, j = 0; int num_parents = 0, num_children = 0, num_neighbours = 0, num_blanket = 0; SEXP structure, names, mb, nbr, children, parents; if (debuglevel > 0) Rprintf("* node %s.\n", NODE(cur)); for (i = 0; i < nrows; i++) { if (amat[CMC(cur, i, nrows)] == 1) { if (amat[CMC(i, cur, nrows)] == 0) { /* if a[i,j] = 1 and a[j,i] = 0, then i -> j. */ if (debuglevel > 0) Rprintf(" > found child %s.\n", NODE(i)); status[i] = CHILD; /* check whether this child has any other parent. */ for (j = 0; j < nrows; j++) { if ((amat[CMC(j, i, nrows)] == 1) && (amat[CMC(i, j, nrows)] == 0) && (j != cur)) { /* don't mark a neighbour as in the markov blanket. */ if (status[j] <= 1) { status[j] = BLANKET; if (debuglevel > 0) Rprintf(" > found node %s in markov blanket.\n", NODE(j)); }/*THEN*/ }/*THEN*/ }/*FOR*/ }/*THEN*/ else { /* if a[i,j] = 1 and a[j,i] = 1, then i -- j. */ if (debuglevel > 0) Rprintf(" > found neighbour %s.\n", NODE(i)); status[i] = NEIGHBOUR; }/*ELSE*/ }/*THEN*/ else { if (amat[CMC(i, cur, nrows)] == 1) { /* if a[i,j] = 0 and a[j,i] = 1, then i <- j. */ if (debuglevel > 0) Rprintf(" > found parent %s.\n", NODE(i)); status[i] = PARENT; }/*THEN*/ }/*ELSE*/ }/*FOR*/ /* count how may nodes fall in each category. */ for (i = 0; i < nrows; i++) { switch(status[i]) { case CHILD: /* a child is also a neighbour and belongs into the markov blanket. */ num_children++; num_neighbours++; num_blanket++; break; case PARENT: /* the same goes for a parent. */ num_parents++; num_neighbours++; num_blanket++; break; case NEIGHBOUR: /* it's not known if this is parent or a children, but it's certainly a neighbour. */ num_neighbours++; num_blanket++; break; case BLANKET: num_blanket++; break; default: /* this node is not even in the markov blanket. */ break; }/*SWITCH*/ }/*FOR*/ if (debuglevel > 0) Rprintf(" > node %s has %d parent(s), %d child(ren), %d neighbour(s) and %d nodes in the markov blanket.\n", NODE(cur), num_parents, num_children, num_neighbours, num_blanket); /* allocate and initialize the names of the elements. */ PROTECT(names = allocVector(STRSXP, 4)); SET_STRING_ELT(names, 0, mkChar("mb")); SET_STRING_ELT(names, 1, mkChar("nbr")); SET_STRING_ELT(names, 2, mkChar("parents")); SET_STRING_ELT(names, 3, mkChar("children")); /* allocate the list and set its attributes. */ PROTECT(structure = allocVector(VECSXP, 4)); setAttrib(structure, R_NamesSymbol, names); /* allocate and fill the "children" element of the list. */ PROTECT(children = allocVector(STRSXP, num_children)); for (i = 0, j = 0; (i < nrows) && (j < num_children); i++) { if (status[i] == CHILD) SET_STRING_ELT(children, j++, STRING_ELT(nodes, i)); }/*FOR*/ /* allocate and fill the "parents" element of the list. */ PROTECT(parents = allocVector(STRSXP, num_parents)); for (i = 0, j = 0; (i < nrows) && (j < num_parents); i++) { if (status[i] == PARENT) SET_STRING_ELT(parents, j++, STRING_ELT(nodes, i)); }/*FOR*/ /* allocate and fill the "nbr" element of the list. */ PROTECT(nbr = allocVector(STRSXP, num_neighbours)); for (i = 0, j = 0; (i < nrows) && (j < num_neighbours); i++) { if (status[i] >= NEIGHBOUR) SET_STRING_ELT(nbr, j++, STRING_ELT(nodes, i)); }/*FOR*/ /* allocate and fill the "mb" element of the list. */ PROTECT(mb = allocVector(STRSXP, num_blanket)); for (i = 0, j = 0; (i < nrows) && (j < num_blanket + num_neighbours); i++) { if (status[i] >= BLANKET) SET_STRING_ELT(mb, j++, STRING_ELT(nodes, i)); }/*FOR*/ /* attach the string vectors to the list. */ SET_VECTOR_ELT(structure, 0, mb); SET_VECTOR_ELT(structure, 1, nbr); SET_VECTOR_ELT(structure, 2, parents); SET_VECTOR_ELT(structure, 3, children); UNPROTECT(6); return structure; }/*CACHE_NODE_STRUCTURE*/

SEXP c_dfRowsToList(SEXP s_df, SEXP s_pars, SEXP s_types, SEXP s_parnames, SEXP s_lens, SEXP s_cnames) { int *types = INTEGER(s_types); int npars = LENGTH(s_lens); int *lens = INTEGER(s_lens); int nrow_df = LENGTH(VECTOR_ELT(s_df, 0)); int row, par, k; /* loop counters for rows, cols, params, vector param elements */ int type; /* type of column we are currently handling */ int parlen; /* length of param we are currently handling */ int colcount = 0; /* when we iterate params, what is the (first) column of s_df that corresponds? */ SEXP s_res, s_rowlist, s_parval, s_call; Rboolean all_missing; /* we iterate thru rows then params. */ s_res = PROTECT(NEW_LIST(nrow_df)); s_call = PROTECT(lang3(install("discreteNameToValue"), R_NilValue, R_NilValue)); for (row = 0; row < nrow_df; row++) { s_rowlist = PROTECT(NEW_LIST(npars)); /* convert row to R objects and define them in envir s_env */ colcount = 0; for (par = 0; par < npars; par++) { /* iter thru params */ parlen = lens[par]; type = types[colcount]; all_missing = TRUE; /* copy vector-param block of row to s_parval */ if (type == 1) { /* numerics */ s_parval = PROTECT(NEW_NUMERIC(parlen)); for (k = 0; k < parlen; k++) { REAL(s_parval)[k] = REAL(VECTOR_ELT(s_df, colcount+k))[row]; if (!ISNAN(REAL(s_parval)[k])) all_missing = FALSE; } } else if (type == 2) { /* integers */ s_parval = PROTECT(NEW_INTEGER(parlen)); for (k = 0; k < parlen; k++) { INTEGER(s_parval)[k] = INTEGER(VECTOR_ELT(s_df, colcount+k))[row]; if (INTEGER(s_parval)[k] != NA_INTEGER) all_missing = FALSE; } } else if (type == 3) { /* factors */ s_parval = PROTECT(NEW_CHARACTER(parlen)); for (k = 0; k < parlen; k++) { SET_STRING_ELT(s_parval, k, STRING_ELT(VECTOR_ELT(s_df, colcount+k), row)); if (STRING_ELT(s_parval, k) != NA_STRING) all_missing = FALSE; } } else if (type == 4) { /* logical */ s_parval = PROTECT(NEW_LOGICAL(parlen)); for (k = 0; k < parlen; k++) { LOGICAL(s_parval)[k] = LOGICAL(VECTOR_ELT(s_df, colcount+k))[row]; if (LOGICAL(s_parval)[k] != NA_LOGICAL) all_missing = FALSE; } } else if (type == 5) { /* character */ s_parval = PROTECT(NEW_CHARACTER(parlen)); for (k = 0; k < parlen; k++) { SET_STRING_ELT(s_parval, k, STRING_ELT(VECTOR_ELT(s_df, colcount+k), row)); if (STRING_ELT(s_parval, k) != NA_STRING) all_missing = FALSE; } } /* are all entries in s_parval NA ? */ if (all_missing) s_parval = ScalarLogical(NA_LOGICAL); /* convert discrete names to values */ if (!all_missing && type == 3) { SETCADR(s_call, VECTOR_ELT(s_pars, par)); SETCADDR(s_call, s_parval); s_parval = PROTECT(eval(s_call, R_GlobalEnv)); UNPROTECT(1); /* eval */ } /* only support for cnames for num, int, log and char vecs currently */ if (type == 1 || type == 2 || type == 4 || type == 5) SET_NAMES(s_parval, VECTOR_ELT(s_cnames, par)); SET_VECTOR_ELT(s_rowlist, par, s_parval); SET_NAMES(s_rowlist, s_parnames); colcount += parlen; UNPROTECT(1); /* s_parval */ } SET_VECTOR_ELT(s_res, row, s_rowlist); UNPROTECT(1); /* s_rowlist */ } UNPROTECT(2); /* s_res, s_call */ return s_res; }

/* GetNativeSystemInfo is XP or later */ pGNSI = (PGNSI) GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetNativeSystemInfo"); if(NULL != pGNSI) pGNSI(&si); else GetSystemInfo(&si); if(si.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_AMD64) strcat(ver, " x64"); } SET_STRING_ELT(ans, 1, mkChar(ver)); if((int)osvi.dwMajorVersion >= 5) { if(osvi.wServicePackMajor > 0) snprintf(ver, 256, "build %d, Service Pack %d", LOWORD(osvi.dwBuildNumber), (int) osvi.wServicePackMajor); else snprintf(ver, 256, "build %d", LOWORD(osvi.dwBuildNumber)); } else snprintf(ver, 256, "build %d, %s", LOWORD(osvi.dwBuildNumber), osvi.szCSDVersion); SET_STRING_ELT(ans, 2, mkChar(ver)); GetComputerNameW(name, &namelen); wcstoutf8(buf, name, 1000); SET_STRING_ELT(ans, 3, mkCharCE(buf, CE_UTF8)); #ifdef WIN64 SET_STRING_ELT(ans, 4, mkChar("x86-64")); #else SET_STRING_ELT(ans, 4, mkChar("x86")); #endif GetUserNameW(user, &userlen); wcstoutf8(buf, user, 1000); SET_STRING_ELT(ans, 5, mkCharCE(buf, CE_UTF8)); SET_STRING_ELT(ans, 6, STRING_ELT(ans, 5)); SET_STRING_ELT(ans, 7, STRING_ELT(ans, 5)); PROTECT(ansnames = allocVector(STRSXP, 8)); SET_STRING_ELT(ansnames, 0, mkChar("sysname")); SET_STRING_ELT(ansnames, 1, mkChar("release")); SET_STRING_ELT(ansnames, 2, mkChar("version")); SET_STRING_ELT(ansnames, 3, mkChar("nodename")); SET_STRING_ELT(ansnames, 4, mkChar("machine")); SET_STRING_ELT(ansnames, 5, mkChar("login")); SET_STRING_ELT(ansnames, 6, mkChar("user")); SET_STRING_ELT(ansnames, 7, mkChar("effective_user")); setAttrib(ans, R_NamesSymbol, ansnames); UNPROTECT(2); return ans; } SEXP do_syssleep(SEXP call, SEXP op, SEXP args, SEXP rho) { DWORD mtime; int ntime; double time; checkArity(op, args); time = asReal(CAR(args)); if (ISNAN(time) || time < 0) errorcall(call, _("invalid '%s' value"), "time"); ntime = 1000*(time) + 0.5; while (ntime > 0) { mtime = min(500, ntime); ntime -= mtime; Sleep(mtime); R_ProcessEvents(); } return R_NilValue; } #ifdef LEA_MALLOC #define MALLINFO_FIELD_TYPE size_t struct mallinfo { MALLINFO_FIELD_TYPE arena; /* non-mmapped space allocated from system */ MALLINFO_FIELD_TYPE ordblks; /* number of free chunks */ MALLINFO_FIELD_TYPE smblks; /* number of fastbin blocks */ MALLINFO_FIELD_TYPE hblks; /* number of mmapped regions */ MALLINFO_FIELD_TYPE hblkhd; /* space in mmapped regions */ MALLINFO_FIELD_TYPE usmblks; /* maximum total allocated space */ MALLINFO_FIELD_TYPE fsmblks; /* space available in freed fastbin blocks */ MALLINFO_FIELD_TYPE uordblks; /* total allocated space */ MALLINFO_FIELD_TYPE fordblks; /* total free space */ MALLINFO_FIELD_TYPE keepcost; /* top-most, releasable (via malloc_trim) space */ }; extern R_size_t R_max_memory; struct mallinfo mallinfo(void); #endif SEXP in_memsize(SEXP ssize) { SEXP ans; int maxmem = NA_LOGICAL; if(isLogical(ssize)) maxmem = asLogical(ssize); else if(isReal(ssize)) { R_size_t newmax; double mem = asReal(ssize); if (!R_FINITE(mem)) error(_("incorrect argument")); #ifdef LEA_MALLOC #ifndef WIN64 if(mem >= 4096) error(_("don't be silly!: your machine has a 4Gb address limit")); #endif newmax = mem * 1048576.0; if (newmax < R_max_memory) warning(_("cannot decrease memory limit: ignored")); else R_max_memory = newmax; #endif } else error(_("incorrect argument")); PROTECT(ans = allocVector(REALSXP, 1)); #ifdef LEA_MALLOC if(maxmem == NA_LOGICAL) REAL(ans)[0] = R_max_memory; else if(maxmem) REAL(ans)[0] = mallinfo().usmblks; else REAL(ans)[0] = mallinfo().uordblks; REAL(ans)[0] /= 1048576.0; #else REAL(ans)[0] = NA_REAL; #endif UNPROTECT(1); return ans; } SEXP do_dllversion(SEXP call, SEXP op, SEXP args, SEXP rho) { SEXP path = R_NilValue, ans; const wchar_t *dll; DWORD dwVerInfoSize; DWORD dwVerHnd; checkArity(op, args); path = CAR(args); if(!isString(path) || LENGTH(path) != 1) errorcall(call, _("invalid '%s' argument"), "path"); dll = filenameToWchar(STRING_ELT(path, 0), FALSE); dwVerInfoSize = GetFileVersionInfoSizeW(dll, &dwVerHnd); PROTECT(ans = allocVector(STRSXP, 2)); SET_STRING_ELT(ans, 0, mkChar("")); SET_STRING_ELT(ans, 1, mkChar("")); if (dwVerInfoSize) { BOOL fRet; LPSTR lpstrVffInfo; LPSTR lszVer = NULL; UINT cchVer = 0; lpstrVffInfo = (LPSTR) malloc(dwVerInfoSize); if (GetFileVersionInfoW(dll, 0L, dwVerInfoSize, lpstrVffInfo)) { fRet = VerQueryValue(lpstrVffInfo, TEXT("\\StringFileInfo\\040904E4\\FileVersion"), (LPVOID)&lszVer, &cchVer); if(fRet) SET_STRING_ELT(ans, 0, mkChar(lszVer)); fRet = VerQueryValue(lpstrVffInfo, TEXT("\\StringFileInfo\\040904E4\\R Version"), (LPVOID)&lszVer, &cchVer); if(fRet) SET_STRING_ELT(ans, 1, mkChar(lszVer)); else { fRet = VerQueryValue(lpstrVffInfo, TEXT("\\StringFileInfo\\040904E4\\Compiled under R Version"), (LPVOID)&lszVer, &cchVer); if(fRet) SET_STRING_ELT(ans, 1, mkChar(lszVer)); } } else ans = R_NilValue; free(lpstrVffInfo); } else ans = R_NilValue; UNPROTECT(1); return ans; } int Rwin_rename(const char *from, const char *to) { return (MoveFileEx(from, to, MOVEFILE_REPLACE_EXISTING | MOVEFILE_COPY_ALLOWED | MOVEFILE_WRITE_THROUGH) == 0); } int Rwin_wrename(const wchar_t *from, const wchar_t *to) { return (MoveFileExW(from, to, MOVEFILE_REPLACE_EXISTING | MOVEFILE_COPY_ALLOWED | MOVEFILE_WRITE_THROUGH) == 0); } const char *formatError(DWORD res) { static char buf[1000], *p; FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, res, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), buf, 1000, NULL); p = buf+strlen(buf) -1; if(*p == '\n') *p = '\0'; p = buf+strlen(buf) -1; if(*p == '\r') *p = '\0'; p = buf+strlen(buf) -1; if(*p == '.') *p = '\0'; return buf; } void R_UTF8fixslash(char *s); /* from main/util.c */ SEXP do_normalizepath(SEXP call, SEXP op, SEXP args, SEXP rho) { SEXP ans, paths = CAR(args), el, slash; int i, n = LENGTH(paths), res; char tmp[MAX_PATH], longpath[MAX_PATH], *tmp2; wchar_t wtmp[32768], wlongpath[32768], *wtmp2; int mustWork, fslash = 0; checkArity(op, args); if(!isString(paths)) errorcall(call, _("'path' must be a character vector")); slash = CADR(args); if(!isString(slash) || LENGTH(slash) != 1) errorcall(call, "'winslash' must be a character string"); const char *sl = CHAR(STRING_ELT(slash, 0)); if (strcmp(sl, "/") && strcmp(sl, "\\")) errorcall(call, "'winslash' must be '/' or '\\\\'"); if (strcmp(sl, "/") == 0) fslash = 1; mustWork = asLogical(CADDR(args)); PROTECT(ans = allocVector(STRSXP, n)); for (i = 0; i < n; i++) { int warn = 0; SEXP result; el = STRING_ELT(paths, i); result = el; if(getCharCE(el) == CE_UTF8) { if ((res = GetFullPathNameW(filenameToWchar(el, FALSE), 32768, wtmp, &wtmp2)) && res <= 32768) { if ((res = GetLongPathNameW(wtmp, wlongpath, 32768)) && res <= 32768) { wcstoutf8(longpath, wlongpath, wcslen(wlongpath)+1); if(fslash) R_UTF8fixslash(longpath); result = mkCharCE(longpath, CE_UTF8); } else if(mustWork == 1) { errorcall(call, "path[%d]=\"%s\": %s", i+1, translateChar(el), formatError(GetLastError())); } else { wcstoutf8(tmp, wtmp, wcslen(wtmp)+1); if(fslash) R_UTF8fixslash(tmp); result = mkCharCE(tmp, CE_UTF8); warn = 1; } } else if(mustWork == 1) { errorcall(call, "path[%d]=\"%s\": %s", i+1, translateChar(el), formatError(GetLastError())); } else { if (fslash) { strcpy(tmp, translateCharUTF8(el)); R_UTF8fixslash(tmp); result = mkCharCE(tmp, CE_UTF8); } warn = 1; } if (warn && (mustWork == NA_LOGICAL)) warningcall(call, "path[%d]=\"%ls\": %s", i+1, filenameToWchar(el,FALSE), formatError(GetLastError())); } else { if ((res = GetFullPathName(translateChar(el), MAX_PATH, tmp, &tmp2)) && res <= MAX_PATH) { if ((res = GetLongPathName(tmp, longpath, MAX_PATH)) && res <= MAX_PATH) { if(fslash) R_fixslash(longpath); result = mkChar(longpath); } else if(mustWork == 1) { errorcall(call, "path[%d]=\"%s\": %s", i+1, translateChar(el), formatError(GetLastError())); } else { if(fslash) R_fixslash(tmp); result = mkChar(tmp); warn = 1; } } else if(mustWork == 1) { errorcall(call, "path[%d]=\"%s\": %s", i+1, translateChar(el), formatError(GetLastError())); } else { if (fslash) { strcpy(tmp, translateChar(el)); R_fixslash(tmp); result = mkChar(tmp); } warn = 1; } if (warn && (mustWork == NA_LOGICAL)) warningcall(call, "path[%d]=\"%s\": %s", i+1, translateChar(el), formatError(GetLastError())); } SET_STRING_ELT(ans, i, result); } UNPROTECT(1); return ans; }

SEXP tiers(SEXP nodes, SEXP debug) { int i = 0, j = 0, k = 0, narcs = 0, nnodes = 0, ntiers = LENGTH(nodes); int *tier_size = NULL, *debuglevel = LOGICAL(debug), tier_start = 0, cur = 0; SEXP flattened, blacklist, temp; /* allocate the counters for tiers' sizes.*/ tier_size = alloc1dcont(ntiers); if (!isString(nodes)) { /* "node" is a list, each tier is an element. */ for (i = ntiers - 1; i >= 0; i--) { temp = VECTOR_ELT(nodes, i); tier_size[i] = LENGTH(temp); nnodes += tier_size[i]; narcs += (nnodes - tier_size[i]) * tier_size[i]; }/*FOR*/ /* flatten the tiers to keep manipulation later on simple. */ PROTECT(flattened = allocVector(STRSXP, nnodes)); for (i = 0, k = 0; i < ntiers; i++) { temp = VECTOR_ELT(nodes, i); for (j = 0; j < tier_size[i]; j++) SET_STRING_ELT(flattened, k++, STRING_ELT(temp, j)); }/*FOR*/ }/*THEN*/ else { /* "node" is a character vector, which means that each node is in its own tier * and that there is no need to flatted it. */ flattened = nodes; nnodes = LENGTH(nodes); for (i = 0; i < ntiers; i++) tier_size[i] = 1; /* the blacklist is the one resulting from a complete node ordering. */ narcs = ntiers * (ntiers - 1) / 2; }/*ELSE*/ /* allocate the return value. */ PROTECT(blacklist = allocMatrix(STRSXP, narcs, 2)); for (k = 0, i = 0; k < nnodes; k++) { temp = STRING_ELT(flattened, k); if (*debuglevel > 0) Rprintf("* current node is %s in tier %d.\n", CHAR(temp), i + 1); for (j = tier_start + tier_size[i]; j < nnodes; j++) { if (*debuglevel) Rprintf(" > blacklisting %s -> %s\n", CHAR(STRING_ELT(flattened, j)), CHAR(temp)); SET_STRING_ELT(blacklist, cur, STRING_ELT(flattened, j)); SET_STRING_ELT(blacklist, cur + narcs, temp); cur++; }/*FOR*/ if (k >= tier_start + tier_size[i] - 1) tier_start += tier_size[i++]; if (i == ntiers) break; } /* allocate, initialize and set the column names. */ finalize_arcs(blacklist); if (!isString(nodes)) UNPROTECT(2); else UNPROTECT(1); return blacklist; }/*TIERS*/

/** * Commit * * @param repo S4 class git_repository * @param message The message for the commit * @param author S4 class git_signature * @param committer S4 class git_signature * @return S4 class git_commit */ SEXP git2r_commit( SEXP repo, SEXP message, SEXP author, SEXP committer) { int err; SEXP result = R_NilValue; git_signature *c_author = NULL; git_signature *c_committer = NULL; git_index *index = NULL; git_oid oid; git_repository *repository = NULL; git_commit *commit = NULL; if (git2r_arg_check_string(message)) git2r_error(git2r_err_string_arg, __func__, "message"); if (git2r_arg_check_signature(author)) git2r_error(git2r_err_signature_arg, __func__, "author"); if (git2r_arg_check_signature(committer)) git2r_error(git2r_err_signature_arg, __func__, "committer"); repository = git2r_repository_open(repo); if (!repository) git2r_error(git2r_err_invalid_repository, __func__, NULL); err = git2r_signature_from_arg(&c_author, author); if (GIT_OK != err) goto cleanup; err = git2r_signature_from_arg(&c_committer, committer); if (GIT_OK != err) goto cleanup; err = git2r_any_changes_in_index(repository); if (GIT_OK != err) goto cleanup; err = git_repository_index(&index, repository); if (GIT_OK != err) goto cleanup; err = git2r_commit_create( &oid, repository, index, CHAR(STRING_ELT(message, 0)), c_author, c_committer); if (GIT_OK != err) goto cleanup; err = git_commit_lookup(&commit, repository, &oid); if (GIT_OK != err) goto cleanup; PROTECT(result = NEW_OBJECT(MAKE_CLASS("git_commit"))); git2r_commit_init(commit, repo, result); cleanup: if (c_author) git_signature_free(c_author); if (c_committer) git_signature_free(c_committer); if (index) git_index_free(index); if (repository) git_repository_free(repository); if (commit) git_commit_free(commit); if (R_NilValue != result) UNPROTECT(1); if (GIT_OK != err) git2r_error(git2r_err_from_libgit2, __func__, giterr_last()->message); return result; }

// Coordinate descent for poisson models SEXP cdfit_poisson(SEXP X_, SEXP y_, SEXP penalty_, SEXP lambda, SEXP eps_, SEXP max_iter_, SEXP gamma_, SEXP multiplier, SEXP alpha_, SEXP dfmax_, SEXP user_, SEXP warn_) { // Declarations int n = length(y_); int p = length(X_)/n; int L = length(lambda); SEXP res, beta0, beta, Dev, iter; PROTECT(beta0 = allocVector(REALSXP, L)); double *b0 = REAL(beta0); for (int i=0; i<L; i++) b0[i] = 0; PROTECT(beta = allocVector(REALSXP, L*p)); double *b = REAL(beta); for (int j=0; j<(L*p); j++) b[j] = 0; PROTECT(Dev = allocVector(REALSXP, L)); PROTECT(iter = allocVector(INTSXP, L)); for (int i=0; i<L; i++) INTEGER(iter)[i] = 0; double *a = Calloc(p, double); // Beta from previous iteration for (int j=0; j<p; j++) a[j] = 0; double a0 = 0; // Beta0 from previous iteration double *X = REAL(X_); double *y = REAL(y_); const char *penalty = CHAR(STRING_ELT(penalty_, 0)); double *lam = REAL(lambda); double eps = REAL(eps_)[0]; int max_iter = INTEGER(max_iter_)[0]; double gamma = REAL(gamma_)[0]; double *m = REAL(multiplier); double alpha = REAL(alpha_)[0]; int dfmax = INTEGER(dfmax_)[0]; int user = INTEGER(user_)[0]; int warn = INTEGER(warn_)[0]; double *r = Calloc(n, double); double *w = Calloc(n, double); double *s = Calloc(n, double); double *z = Calloc(p, double); double *eta = Calloc(n, double); int *e1 = Calloc(p, int); for (int j=0; j<p; j++) e1[j] = 0; int *e2 = Calloc(p, int); for (int j=0; j<p; j++) e2[j] = 0; double xwr, xwx, mu, u, v, cutoff, l1, l2, shift, si; int lstart; // Initialization double ybar = sum(y, n)/n; a0 = b0[0] = log(ybar); double nullDev = 0; for (int i=0;i<n;i++) if (y[i]!=0) nullDev += y[i]*log(y[i]/ybar); for (int i=0; i<n; i++) s[i] = y[i] - ybar; for (int i=0; i<n; i++) eta[i] = a0; for (int j=0; j<p; j++) z[j] = crossprod(X, s, n, j)/n; // If lam[0]=lam_max, skip lam[0] -- closed form sol'n available if (user) { lstart = 0; } else { lstart = 1; REAL(Dev)[0] = nullDev; } // Path for (int l=lstart; l<L; l++) { R_CheckUserInterrupt(); if (l != 0) { // Assign a, a0 a0 = b0[l-1]; for (int j=0; j<p; j++) a[j] = b[(l-1)*p+j]; // Check dfmax int nv = 0; for (int j=0; j<p; j++) { if (a[j] != 0) nv++; } if (nv > dfmax) { for (int ll=l; ll<L; ll++) INTEGER(iter)[ll] = NA_INTEGER; res = cleanupP(s, w, a, r, e1, e2, z, eta, beta0, beta, Dev, iter); return(res); } // Determine eligible set if (strcmp(penalty, "lasso")==0) cutoff = 2*lam[l] - lam[l-1]; if (strcmp(penalty, "MCP")==0) cutoff = lam[l] + gamma/(gamma-1)*(lam[l] - lam[l-1]); if (strcmp(penalty, "SCAD")==0) cutoff = lam[l] + gamma/(gamma-2)*(lam[l] - lam[l-1]); for (int j=0; j<p; j++) if (fabs(z[j]) > (cutoff * alpha * m[j])) e2[j] = 1; } else { // Determine eligible set double lmax = 0; for (int j=0; j<p; j++) if (fabs(z[j]) > lmax) lmax = fabs(z[j]); if (strcmp(penalty, "lasso")==0) cutoff = 2*lam[l] - lmax; if (strcmp(penalty, "MCP")==0) cutoff = lam[l] + gamma/(gamma-1)*(lam[l] - lmax); if (strcmp(penalty, "SCAD")==0) cutoff = lam[l] + gamma/(gamma-2)*(lam[l] - lmax); for (int j=0; j<p; j++) if (fabs(z[j]) > (cutoff * alpha * m[j])) e2[j] = 1; } while (INTEGER(iter)[l] < max_iter) { while (INTEGER(iter)[l] < max_iter) { while (INTEGER(iter)[l] < max_iter) { INTEGER(iter)[l]++; REAL(Dev)[l] = 0; double maxChange = 0; for (int i=0;i<n;i++) { mu = exp(eta[i]); w[i] = mu; s[i] = y[i] - mu; r[i] = s[i]/w[i]; if (y[i]!=0) REAL(Dev)[l] += y[i]*log(y[i]/mu); } if (REAL(Dev)[l]/nullDev < .01) { if (warn) warning("Model saturated; exiting..."); for (int ll=l; ll<L; ll++) INTEGER(iter)[ll] = NA_INTEGER; res = cleanupP(s, w, a, r, e1, e2, z, eta, beta0, beta, Dev, iter); return(res); } // Intercept xwr = crossprod(w, r, n, 0); xwx = sum(w, n); b0[l] = xwr/xwx + a0; for (int i=0; i<n; i++) { si = b0[l] - a0; r[i] -= si; eta[i] += si; } // Covariates for (int j=0; j<p; j++) { if (e1[j]) { // Calculate u, v xwr = wcrossprod(X, r, w, n, j); xwx = wsqsum(X, w, n, j); u = xwr/n + (xwx/n)*a[j]; v = xwx/n; // Update b_j l1 = lam[l] * m[j] * alpha; l2 = lam[l] * m[j] * (1-alpha); if (strcmp(penalty,"MCP")==0) b[l*p+j] = MCP(u, l1, l2, gamma, v); if (strcmp(penalty,"SCAD")==0) b[l*p+j] = SCAD(u, l1, l2, gamma, v); if (strcmp(penalty,"lasso")==0) b[l*p+j] = lasso(u, l1, l2, v); // Update r shift = b[l*p+j] - a[j]; if (shift !=0) { for (int i=0;i<n;i++) { si = shift*X[j*n+i]; r[i] -= si; eta[i] += si; } if (fabs(shift)*v > maxChange) maxChange = fabs(shift)*v; } } } // Check for convergence a0 = b0[l]; for (int j=0; j<p; j++) a[j] = b[l*p+j]; if (maxChange < eps) break; } // Scan for violations in strong set int violations = 0; for (int j=0; j<p; j++) { if (e1[j]==0 & e2[j]==1) { z[j] = crossprod(X, s, n, j)/n; l1 = lam[l] * m[j] * alpha; if (fabs(z[j]) > l1) { e1[j] = e2[j] = 1; violations++; } } } if (violations==0) break; } // Scan for violations in rest int violations = 0; for (int j=0; j<p; j++) { if (e2[j]==0) { z[j] = crossprod(X, s, n, j)/n; l1 = lam[l] * m[j] * alpha; if (fabs(z[j]) > l1) { e1[j] = e2[j] = 1; violations++; } } } if (violations==0) break; } } res = cleanupP(s, w, a, r, e1, e2, z, eta, beta0, beta, Dev, iter); return(res); }

SEXP attribute_hidden do_sprintf(/*const*/ CXXR::Expression* call, const CXXR::BuiltInFunction* op, CXXR::Environment* env, CXXR::RObject* const* args, int num_args, const CXXR::PairList* tags) { int i, nargs, cnt, v, thislen, nfmt, nprotect = 0; /* fmt2 is a copy of fmt with '*' expanded. bit will hold numeric formats and %<w>s, so be quite small. */ char fmt[MAXLINE+1], fmt2[MAXLINE+10], *fmtp, bit[MAXLINE+1], *outputString; const char *formatString; size_t n, cur, chunk; SEXP format, _this, a[MAXNARGS], ans /* -Wall */ = R_NilValue; int ns, maxlen, lens[MAXNARGS], nthis, nstar, star_arg = 0; static R_StringBuffer outbuff = {nullptr, 0, MAXELTSIZE}; Rboolean has_star, use_UTF8; #define _my_sprintf(_X_) \ { \ int nc = snprintf(bit, MAXLINE+1, fmtp, _X_); \ if (nc > MAXLINE) \ error(_("required resulting string length %d is greater than maximal %d"), \ nc, MAXLINE); \ } nargs = num_args; /* grab the format string */ format = num_args ? args[0] : nullptr; if (!isString(format)) error(_("'fmt' is not a character vector")); nfmt = length(format); if (nfmt == 0) return allocVector(STRSXP, 0); args = (args + 1); nargs--; if(nargs >= MAXNARGS) error(_("only %d arguments are allowed"), MAXNARGS); /* record the args for possible coercion and later re-ordering */ for(i = 0; i < nargs; i++, args = (args + 1)) { SEXPTYPE t_ai; a[i] = args[0]; if((t_ai = TYPEOF(a[i])) == LANGSXP || t_ai == SYMSXP) /* << maybe add more .. */ error(_("invalid type of argument[%d]: '%s'"), i+1, CHAR(type2str(t_ai))); lens[i] = length(a[i]); if(lens[i] == 0) return allocVector(STRSXP, 0); } #define CHECK_maxlen \ maxlen = nfmt; \ for(i = 0; i < nargs; i++) \ if(maxlen < lens[i]) maxlen = lens[i]; \ if(maxlen % nfmt) \ error(_("arguments cannot be recycled to the same length")); \ for(i = 0; i < nargs; i++) \ if(maxlen % lens[i]) \ error(_("arguments cannot be recycled to the same length")) CHECK_maxlen; outputString = CXXRCONSTRUCT(static_cast<char*>, R_AllocStringBuffer(0, &outbuff)); /* We do the format analysis a row at a time */ for(ns = 0; ns < maxlen; ns++) { outputString[0] = '\0'; use_UTF8 = CXXRCONSTRUCT(Rboolean, getCharCE(STRING_ELT(format, ns % nfmt)) == CE_UTF8); if (!use_UTF8) { for(i = 0; i < nargs; i++) { if (!isString(a[i])) continue; if (getCharCE(STRING_ELT(a[i], ns % lens[i])) == CE_UTF8) { use_UTF8 = TRUE; break; } } } formatString = TRANSLATE_CHAR(format, ns % nfmt); n = strlen(formatString); if (n > MAXLINE) error(_("'fmt' length exceeds maximal format length %d"), MAXLINE); /* process the format string */ for (cur = 0, cnt = 0; cur < n; cur += chunk) { const char *curFormat = formatString + cur, *ss; char *starc; ss = nullptr; if (formatString[cur] == '%') { /* handle special format command */ if (cur < n - 1 && formatString[cur + 1] == '%') { /* take care of %% in the format */ chunk = 2; strcpy(bit, "%"); } else { /* recognise selected types from Table B-1 of K&R */ /* NB: we deal with "%%" in branch above. */ /* This is MBCS-OK, as we are in a format spec */ chunk = strcspn(curFormat + 1, "diosfeEgGxXaA") + 2; if (cur + chunk > n) error(_("unrecognised format specification '%s'"), curFormat); strncpy(fmt, curFormat, chunk); fmt[chunk] = '\0'; nthis = -1; /* now look for %n$ or %nn$ form */ if (strlen(fmt) > 3 && fmt[1] >= '1' && fmt[1] <= '9') { v = fmt[1] - '0'; if(fmt[2] == '$') { if(v > nargs) error(_("reference to non-existent argument %d"), v); nthis = v-1; memmove(fmt+1, fmt+3, strlen(fmt)-2); } else if(fmt[2] >= '0' && fmt[2] <= '9' && fmt[3] == '$') { v = 10*v + fmt[2] - '0'; if(v > nargs) error(_("reference to non-existent argument %d"), v); nthis = v-1; memmove(fmt+1, fmt+4, strlen(fmt)-3); } } starc = Rf_strchr(fmt, '*'); if (starc) { /* handle * format if present */ nstar = -1; if (strlen(starc) > 3 && starc[1] >= '1' && starc[1] <= '9') { v = starc[1] - '0'; if(starc[2] == '$') { if(v > nargs) error(_("reference to non-existent argument %d"), v); nstar = v-1; memmove(starc+1, starc+3, strlen(starc)-2); } else if(starc[2] >= '0' && starc[2] <= '9' && starc[3] == '$') { v = 10*v + starc[2] - '0'; if(v > nargs) error(_("reference to non-existent argument %d"), v); nstar = v-1; memmove(starc+1, starc+4, strlen(starc)-3); } } if(nstar < 0) { if (cnt >= nargs) error(_("too few arguments")); nstar = cnt++; } if (Rf_strchr(starc+1, '*')) error(_("at most one asterisk '*' is supported in each conversion specification")); _this = a[nstar]; if(ns == 0 && TYPEOF(_this) == REALSXP) { _this = coerceVector(_this, INTSXP); PROTECT(a[nstar] = _this); nprotect++; } if(TYPEOF(_this) != INTSXP || LENGTH(_this)<1 || INTEGER(_this)[ns % LENGTH(_this)] == NA_INTEGER) error(_("argument for '*' conversion specification must be a number")); star_arg = INTEGER(_this)[ns % LENGTH(_this)]; has_star = TRUE; } else has_star = FALSE; if (fmt[strlen(fmt) - 1] == '%') { /* handle % with formatting options */ if (has_star) snprintf(bit, MAXLINE+1, fmt, star_arg); else strcpy(bit, fmt); /* was sprintf(..) for which some compiler warn */ } else { Rboolean did_this = FALSE; if(nthis < 0) { if (cnt >= nargs) error(_("too few arguments")); nthis = cnt++; } _this = a[nthis]; if (has_star) { size_t nf; char *p, *q = fmt2; for (p = fmt; *p; p++) if (*p == '*') q += sprintf(q, "%d", star_arg); else *q++ = *p; *q = '\0'; nf = strlen(fmt2); if (nf > MAXLINE) error(_("'fmt' length exceeds maximal format length %d"), MAXLINE); fmtp = fmt2; } else fmtp = fmt; #define CHECK_this_length \ PROTECT(_this); \ thislen = length(_this); \ if(thislen == 0) \ error(_("coercion has changed vector length to 0")) /* Now let us see if some minimal coercion would be sensible, but only do so once, for ns = 0: */ if(ns == 0) { SEXP tmp; Rboolean do_check; switch(*findspec(fmtp)) { case 'd': case 'i': case 'o': case 'x': case 'X': if(TYPEOF(_this) == REALSXP) { double r = REAL(_this)[0]; if(double(int( r)) == r) _this = coerceVector(_this, INTSXP); PROTECT(a[nthis] = _this); nprotect++; } break; case 'a': case 'A': case 'e': case 'f': case 'g': case 'E': case 'G': if(TYPEOF(_this) != REALSXP && /* no automatic as.double(<string>) : */ TYPEOF(_this) != STRSXP) { PROTECT(tmp = lang2(install("as.double"), _this)); #define COERCE_THIS_TO_A \ _this = eval(tmp, env); \ UNPROTECT(1); \ PROTECT(a[nthis] = _this); \ nprotect++; \ did_this = TRUE; \ CHECK_this_length; \ do_check = (CXXRCONSTRUCT(Rboolean, lens[nthis] == maxlen)); \ lens[nthis] = thislen; /* may have changed! */ \ if(do_check && thislen < maxlen) { \ CHECK_maxlen; \ } COERCE_THIS_TO_A } break; case 's': if(TYPEOF(_this) != STRSXP) { /* as.character method might call sprintf() */ size_t nc = strlen(outputString); char *z = Calloc(nc+1, char); strcpy(z, outputString); PROTECT(tmp = lang2(install("as.character"), _this)); COERCE_THIS_TO_A strcpy(outputString, z); Free(z); } break; default: break; } } /* ns == 0 (first-time only) */ if(!did_this) CHECK_this_length; switch(TYPEOF(_this)) { case LGLSXP: { int x = LOGICAL(_this)[ns % thislen]; if (checkfmt(fmtp, "di")) error(_("invalid format '%s'; %s"), fmtp, _("use format %d or %i for logical objects")); if (x == NA_LOGICAL) { fmtp[strlen(fmtp)-1] = 's'; _my_sprintf("NA") } else { _my_sprintf(x) } break; } case INTSXP: { int x = INTEGER(_this)[ns % thislen]; if (checkfmt(fmtp, "dioxX")) error(_("invalid format '%s'; %s"), fmtp, _("use format %d, %i, %o, %x or %X for integer objects")); if (x == NA_INTEGER) { fmtp[strlen(fmtp)-1] = 's'; _my_sprintf("NA") } else { _my_sprintf(x) } break; }

SEXP do_edit(SEXP call, SEXP op, SEXP args, SEXP rho) { int i, rc; ParseStatus status; SEXP x, fn, envir, ti, ed, t; char *filename, *editcmd, *vmaxsave, *cmd; FILE *fp; #ifdef Win32 char *title; #endif checkArity(op, args); vmaxsave = vmaxget(); x = CAR(args); args = CDR(args); if (TYPEOF(x) == CLOSXP) envir = CLOENV(x); else envir = R_NilValue; PROTECT(envir); fn = CAR(args); args = CDR(args); if (!isString(fn)) error(_("invalid argument to edit()")); if (LENGTH(STRING_ELT(fn, 0)) > 0) { filename = R_alloc(strlen(CHAR(STRING_ELT(fn, 0))), sizeof(char)); strcpy(filename, CHAR(STRING_ELT(fn, 0))); } else filename = DefaultFileName; if (x != R_NilValue) { if((fp=R_fopen(R_ExpandFileName(filename), "w")) == NULL) errorcall(call, _("unable to open file")); if (LENGTH(STRING_ELT(fn, 0)) == 0) EdFileUsed++; if (TYPEOF(x) != CLOSXP || isNull(t = getAttrib(x, R_SourceSymbol))) t = deparse1(x, 0, FORSOURCING); /* deparse for sourcing, not for display */ for (i = 0; i < LENGTH(t); i++) fprintf(fp, "%s\n", CHAR(STRING_ELT(t, i))); fclose(fp); } ti = CAR(args); args = CDR(args); ed = CAR(args); if (!isString(ed)) errorcall(call, _("argument 'editor' type not valid")); cmd = CHAR(STRING_ELT(ed, 0)); if (strlen(cmd) == 0) errorcall(call, _("argument 'editor' is not set")); editcmd = R_alloc(strlen(cmd) + strlen(filename) + 6, sizeof(char)); #ifdef Win32 if (!strcmp(cmd,"internal")) { if (!isString(ti)) error(_("'title' must be a string")); if (LENGTH(STRING_ELT(ti, 0)) > 0) { title = R_alloc(strlen(CHAR(STRING_ELT(ti, 0)))+1, sizeof(char)); strcpy(title, CHAR(STRING_ELT(ti, 0))); } else { title = R_alloc(strlen(filename)+1, sizeof(char)); strcpy(title, filename); } Rgui_Edit(filename, title, 1); } else { /* Quote path if necessary */ if(cmd[0] != '"' && Rf_strchr(cmd, ' ')) sprintf(editcmd, "\"%s\" \"%s\"", cmd, filename); else sprintf(editcmd, "%s \"%s\"", cmd, filename); rc = runcmd(editcmd, 1, 1, ""); if (rc == NOLAUNCH) errorcall(call, _("unable to run editor '%s'"), cmd); if (rc != 0) warningcall(call, _("editor ran but returned error status")); } #else if (ptr_R_EditFile) rc = ptr_R_EditFile(filename); else { sprintf(editcmd, "%s %s", cmd, filename); rc = R_system(editcmd); } if (rc != 0) errorcall(call, _("problem with running editor %s"), cmd); #endif if((fp = R_fopen(R_ExpandFileName(filename), "r")) == NULL) errorcall(call, _("unable to open file to read")); R_ParseCnt = 0; x = PROTECT(R_ParseFile(fp, -1, &status)); fclose(fp); if (status != PARSE_OK) errorcall(call, _("an error occurred on line %d\n use a command like\n x <- edit()\n to recover"), R_ParseError); R_ResetConsole(); { /* can't just eval(x) here */ int j, n; SEXP tmp = R_NilValue; n = LENGTH(x); for (j = 0 ; j < n ; j++) tmp = eval(VECTOR_ELT(x, j), R_GlobalEnv); x = tmp; } if (TYPEOF(x) == CLOSXP && envir != R_NilValue) SET_CLOENV(x, envir); UNPROTECT(2); vmaxset(vmaxsave); return (x); }

/* conditional independence tests. */ SEXP ctest(SEXP x, SEXP y, SEXP sx, SEXP data, SEXP test, SEXP B, SEXP alpha, SEXP learning) { int ntests = length(x), nobs = 0; double *pvalue = NULL, statistic = 0, df = NA_REAL; const char *t = CHAR(STRING_ELT(test, 0)); test_e test_type = test_label(t); SEXP xx, yy, zz, result; /* allocate the return value, which the same length as x. */ PROTECT(result = allocVector(REALSXP, ntests)); setAttrib(result, R_NamesSymbol, x); pvalue = REAL(result); /* set all elements to zero. */ memset(pvalue, '\0', ntests * sizeof(double)); /* extract the variables from the data. */ PROTECT(xx = c_dataframe_column(data, x, FALSE, FALSE)); PROTECT(yy = c_dataframe_column(data, y, TRUE, FALSE)); PROTECT(zz = c_dataframe_column(data, sx, FALSE, FALSE)); nobs = length(yy); if (IS_DISCRETE_ASYMPTOTIC_TEST(test_type)) { /* parametric tests for discrete variables. */ statistic = ct_discrete(xx, yy, zz, nobs, ntests, pvalue, &df, test_type); }/*THEN*/ else if ((test_type == COR) || (test_type == ZF) || (test_type == MI_G) || (test_type == MI_G_SH)) { /* parametric tests for Gaussian variables. */ statistic = ct_gaustests(xx, yy, zz, nobs, ntests, pvalue, &df, test_type); }/*THEN*/ else if (test_type == MI_CG) { /* conditional linear Gaussian mutual information test. */ statistic = ct_micg(xx, yy, zz, nobs, ntests, pvalue, &df); }/*THEN*/ else if (IS_DISCRETE_PERMUTATION_TEST(test_type)) { statistic = ct_dperm(xx, yy, zz, nobs, ntests, pvalue, &df, test_type, INT(B), IS_SMC(test_type) ? NUM(alpha) : 1); }/*THEN*/ else if (IS_CONTINUOUS_PERMUTATION_TEST(test_type)) { statistic = ct_gperm(xx, yy, zz, nobs, ntests, pvalue, &df, test_type, INT(B), IS_SMC(test_type) ? NUM(alpha) : 1); }/*THEN*/ UNPROTECT(4); /* catch-all for unknown tests (after deallocating memory.) */ if (test_type == ENOTEST) error("unknown test statstic '%s'.", t); /* increase the test counter. */ test_counter += ntests; if (isTRUE(learning)) return result; else return c_create_htest(statistic, test, pvalue[ntests - 1], df, B); }/*CTEST*/

/* convert an arc set to an edge list. */ SEXP arcs2elist(SEXP arcs, SEXP nodes, SEXP id, SEXP sublist) { int i = 0, j = 0, k = 0, nnodes = LENGTH(nodes), narcs = LENGTH(arcs)/2; int *e = NULL, *coords = NULL, *children = NULL; int *convert = LOGICAL(id), *sub = LOGICAL(sublist); SEXP try, elist, edges, temp, temp_name = R_NilValue; /* allocate the return value. */ PROTECT(elist = allocVector(VECSXP, nnodes)); /* set the node names. */ setAttrib(elist, R_NamesSymbol, nodes); if (*sub > 0) { /* allocate and initialize the subset name. */ PROTECT(temp_name = allocVector(STRSXP, 1)); SET_STRING_ELT(temp_name, 0, mkChar("edges")); }/*THEN*/ /* allocate the scratch space to keep track of the children of each node. */ children = alloc1dcont(nnodes); /* match the node labels in the arc set. */ PROTECT(try = match(nodes, arcs, 0)); coords = INTEGER(try); for (i = 0; i < narcs; i++) { children[coords[i] - 1]++; }/*FOR*/ for (i = 0; i < nnodes; i++) { /* allocate and set up the edge array. */ if (*convert > 0) { PROTECT(edges = allocVector(INTSXP, children[i])); e = INTEGER(edges); }/*THEN*/ else { PROTECT(edges = allocVector(STRSXP, children[i])); }/*ELSE*/ /* copy the coordinates of the adjacent nodes. */ for (j = 0, k = 0; j < narcs; j++) { if (coords[j] != i + 1) continue; if (*convert > 0) e[k++] = coords[narcs + j]; else SET_STRING_ELT(edges, k++, STRING_ELT(arcs, narcs + j)); if (k == children[i]) break; }/*FOR*/ if (*sub > 0) { /* allocate and set up the "edge" sublist for graphNEL. */ PROTECT(temp = allocVector(VECSXP, 1)); setAttrib(temp, R_NamesSymbol, temp_name); SET_VECTOR_ELT(temp, 0, edges); SET_VECTOR_ELT(elist, i, temp); UNPROTECT(1); }/*THEN*/ else { SET_VECTOR_ELT(elist, i, edges); }/*ELSE*/ UNPROTECT(1); }/*FOR*/ if (*sub > 0) UNPROTECT(3); else UNPROTECT(2); return elist; }/*ARCS2ELIST*/

SEXP attribute_hidden do_relop_dflt(SEXP call, SEXP op, SEXP x, SEXP y) { SEXP klass = R_NilValue, dims, tsp=R_NilValue, xnames, ynames; int nx, ny, xarray, yarray, xts, yts; Rboolean mismatch = FALSE, iS; PROTECT_INDEX xpi, ypi; PROTECT_WITH_INDEX(x, &xpi); PROTECT_WITH_INDEX(y, &ypi); nx = length(x); ny = length(y); /* pre-test to handle the most common case quickly. Used to skip warning too .... */ if (ATTRIB(x) == R_NilValue && ATTRIB(y) == R_NilValue && TYPEOF(x) == REALSXP && TYPEOF(y) == REALSXP && LENGTH(x) > 0 && LENGTH(y) > 0) { SEXP ans = real_relop((RELOP_TYPE) PRIMVAL(op), x, y); if (nx > 0 && ny > 0) mismatch = ((nx > ny) ? nx % ny : ny % nx) != 0; if (mismatch) { PROTECT(ans); warningcall(call, _("longer object length is not a multiple of shorter object length")); UNPROTECT(1); } UNPROTECT(2); return ans; } /* That symbols and calls were allowed was undocumented prior to R 2.5.0. We deparse them as deparse() would, minus attributes */ if ((iS = isSymbol(x)) || TYPEOF(x) == LANGSXP) { SEXP tmp = allocVector(STRSXP, 1); PROTECT(tmp); SET_STRING_ELT(tmp, 0, (iS) ? PRINTNAME(x) : STRING_ELT(deparse1(x, 0, DEFAULTDEPARSE), 0)); REPROTECT(x = tmp, xpi); UNPROTECT(1); } if ((iS = isSymbol(y)) || TYPEOF(y) == LANGSXP) { SEXP tmp = allocVector(STRSXP, 1); PROTECT(tmp); SET_STRING_ELT(tmp, 0, (iS) ? PRINTNAME(y) : STRING_ELT(deparse1(y, 0, DEFAULTDEPARSE), 0)); REPROTECT(y = tmp, ypi); UNPROTECT(1); } if (!isVector(x) || !isVector(y)) { if (isNull(x) || isNull(y)) { UNPROTECT(2); return allocVector(LGLSXP,0); } errorcall(call, _("comparison (%d) is possible only for atomic and list types"), PRIMVAL(op)); } if (TYPEOF(x) == EXPRSXP || TYPEOF(y) == EXPRSXP) errorcall(call, _("comparison is not allowed for expressions")); /* ELSE : x and y are both atomic or list */ if (LENGTH(x) <= 0 || LENGTH(y) <= 0) { UNPROTECT(2); return allocVector(LGLSXP,0); } mismatch = FALSE; xarray = isArray(x); yarray = isArray(y); xts = isTs(x); yts = isTs(y); if (nx > 0 && ny > 0) mismatch = ((nx > ny) ? nx % ny : ny % nx) != 0; if (xarray || yarray) { if (xarray && yarray) { if (!conformable(x, y)) errorcall(call, _("non-conformable arrays")); PROTECT(dims = getAttrib(x, R_DimSymbol)); } else if (xarray) { PROTECT(dims = getAttrib(x, R_DimSymbol)); } else /*(yarray)*/ { PROTECT(dims = getAttrib(y, R_DimSymbol)); } PROTECT(xnames = getAttrib(x, R_DimNamesSymbol)); PROTECT(ynames = getAttrib(y, R_DimNamesSymbol)); } else { PROTECT(dims = R_NilValue); PROTECT(xnames = getAttrib(x, R_NamesSymbol)); PROTECT(ynames = getAttrib(y, R_NamesSymbol)); } if (xts || yts) { if (xts && yts) { if (!tsConform(x, y)) errorcall(call, _("non-conformable time series")); PROTECT(tsp = getAttrib(x, R_TspSymbol)); PROTECT(klass = getAttrib(x, R_ClassSymbol)); } else if (xts) { if (length(x) < length(y)) ErrorMessage(call, ERROR_TSVEC_MISMATCH); PROTECT(tsp = getAttrib(x, R_TspSymbol)); PROTECT(klass = getAttrib(x, R_ClassSymbol)); } else /*(yts)*/ { if (length(y) < length(x)) ErrorMessage(call, ERROR_TSVEC_MISMATCH); PROTECT(tsp = getAttrib(y, R_TspSymbol)); PROTECT(klass = getAttrib(y, R_ClassSymbol)); } } if (mismatch) warningcall(call, _("longer object length is not a multiple of shorter object length")); if (isString(x) || isString(y)) { REPROTECT(x = coerceVector(x, STRSXP), xpi); REPROTECT(y = coerceVector(y, STRSXP), ypi); x = string_relop((RELOP_TYPE) PRIMVAL(op), x, y); } else if (isComplex(x) || isComplex(y)) { REPROTECT(x = coerceVector(x, CPLXSXP), xpi); REPROTECT(y = coerceVector(y, CPLXSXP), ypi); x = complex_relop((RELOP_TYPE) PRIMVAL(op), x, y, call); } else if (isReal(x) || isReal(y)) { REPROTECT(x = coerceVector(x, REALSXP), xpi); REPROTECT(y = coerceVector(y, REALSXP), ypi); x = real_relop((RELOP_TYPE) PRIMVAL(op), x, y); } else if (isInteger(x) || isInteger(y)) { REPROTECT(x = coerceVector(x, INTSXP), xpi); REPROTECT(y = coerceVector(y, INTSXP), ypi); x = integer_relop((RELOP_TYPE) PRIMVAL(op), x, y); } else if (isLogical(x) || isLogical(y)) { REPROTECT(x = coerceVector(x, LGLSXP), xpi); REPROTECT(y = coerceVector(y, LGLSXP), ypi); x = integer_relop((RELOP_TYPE) PRIMVAL(op), x, y); } else if (TYPEOF(x) == RAWSXP || TYPEOF(y) == RAWSXP) { REPROTECT(x = coerceVector(x, RAWSXP), xpi); REPROTECT(y = coerceVector(y, RAWSXP), ypi); x = raw_relop((RELOP_TYPE) PRIMVAL(op), x, y); } else errorcall(call, _("comparison of these types is not implemented")); PROTECT(x); if (dims != R_NilValue) { setAttrib(x, R_DimSymbol, dims); if (xnames != R_NilValue) setAttrib(x, R_DimNamesSymbol, xnames); else if (ynames != R_NilValue) setAttrib(x, R_DimNamesSymbol, ynames); } else { if (length(x) == length(xnames)) setAttrib(x, R_NamesSymbol, xnames); else if (length(x) == length(ynames)) setAttrib(x, R_NamesSymbol, ynames); } if (xts || yts) { setAttrib(x, R_TspSymbol, tsp); setAttrib(x, R_ClassSymbol, klass); UNPROTECT(2); } UNPROTECT(6); return x; }

static SEXP ExtractSubset(SEXP x, SEXP result, SEXP indx) //, SEXP call) { /* ExtractSubset is currently copied/inspired by subset.c from GNU-R This is slated to be reimplemented using the previous method in xts to get the correct dimnames */ int i, ii, n, nx, mode; SEXP tmp, tmp2; mode = TYPEOF(x); n = LENGTH(indx); nx = length(x); tmp = result; /*if (x == R_NilValue)*/ if (isNull(x)) return x; for (i = 0; i < n; i++) { ii = INTEGER(indx)[i]; if (ii != NA_INTEGER) ii--; switch (mode) { case LGLSXP: if (0 <= ii && ii < nx && ii != NA_LOGICAL) LOGICAL(result)[i] = LOGICAL(x)[ii]; else LOGICAL(result)[i] = NA_LOGICAL; break; case INTSXP: if (0 <= ii && ii < nx && ii != NA_INTEGER) INTEGER(result)[i] = INTEGER(x)[ii]; else INTEGER(result)[i] = NA_INTEGER; break; case REALSXP: if (0 <= ii && ii < nx && ii != NA_INTEGER) REAL(result)[i] = REAL(x)[ii]; else REAL(result)[i] = NA_REAL; break; case CPLXSXP: if (0 <= ii && ii < nx && ii != NA_INTEGER) { COMPLEX(result)[i] = COMPLEX(x)[ii]; } else { COMPLEX(result)[i].r = NA_REAL; COMPLEX(result)[i].i = NA_REAL; } break; case STRSXP: if (0 <= ii && ii < nx && ii != NA_INTEGER) SET_STRING_ELT(result, i, STRING_ELT(x, ii)); else SET_STRING_ELT(result, i, NA_STRING); break; case VECSXP: case EXPRSXP: if (0 <= ii && ii < nx && ii != NA_INTEGER) SET_VECTOR_ELT(result, i, VECTOR_ELT(x, ii)); else SET_VECTOR_ELT(result, i, R_NilValue); break; case LISTSXP: /* cannot happen: pairlists are coerced to lists */ case LANGSXP: if (0 <= ii && ii < nx && ii != NA_INTEGER) { tmp2 = nthcdr(x, ii); SETCAR(tmp, CAR(tmp2)); SET_TAG(tmp, TAG(tmp2)); } else SETCAR(tmp, R_NilValue); tmp = CDR(tmp); break; case RAWSXP: if (0 <= ii && ii < nx && ii != NA_INTEGER) RAW(result)[i] = RAW(x)[ii]; else RAW(result)[i] = (Rbyte) 0; break; default: error("error in subset\n"); break; } } return result; }

/* do the two objects compute as identical? Also used in unique.c */ Rboolean R_compute_identical(SEXP x, SEXP y, int flags) { SEXP ax, ay, atrx, atry; if(x == y) /* same pointer */ return TRUE; if(TYPEOF(x) != TYPEOF(y)) return FALSE; if(OBJECT(x) != OBJECT(y)) return FALSE; /* Skip attribute checks for CHARSXP -- such attributes are used for the cache. */ if(TYPEOF(x) == CHARSXP) { /* This matches NAs */ return Seql(x, y); } ax = ATTRIB(x); ay = ATTRIB(y); if (!ATTR_AS_SET) { if(!R_compute_identical(ax, ay, flags)) return FALSE; } /* Attributes are special: they should be tagged pairlists. We don't test them if they are not, and we do not test the order if they are. This code is not very efficient, but then neither is using pairlists for attributes. If long attribute lists become more common (and they are used for S4 slots) we should store them in a hash table. */ else if(ax != R_NilValue || ay != R_NilValue) { if(ax == R_NilValue || ay == R_NilValue) return FALSE; if(TYPEOF(ax) != LISTSXP || TYPEOF(ay) != LISTSXP) { warning(_("ignoring non-pairlist attributes")); } else { SEXP elx, ely; if(length(ax) != length(ay)) return FALSE; /* They are the same length and should have unique non-empty non-NA tags */ for(elx = ax; elx != R_NilValue; elx = CDR(elx)) { const char *tx = CHAR(PRINTNAME(TAG(elx))); for(ely = ay; ely != R_NilValue; ely = CDR(ely)) if(streql(tx, CHAR(PRINTNAME(TAG(ely))))) { /* We need to treat row.names specially here */ if(streql(tx, "row.names")) { PROTECT(atrx = getAttrib(x, R_RowNamesSymbol)); PROTECT(atry = getAttrib(y, R_RowNamesSymbol)); if(!R_compute_identical(atrx, atry, flags)) { UNPROTECT(2); return FALSE; } else UNPROTECT(2); } else if(!R_compute_identical(CAR(elx), CAR(ely), flags)) return FALSE; break; } if(ely == R_NilValue) return FALSE; } } } switch (TYPEOF(x)) { case NILSXP: return TRUE; case LGLSXP: if (length(x) != length(y)) return FALSE; /* Use memcmp (which is ISO C90) to speed up the comparison */ return memcmp((void *)LOGICAL(x), (void *)LOGICAL(y), length(x) * sizeof(int)) == 0 ? TRUE : FALSE; case INTSXP: if (length(x) != length(y)) return FALSE; /* Use memcmp (which is ISO C90) to speed up the comparison */ return memcmp((void *)INTEGER(x), (void *)INTEGER(y), length(x) * sizeof(int)) == 0 ? TRUE : FALSE; case REALSXP: { int n = length(x); if(n != length(y)) return FALSE; else { double *xp = REAL(x), *yp = REAL(y); int i, ne_strict = NUM_EQ | (SINGLE_NA << 1); for(i = 0; i < n; i++) if(neWithNaN(xp[i], yp[i], ne_strict)) return FALSE; } return TRUE; } case CPLXSXP: { int n = length(x); if(n != length(y)) return FALSE; else { Rcomplex *xp = COMPLEX(x), *yp = COMPLEX(y); int i, ne_strict = NUM_EQ | (SINGLE_NA << 1); for(i = 0; i < n; i++) if(neWithNaN(xp[i].r, yp[i].r, ne_strict) || neWithNaN(xp[i].i, yp[i].i, ne_strict)) return FALSE; } return TRUE; } case STRSXP: { int i, n = length(x); if(n != length(y)) return FALSE; for(i = 0; i < n; i++) { /* This special-casing for NAs is not needed */ Rboolean na1 = (STRING_ELT(x, i) == NA_STRING), na2 = (STRING_ELT(y, i) == NA_STRING); if(na1 ^ na2) return FALSE; if(na1 && na2) continue; if (! Seql(STRING_ELT(x, i), STRING_ELT(y, i))) return FALSE; } return TRUE; } case CHARSXP: /* Probably unreachable, but better safe than sorry... */ { /* This matches NAs */ return Seql(x, y); } case VECSXP: case EXPRSXP: { int i, n = length(x); if(n != length(y)) return FALSE; for(i = 0; i < n; i++) if(!R_compute_identical(VECTOR_ELT(x, i),VECTOR_ELT(y, i), flags)) return FALSE; return TRUE; } case LANGSXP: case LISTSXP: { while (x != R_NilValue) { if(y == R_NilValue) return FALSE; if(!R_compute_identical(CAR(x), CAR(y), flags)) return FALSE; if(!R_compute_identical(PRINTNAME(TAG(x)), PRINTNAME(TAG(y)), flags)) return FALSE; x = CDR(x); y = CDR(y); } return(y == R_NilValue); } case CLOSXP: return(R_compute_identical(FORMALS(x), FORMALS(y), flags) && R_compute_identical(BODY_EXPR(x), BODY_EXPR(y), flags) && (CLOENV(x) == CLOENV(y) ? TRUE : FALSE) && (IGNORE_BYTECODE || R_compute_identical(BODY(x), BODY(y), flags)) ); case SPECIALSXP: case BUILTINSXP: return(PRIMOFFSET(x) == PRIMOFFSET(y) ? TRUE : FALSE); case ENVSXP: case SYMSXP: case WEAKREFSXP: case BCODESXP: /**** is this the best approach? */ return(x == y ? TRUE : FALSE); case EXTPTRSXP: return (EXTPTR_PTR(x) == EXTPTR_PTR(y) ? TRUE : FALSE); case RAWSXP: if (length(x) != length(y)) return FALSE; /* Use memcmp (which is ISO C90) to speed up the comparison */ return memcmp((void *)RAW(x), (void *)RAW(y), length(x) * sizeof(Rbyte)) == 0 ? TRUE : FALSE; /* case PROMSXP: args are evaluated, so will not be seen */ /* test for equality of the substituted expression -- or should we require both expression and environment to be identical? */ /*#define PREXPR(x) ((x)->u.promsxp.expr) #define PRENV(x) ((x)->u.promsxp.env) return(R_compute_identical(subsititute(PREXPR(x), PRENV(x), flags), subsititute(PREXPR(y), PRENV(y))));*/ case S4SXP: /* attributes already tested, so all slots identical */ return TRUE; default: /* these are all supposed to be types that represent constant entities, so no further testing required ?? */ printf("Unknown Type: %s (%x)\n", type2char(TYPEOF(x)), TYPEOF(x)); return TRUE; } }

SEXP _do_subset_xts (SEXP x, SEXP sr, SEXP sc, SEXP drop) { SEXP result; int i, j, nr, nc, nrs, ncs; int P=0; SEXP Dim = getAttrib(x, R_DimSymbol); nrs = nrows(x);ncs = ncols(x); nr = length(sr); nc = length(sc); SEXP oindex, nindex; oindex = getAttrib(x, install("index")); PROTECT(nindex = allocVector(TYPEOF(oindex), nr)); P++; PROTECT(result = allocVector(TYPEOF(x), nr*nc)); P++; j = 0; double *real_nindex=NULL, *real_oindex, *real_result=NULL, *real_x=NULL; int *int_nindex=NULL, *int_oindex, *int_result=NULL, *int_x=NULL; int *int_sr=NULL, *int_sc=NULL; int_sr = INTEGER(sr); int_sc = INTEGER(sc); copyAttributes(x, result); if(TYPEOF(x)==LGLSXP) { int_x = LOGICAL(x); int_result = LOGICAL(result); if(TYPEOF(nindex)==INTSXP) { int_nindex = INTEGER(nindex); int_oindex = INTEGER(oindex); for(i=0; i<nr; i++) { if(int_sr[i] == NA_INTEGER) error("'i' contains NA"); if(int_sr[i] > nrs || int_sc[j] > ncs) error("'i' or 'j' out of range"); int_nindex[i] = int_oindex[int_sr[i]-1]; if(int_sc[j] == NA_INTEGER) int_result[i+j*nr] = NA_INTEGER; else int_result[i+j*nr] = int_x[int_sr[i]-1 + ((int_sc[j]-1) * nrs)]; } } else if(TYPEOF(nindex)==REALSXP) { real_nindex = REAL(nindex); real_oindex = REAL(oindex); for(i=0; i<nr; i++) { real_nindex[i] = real_oindex[int_sr[i]-1]; if(int_sc[j] == NA_INTEGER) int_result[i+j*nr] = NA_INTEGER; else int_result[i+j*nr] = int_x[int_sr[i]-1 + ((int_sc[j]-1) * nrs)]; } } copyAttributes(oindex, nindex); setAttrib(result, install("index"), nindex); for(j=1; j<nc; j++) { for(i=0; i<nr; i++) { if(int_sc[j] == NA_INTEGER) int_result[i+j*nr] = NA_INTEGER; else int_result[i+j*nr] = int_x[int_sr[i]-1 + ((int_sc[j]-1) * nrs)]; } } } else /* branch into INTSXP and REALSXP, as these are most common/important types for time series data */ if(TYPEOF(x)==INTSXP) { int_x = INTEGER(x); int_result = INTEGER(result); if(TYPEOF(nindex)==INTSXP) { int_nindex = INTEGER(nindex); int_oindex = INTEGER(oindex); for(i=0; i<nr; i++) { if(int_sr[i] == NA_INTEGER) error("'i' contains NA"); if(int_sr[i] > nrs || int_sc[j] > ncs) error("'i' or 'j' out of range"); int_nindex[i] = int_oindex[int_sr[i]-1]; if(int_sc[j] == NA_INTEGER) int_result[i+j*nr] = NA_INTEGER; else int_result[i+j*nr] = int_x[int_sr[i]-1 + ((int_sc[j]-1) * nrs)]; } } else if(TYPEOF(nindex)==REALSXP) { real_nindex = REAL(nindex); real_oindex = REAL(oindex); for(i=0; i<nr; i++) { if(int_sr[i] == NA_INTEGER) error("'i' contains NA"); if(int_sr[i] > nrs || int_sc[j] > ncs) error("'i' or 'j' out of range"); real_nindex[i] = real_oindex[int_sr[i]-1]; if(int_sc[j] == NA_INTEGER) int_result[i+j*nr] = NA_INTEGER; else int_result[i+j*nr] = int_x[int_sr[i]-1 + ((int_sc[j]-1) * nrs)]; } } copyAttributes(oindex, nindex); setAttrib(result, install("index"), nindex); /* loop through remaining columns */ for(j=1; j<nc; j++) { for(i=0; i<nr; i++) { if(int_sc[j] == NA_INTEGER) int_result[i+j*nr] = NA_INTEGER; else int_result[i+j*nr] = int_x[int_sr[i]-1 + ((int_sc[j]-1) * nrs)]; } } } else if(TYPEOF(x)==REALSXP) { real_x = REAL(x); real_result = REAL(result); if(TYPEOF(nindex)==INTSXP) { int_nindex = INTEGER(nindex); int_oindex = INTEGER(oindex); for(i=0; i<nr; i++) { if(int_sr[i] == NA_INTEGER) error("'i' contains NA"); if(int_sr[i] > nrs || int_sc[j] > ncs) error("'i' or 'j' out of range"); int_nindex[i] = int_oindex[int_sr[i]-1]; if(int_sc[j] == NA_INTEGER) real_result[i+j*nr] = NA_REAL; else real_result[i+j*nr] = real_x[int_sr[i]-1 + ((int_sc[j]-1) * nrs)]; } } else if(TYPEOF(nindex)==REALSXP) { real_nindex = REAL(nindex); real_oindex = REAL(oindex); for(i=0; i<nr; i++) { if(int_sr[i] == NA_INTEGER) error("'i' contains NA"); if(int_sr[i] > nrs || int_sc[j] > ncs) error("'i' or 'j' out of range"); real_nindex[i] = real_oindex[int_sr[i]-1]; if(int_sc[j] == NA_INTEGER) real_result[i+j*nr] = NA_REAL; else real_result[i+j*nr] = real_x[int_sr[i]-1 + ((int_sc[j]-1) * nrs)]; } } copyAttributes(oindex, nindex); setAttrib(result, install("index"), nindex); for(j=1; j<nc; j++) { for(i=0; i<nr; i++) { if(int_sc[j] == NA_INTEGER) real_result[i+j*nr] = NA_REAL; else real_result[i+j*nr] = real_x[int_sr[i]-1 + ((int_sc[j]-1) * nrs)]; } } } else if(TYPEOF(x)==CPLXSXP) { /* real_x = REAL(x); real_result = REAL(result); */ if(TYPEOF(nindex)==INTSXP) { int_nindex = INTEGER(nindex); int_oindex = INTEGER(oindex); for(i=0; i<nr; i++) { if(int_sr[i] == NA_INTEGER) error("'i' contains NA"); if(int_sr[i] > nrs || int_sc[j] > ncs) error("'i' or 'j' out of range"); int_nindex[i] = int_oindex[int_sr[i]-1]; if(int_sc[j] == NA_INTEGER) { COMPLEX(result)[i+j*nr].r = NA_REAL; COMPLEX(result)[i+j*nr].i = NA_REAL; } else COMPLEX(result)[i+j*nr] = COMPLEX(x)[int_sr[i]-1 + ((int_sc[j]-1) * nrs)]; } } else if(TYPEOF(nindex)==REALSXP) { real_nindex = REAL(nindex); real_oindex = REAL(oindex); for(i=0; i<nr; i++) { if(int_sr[i] == NA_INTEGER) error("'i' contains NA"); if(int_sr[i] > nrs || int_sc[j] > ncs) error("'i' or 'j' out of range"); real_nindex[i] = real_oindex[int_sr[i]-1]; if(int_sc[j] == NA_INTEGER) { COMPLEX(result)[i+j*nr].r = NA_REAL; COMPLEX(result)[i+j*nr].i = NA_REAL; } else COMPLEX(result)[i+j*nr] = COMPLEX(x)[int_sr[i]-1 + ((int_sc[j]-1) * nrs)]; } } copyAttributes(oindex, nindex); setAttrib(result, install("index"), nindex); for(j=1; j<nc; j++) { for(i=0; i<nr; i++) { if(int_sc[j] == NA_INTEGER) { COMPLEX(result)[i+j*nr].r = NA_REAL; COMPLEX(result)[i+j*nr].i = NA_REAL; } else COMPLEX(result)[i+j*nr] = COMPLEX(x)[int_sr[i]-1 + ((int_sc[j]-1) * nrs)]; } } } else if(TYPEOF(x)==STRSXP) { if(TYPEOF(nindex)==INTSXP) { int_nindex = INTEGER(nindex); int_oindex = INTEGER(oindex); for(i=0; i<nr; i++) { if(int_sr[i] == NA_INTEGER) error("'i' contains NA"); if(int_sr[i] > nrs || int_sc[j] > ncs) error("'i' or 'j' out of range"); int_nindex[i] = int_oindex[int_sr[i]-1]; if(int_sc[j] == NA_INTEGER) SET_STRING_ELT(result, i+j*nr, NA_STRING); else SET_STRING_ELT(result, i+j*nr, STRING_ELT(x, int_sr[i]-1 + ((int_sc[j]-1) * nrs))); } } else if(TYPEOF(nindex)==REALSXP) { real_nindex = REAL(nindex); real_oindex = REAL(oindex); for(i=0; i<nr; i++) { if(int_sr[i] == NA_INTEGER) error("'i' contains NA"); if(int_sr[i] > nrs || int_sc[j] > ncs) error("'i' or 'j' out of range"); real_nindex[i] = real_oindex[int_sr[i]-1]; if(int_sc[j] == NA_INTEGER) SET_STRING_ELT(result, i+j*nr, NA_STRING); else SET_STRING_ELT(result, i+j*nr, STRING_ELT(x, int_sr[i]-1 + ((int_sc[j]-1) * nrs))); } } copyAttributes(oindex, nindex); setAttrib(result, install("index"), nindex); for(j=1; j<nc; j++) { for(i=0; i<nr; i++) { if(int_sc[j] == NA_INTEGER) SET_STRING_ELT(result, i+j*nr, NA_STRING); else SET_STRING_ELT(result, i+j*nr, STRING_ELT(x, int_sr[i]-1 + ((int_sc[j]-1) * nrs))); } } } else if(TYPEOF(x)==RAWSXP) { /* real_x = REAL(x); real_result = REAL(result); */ if(TYPEOF(nindex)==INTSXP) { int_nindex = INTEGER(nindex); int_oindex = INTEGER(oindex); for(i=0; i<nr; i++) { if(int_sr[i] == NA_INTEGER) error("'i' contains NA"); if(int_sr[i] > nrs || int_sc[j] > ncs) error("'i' or 'j' out of range"); int_nindex[i] = int_oindex[int_sr[i]-1]; if(int_sc[j] == NA_INTEGER) RAW(result)[i+j*nr] = 0; else RAW(result)[i+j*nr] = RAW(x)[int_sr[i]-1 + ((int_sc[j]-1) * nrs)]; } } else if(TYPEOF(nindex)==REALSXP) { real_nindex = REAL(nindex); real_oindex = REAL(oindex); for(i=0; i<nr; i++) { if(int_sr[i] == NA_INTEGER) error("'i' contains NA"); if(int_sr[i] > nrs || int_sc[j] > ncs) error("'i' or 'j' out of range"); real_nindex[i] = real_oindex[int_sr[i]-1]; if(int_sc[j] == NA_INTEGER) RAW(result)[i+j*nr] = 0; else RAW(result)[i+j*nr] = RAW(x)[int_sr[i]-1 + ((int_sc[j]-1) * nrs)]; } } copyAttributes(oindex, nindex); setAttrib(result, install("index"), nindex); for(j=1; j<nc; j++) { for(i=0; i<nr; i++) { if(int_sc[j] == NA_INTEGER) RAW(result)[i+j*nr] = 0; else RAW(result)[i+j*nr] = RAW(x)[int_sr[i]-1 + ((int_sc[j]-1) * nrs)]; } } } if(!isNull(Dim) && nr >= 0 && nc >= 0) { SEXP dim; PROTECT(dim = allocVector(INTSXP,2));P++; INTEGER(dim)[0] = nr; INTEGER(dim)[1] = nc; setAttrib(result, R_DimSymbol, dim); if (nr >= 0 && nc >= 0) { SEXP dimnames, dimnamesnames, newdimnames; dimnames = getAttrib(x, R_DimNamesSymbol); dimnamesnames = getAttrib(dimnames, R_NamesSymbol); if (!isNull(dimnames)) { PROTECT(newdimnames = allocVector(VECSXP, 2)); if (TYPEOF(dimnames) == VECSXP) { SET_VECTOR_ELT(newdimnames, 0, ExtractSubset(VECTOR_ELT(dimnames, 0), allocVector(STRSXP, nr), sr)); SET_VECTOR_ELT(newdimnames, 1, ExtractSubset(VECTOR_ELT(dimnames, 1), allocVector(STRSXP, nc), sc)); } else { SET_VECTOR_ELT(newdimnames, 0, ExtractSubset(CAR(dimnames), allocVector(STRSXP, nr), sr)); SET_VECTOR_ELT(newdimnames, 1, ExtractSubset(CADR(dimnames), allocVector(STRSXP, nc), sc)); } setAttrib(newdimnames, R_NamesSymbol, dimnamesnames); setAttrib(result, R_DimNamesSymbol, newdimnames); UNPROTECT(1); } } } setAttrib(result, R_ClassSymbol, getAttrib(x, R_ClassSymbol)); if(nc == 1 && LOGICAL(drop)[0]) setAttrib(result, R_DimSymbol, R_NilValue); UNPROTECT(P); return result; }

void bmerge_r(int xlowIn, int xuppIn, int ilowIn, int iuppIn, int col, int lowmax, int uppmax) // col is >0 and <=ncol-1 if this range of [xlow,xupp] and [ilow,iupp] match up to but not including that column // lowmax=1 if xlowIn is the lower bound of this group (needed for roll) // uppmax=1 if xuppIn is the upper bound of this group (needed for roll) { int xlow=xlowIn, xupp=xuppIn, ilow=ilowIn, iupp=iuppIn, j, k, ir, lir, tmp; ir = lir = ilow + (iupp-ilow)/2; // lir = logical i row. if (o) ir = o[lir]-1; // ir = the actual i row if i were ordered ic = VECTOR_ELT(i,icols[col]-1); // ic = i column xc = VECTOR_ELT(x,xcols[col]-1); // xc = x column // it was checked in bmerge() that the types are equal switch (TYPEOF(xc)) { case LGLSXP : case INTSXP : // including factors ival.i = INTEGER(ic)[ir]; while(xlow < xupp-1) { mid = xlow + (xupp-xlow)/2; // Same as (xlow+xupp)/2 but without risk of overflow xval.i = INTEGER(xc)[mid]; if (xval.i<ival.i) { // relies on NA_INTEGER == INT_MIN, tested in init.c xlow=mid; } else if (xval.i>ival.i) { // TO DO: is *(&xlow, &xupp)[0|1]=mid more efficient than branch? xupp=mid; } else { // xval.i == ival.i including NA_INTEGER==NA_INTEGER // branch mid to find start and end of this group in this column // TO DO?: not if mult=first|last and col<ncol-1 tmplow = mid; tmpupp = mid; while(tmplow<xupp-1) { mid = tmplow + (xupp-tmplow)/2; xval.i = INTEGER(xc)[mid]; if (xval.i == ival.i) tmplow=mid; else xupp=mid; } while(xlow<tmpupp-1) { mid = xlow + (tmpupp-xlow)/2; xval.i = INTEGER(xc)[mid]; if (xval.i == ival.i) tmpupp=mid; else xlow=mid; } // xlow and xupp now surround the group in xc, we only need this range for the next column break; } } tmplow = lir; tmpupp = lir; while(tmplow<iupp-1) { // TO DO: could double up from lir rather than halving from iupp mid = tmplow + (iupp-tmplow)/2; xval.i = INTEGER(ic)[ o ? o[mid]-1 : mid ]; // reuse xval to search in i if (xval.i == ival.i) tmplow=mid; else iupp=mid; } while(ilow<tmpupp-1) { mid = ilow + (tmpupp-ilow)/2; xval.i = INTEGER(ic)[ o ? o[mid]-1 : mid ]; if (xval.i == ival.i) tmpupp=mid; else ilow=mid; } // ilow and iupp now surround the group in ic, too break; case STRSXP : ival.s = STRING_ELT(ic,ir); while(xlow < xupp-1) { mid = xlow + (xupp-xlow)/2; xval.s = STRING_ELT(xc,mid); if (enc_warn && (ENC_KNOWN(ival.s) || ENC_KNOWN(xval.s))) { // The || is only done here to avoid the warning message being repeating in this code. warning("A known encoding (latin1 or UTF-8) was detected in a join column. data.table compares the bytes currently, so doesn't support *mixed* encodings well; i.e., using both latin1 and UTF-8, or if any unknown encodings are non-ascii and some of those are marked known and others not. But if either latin1 or UTF-8 is used exclusively, and all unknown encodings are ascii, then the result should be ok. In future we will check for you and avoid this warning if everything is ok. The tricky part is doing this without impacting performance for ascii-only cases."); // TO DO: check and warn in forder whether any strings are non-ascii (>127) but unknown encoding // check in forder whether both latin1 and UTF-8 have been used // See bugs #5159 and #5266 and related #5295 to revisit enc_warn = FALSE; // just warn once } tmp = StrCmp(xval.s, ival.s); // uses pointer equality first, NA_STRING are allowed and joined to, then uses strcmp on CHAR(). if (tmp == 0) { // TO DO: deal with mixed encodings and locale optionally tmplow = mid; tmpupp = mid; while(tmplow<xupp-1) { mid = tmplow + (xupp-tmplow)/2; xval.s = STRING_ELT(xc,mid); if (ival.s == xval.s) tmplow=mid; else xupp=mid; // the == here assumes (within this column) no mixing of latin1 and UTF-8, and no unknown non-ascii } // TO DO: add checks to forder, see above. while(xlow<tmpupp-1) { mid = xlow + (tmpupp-xlow)/2; xval.s = STRING_ELT(xc,mid); if (ival.s == xval.s) tmpupp=mid; else xlow=mid; // see above re == } break; } else if (tmp < 0) { xlow=mid; } else { xupp=mid; } } tmplow = lir; tmpupp = lir; while(tmplow<iupp-1) { mid = tmplow + (iupp-tmplow)/2; xval.s = STRING_ELT(ic, o ? o[mid]-1 : mid); if (xval.s == ival.s) tmplow=mid; else iupp=mid; // see above re == } while(ilow<tmpupp-1) { mid = ilow + (tmpupp-ilow)/2; xval.s = STRING_ELT(ic, o ? o[mid]-1 : mid); if (xval.s == ival.s) tmpupp=mid; else ilow=mid; // see above re == } break; case REALSXP : class = getAttrib(xc, R_ClassSymbol); twiddle = (isString(class) && STRING_ELT(class, 0)==char_integer64) ? &i64twiddle : &dtwiddle; ival.ll = twiddle(DATAPTR(ic), ir, 1); while(xlow < xupp-1) { mid = xlow + (xupp-xlow)/2; xval.ll = twiddle(DATAPTR(xc), mid, 1); if (xval.ll<ival.ll) { xlow=mid; } else if (xval.ll>ival.ll) { xupp=mid; } else { // xval.ll == ival.ll) tmplow = mid; tmpupp = mid; while(tmplow<xupp-1) { mid = tmplow + (xupp-tmplow)/2; xval.ll = twiddle(DATAPTR(xc), mid, 1); if (xval.ll == ival.ll) tmplow=mid; else xupp=mid; } while(xlow<tmpupp-1) { mid = xlow + (tmpupp-xlow)/2; xval.ll = twiddle(DATAPTR(xc), mid, 1); if (xval.ll == ival.ll) tmpupp=mid; else xlow=mid; } break; } } tmplow = lir; tmpupp = lir; while(tmplow<iupp-1) { mid = tmplow + (iupp-tmplow)/2; xval.ll = twiddle(DATAPTR(ic), o ? o[mid]-1 : mid, 1 ); if (xval.ll == ival.ll) tmplow=mid; else iupp=mid; } while(ilow<tmpupp-1) { mid = ilow + (tmpupp-ilow)/2; xval.ll = twiddle(DATAPTR(ic), o ? o[mid]-1 : mid, 1 ); if (xval.ll == ival.ll) tmpupp=mid; else ilow=mid; } break; default: error("Type '%s' not supported as key column", type2char(TYPEOF(xc))); } if (xlow<xupp-1) { // if value found, low and upp surround it, unlike standard binary search where low falls on it if (col<ncol-1) bmerge_r(xlow, xupp, ilow, iupp, col+1, 1, 1); // final two 1's are lowmax and uppmax else { int len = xupp-xlow-1; if (len>1) allLen1[0] = FALSE; for (j=ilow+1; j<iupp; j++) { // usually iterates once only for j=ir if (o) k=o[j]-1; else k=j; retFirst[k] = xlow+2; // extra +1 for 1-based indexing at R level retLength[k]= len; } } } else if (roll!=0.0 && col==ncol-1) { // runs once per i row (not each search test), so not hugely time critical if (xlow != xupp-1 || xlow<xlowIn || xupp>xuppIn) error("Internal error: xlow!=xupp-1 || xlow<xlowIn || xupp>xuppIn"); if (nearest) { // value of roll ignored currently when nearest if ( (!lowmax || xlow>xlowIn) && (!uppmax || xupp<xuppIn) ) { if ( ( TYPEOF(ic)==REALSXP && REAL(ic)[ir]-REAL(xc)[xlow] <= REAL(xc)[xupp]-REAL(ic)[ir] ) || ( TYPEOF(ic)<=INTSXP && INTEGER(ic)[ir]-INTEGER(xc)[xlow] <= INTEGER(xc)[xupp]-INTEGER(ic)[ir] )) { retFirst[ir] = xlow+1; retLength[ir] = 1; } else { retFirst[ir] = xupp+1; retLength[ir] = 1; } } else if (uppmax && xupp==xuppIn && rollends[1]) { retFirst[ir] = xlow+1; retLength[ir] = 1; } else if (lowmax && xlow==xlowIn && rollends[0]) { retFirst[ir] = xupp+1; retLength[ir] = 1; } } else { if ( ( (roll>0.0 && (!lowmax || xlow>xlowIn) && (xupp<xuppIn || !uppmax || rollends[1])) || (roll<0.0 && xupp==xuppIn && uppmax && rollends[1]) ) && ( (TYPEOF(ic)==REALSXP && REAL(ic)[ir]-REAL(xc)[xlow]-rollabs<1e-6) || (TYPEOF(ic)<=INTSXP && (double)(INTEGER(ic)[ir]-INTEGER(xc)[xlow])-rollabs<1e-6 ) || (TYPEOF(ic)==STRSXP) )) { retFirst[ir] = xlow+1; retLength[ir] = 1; } else if ( ( (roll<0.0 && (!uppmax || xupp<xuppIn) && (xlow>xlowIn || !lowmax || rollends[0])) || (roll>0.0 && xlow==xlowIn && lowmax && rollends[0]) ) && ( (TYPEOF(ic)==REALSXP && REAL(xc)[xupp]-REAL(ic)[ir]-rollabs<1e-6) || (TYPEOF(ic)<=INTSXP && (double)(INTEGER(xc)[xupp]-INTEGER(ic)[ir])-rollabs<1e-6 ) || (TYPEOF(ic)==STRSXP) )) { retFirst[ir] = xupp+1; // == xlow+2 retLength[ir] = 1; } } if (iupp-ilow > 2 && retFirst[ir]!=NA_INTEGER) { // >=2 equal values in the last column being rolling to the same point. for (j=ilow+1; j<iupp; j++) { // will rewrite retFirst[ir] to itself, but that's ok if (o) k=o[j]-1; else k=j; retFirst[k] = retFirst[ir]; retLength[k]= retLength[ir]; } } } if (ilow>ilowIn && (xlow>xlowIn || (roll!=0.0 && col==ncol-1))) bmerge_r(xlowIn, xlow+1, ilowIn, ilow+1, col, lowmax, uppmax && xlow+1==xuppIn); if (iupp<iuppIn && (xupp<xuppIn || (roll!=0.0 && col==ncol-1))) bmerge_r(xupp-1, xuppIn, iupp-1, iuppIn, col, lowmax && xupp-1==xlowIn, uppmax); }

/* base::Sys.info */ SEXP do_sysinfo(SEXP call, SEXP op, SEXP args, SEXP rho) { SEXP ans, ansnames; OSVERSIONINFOEX osvi; char ver[256], buf[1000]; wchar_t name[MAX_COMPUTERNAME_LENGTH + 1], user[UNLEN+1]; DWORD namelen = MAX_COMPUTERNAME_LENGTH + 1, userlen = UNLEN+1; checkArity(op, args); PROTECT(ans = allocVector(STRSXP, 8)); osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX); if(!GetVersionEx((OSVERSIONINFO *)&osvi)) error(_("unsupported version of Windows")); SET_STRING_ELT(ans, 0, mkChar("Windows")); /* Here for unknown future versions */ snprintf(ver, 256, "%d.%d", (int)osvi.dwMajorVersion, (int)osvi.dwMinorVersion); if((int)osvi.dwMajorVersion >= 5) { PGNSI pGNSI; SYSTEM_INFO si; if(osvi.dwMajorVersion == 6) { if(osvi.wProductType == VER_NT_WORKSTATION) { if(osvi.dwMinorVersion == 0) strcpy(ver, "Vista"); else strcpy(ver, "7"); } else strcpy(ver, "Server 2008"); } if(osvi.dwMajorVersion == 5 && osvi.dwMinorVersion == 0) strcpy(ver, "2000"); if(osvi.dwMajorVersion == 5 && osvi.dwMinorVersion == 1) strcpy(ver, "XP"); if(osvi.dwMajorVersion == 5 && osvi.dwMinorVersion == 2) { if(osvi.wProductType == VER_NT_WORKSTATION) strcpy(ver, "XP Professional"); else strcpy(ver, "Server 2003"); } /* GetNativeSystemInfo is XP or later */ pGNSI = (PGNSI) GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetNativeSystemInfo"); if(NULL != pGNSI) pGNSI(&si); else GetSystemInfo(&si); if(si.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_AMD64) strcat(ver, " x64"); } SET_STRING_ELT(ans, 1, mkChar(ver)); if((int)osvi.dwMajorVersion >= 5) { if(osvi.wServicePackMajor > 0) snprintf(ver, 256, "build %d, Service Pack %d", LOWORD(osvi.dwBuildNumber), (int) osvi.wServicePackMajor); else snprintf(ver, 256, "build %d", LOWORD(osvi.dwBuildNumber)); } else snprintf(ver, 256, "build %d, %s", LOWORD(osvi.dwBuildNumber), osvi.szCSDVersion); SET_STRING_ELT(ans, 2, mkChar(ver)); GetComputerNameW(name, &namelen); wcstoutf8(buf, name, 1000); SET_STRING_ELT(ans, 3, mkCharCE(buf, CE_UTF8)); #ifdef WIN64 SET_STRING_ELT(ans, 4, mkChar("x86-64")); #else SET_STRING_ELT(ans, 4, mkChar("x86")); #endif GetUserNameW(user, &userlen); wcstoutf8(buf, user, 1000); SET_STRING_ELT(ans, 5, mkCharCE(buf, CE_UTF8)); SET_STRING_ELT(ans, 6, STRING_ELT(ans, 5)); SET_STRING_ELT(ans, 7, STRING_ELT(ans, 5)); PROTECT(ansnames = allocVector(STRSXP, 8)); SET_STRING_ELT(ansnames, 0, mkChar("sysname")); SET_STRING_ELT(ansnames, 1, mkChar("release")); SET_STRING_ELT(ansnames, 2, mkChar("version")); SET_STRING_ELT(ansnames, 3, mkChar("nodename")); SET_STRING_ELT(ansnames, 4, mkChar("machine")); SET_STRING_ELT(ansnames, 5, mkChar("login")); SET_STRING_ELT(ansnames, 6, mkChar("user")); SET_STRING_ELT(ansnames, 7, mkChar("effective_user")); setAttrib(ans, R_NamesSymbol, ansnames); UNPROTECT(2); return ans; }

/* complete.cases(.) */ SEXP compcases(SEXP args) { SEXP s, t, u, rval; int i, len; args = CDR(args); len = -1; for (s = args; s != R_NilValue; s = CDR(s)) { if (isList(CAR(s))) { for (t = CAR(s); t != R_NilValue; t = CDR(t)) if (isMatrix(CAR(t))) { u = getAttrib(CAR(t), R_DimSymbol); if (len < 0) len = INTEGER(u)[0]; else if (len != INTEGER(u)[0]) goto bad; } else if (isVector(CAR(t))) { if (len < 0) len = LENGTH(CAR(t)); else if (len != LENGTH(CAR(t))) goto bad; } else error(R_MSG_type, type2char(TYPEOF(CAR(t)))); } /* FIXME : Need to be careful with the use of isVector() */ /* since this includes lists and expressions. */ else if (isNewList(CAR(s))) { int it, nt; t = CAR(s); nt = length(t); /* 0-column data frames are a special case */ if(nt) { for (it = 0 ; it < nt ; it++) { if (isMatrix(VECTOR_ELT(t, it))) { u = getAttrib(VECTOR_ELT(t, it), R_DimSymbol); if (len < 0) len = INTEGER(u)[0]; else if (len != INTEGER(u)[0]) goto bad; } else if (isVector(VECTOR_ELT(t, it))) { if (len < 0) len = LENGTH(VECTOR_ELT(t, it)); else if (len != LENGTH(VECTOR_ELT(t, it))) goto bad; } else error(R_MSG_type, "unknown"); } } else { u = getAttrib(t, R_RowNamesSymbol); if (!isNull(u)) { if (len < 0) len = LENGTH(u); else if (len != INTEGER(u)[0]) goto bad; } } } else if (isMatrix(CAR(s))) { u = getAttrib(CAR(s), R_DimSymbol); if (len < 0) len = INTEGER(u)[0]; else if (len != INTEGER(u)[0]) goto bad; } else if (isVector(CAR(s))) { if (len < 0) len = LENGTH(CAR(s)); else if (len != LENGTH(CAR(s))) goto bad; } else error(R_MSG_type, type2char(TYPEOF(CAR(s)))); } if (len < 0) error(_("no input has determined the number of cases")); PROTECT(rval = allocVector(LGLSXP, len)); for (i = 0; i < len; i++) INTEGER(rval)[i] = 1; /* FIXME : there is a lot of shared code here for vectors. */ /* It should be abstracted out and optimized. */ for (s = args; s != R_NilValue; s = CDR(s)) { if (isList(CAR(s))) { /* Now we only need to worry about vectors */ /* since we use mod to handle arrays. */ /* FIXME : using mod like this causes */ /* a potential performance hit. */ for (t = CAR(s); t != R_NilValue; t = CDR(t)) { u = CAR(t); for (i = 0; i < LENGTH(u); i++) { switch (TYPEOF(u)) { case INTSXP: case LGLSXP: if (INTEGER(u)[i] == NA_INTEGER) INTEGER(rval)[i % len] = 0; break; case REALSXP: if (ISNAN(REAL(u)[i])) INTEGER(rval)[i % len] = 0; break; case CPLXSXP: if (ISNAN(COMPLEX(u)[i].r) || ISNAN(COMPLEX(u)[i].i)) INTEGER(rval)[i % len] = 0; break; case STRSXP: if (STRING_ELT(u, i) == NA_STRING) INTEGER(rval)[i % len] = 0; break; default: UNPROTECT(1); error(R_MSG_type, type2char(TYPEOF(u))); } } } } if (isNewList(CAR(s))) { int it, nt; t = CAR(s); nt = length(t); for (it = 0 ; it < nt ; it++) { u = VECTOR_ELT(t, it); for (i = 0; i < LENGTH(u); i++) { switch (TYPEOF(u)) { case INTSXP: case LGLSXP: if (INTEGER(u)[i] == NA_INTEGER) INTEGER(rval)[i % len] = 0; break; case REALSXP: if (ISNAN(REAL(u)[i])) INTEGER(rval)[i % len] = 0; break; case CPLXSXP: if (ISNAN(COMPLEX(u)[i].r) || ISNAN(COMPLEX(u)[i].i)) INTEGER(rval)[i % len] = 0; break; case STRSXP: if (STRING_ELT(u, i) == NA_STRING) INTEGER(rval)[i % len] = 0; break; default: UNPROTECT(1); error(R_MSG_type, type2char(TYPEOF(u))); } } } } else { for (i = 0; i < LENGTH(CAR(s)); i++) { u = CAR(s); switch (TYPEOF(u)) { case INTSXP: case LGLSXP: if (INTEGER(u)[i] == NA_INTEGER) INTEGER(rval)[i % len] = 0; break; case REALSXP: if (ISNAN(REAL(u)[i])) INTEGER(rval)[i % len] = 0; break; case CPLXSXP: if (ISNAN(COMPLEX(u)[i].r) || ISNAN(COMPLEX(u)[i].i)) INTEGER(rval)[i % len] = 0; break; case STRSXP: if (STRING_ELT(u, i) == NA_STRING) INTEGER(rval)[i % len] = 0; break; default: UNPROTECT(1); error(R_MSG_type, type2char(TYPEOF(u))); } } } } UNPROTECT(1); return rval; bad: error(_("not all arguments have the same length")); return R_NilValue; /* -Wall */ }

SEXP attribute_hidden do_mapply(SEXP call, SEXP op, SEXP args, SEXP rho) { checkArity(op, args); SEXP f = CAR(args), varyingArgs = CADR(args), constantArgs = CADDR(args); int m, zero = 0; R_xlen_t *lengths, *counters, longest = 0; m = length(varyingArgs); SEXP vnames = PROTECT(getAttrib(varyingArgs, R_NamesSymbol)); Rboolean named = CXXRCONSTRUCT(Rboolean, vnames != R_NilValue); lengths = static_cast<R_xlen_t *>( CXXR_alloc(m, sizeof(R_xlen_t))); for (int i = 0; i < m; i++) { SEXP tmp1 = VECTOR_ELT(varyingArgs, i); lengths[i] = xlength(tmp1); if (isObject(tmp1)) { // possibly dispatch on length() /* Cache the .Primitive: unclear caching is worthwhile. */ static SEXP length_op = NULL; if (length_op == NULL) length_op = R_Primitive("length"); // DispatchOrEval() needs 'args' to be a pairlist SEXP ans, tmp2 = PROTECT(list1(tmp1)); if (DispatchOrEval(call, length_op, "length", tmp2, rho, &ans, 0, 1)) lengths[i] = R_xlen_t( (TYPEOF(ans) == REALSXP ? REAL(ans)[0] : asInteger(ans))); UNPROTECT(1); } if (lengths[i] == 0) zero++; if (lengths[i] > longest) longest = lengths[i]; } if (zero && longest) error(_("zero-length inputs cannot be mixed with those of non-zero length")); counters = static_cast<R_xlen_t *>( CXXR_alloc(m, sizeof(R_xlen_t))); memset(counters, 0, m * sizeof(R_xlen_t)); SEXP mindex = PROTECT(allocVector(VECSXP, m)); SEXP nindex = PROTECT(allocVector(VECSXP, m)); /* build a call like f(dots[[1]][[4]], dots[[2]][[4]], dots[[3]][[4]], d=7) */ SEXP fcall = R_NilValue; // -Wall if (constantArgs == R_NilValue) ; else if (isVectorList(constantArgs)) fcall = VectorToPairList(constantArgs); else error(_("argument 'MoreArgs' of 'mapply' is not a list")); PROTECT_INDEX fi; PROTECT_WITH_INDEX(fcall, &fi); Rboolean realIndx = CXXRCONSTRUCT(Rboolean, longest > INT_MAX); SEXP Dots = install("dots"); for (int j = m - 1; j >= 0; j--) { SET_VECTOR_ELT(mindex, j, ScalarInteger(j + 1)); SET_VECTOR_ELT(nindex, j, allocVector(realIndx ? REALSXP : INTSXP, 1)); SEXP tmp1 = PROTECT(lang3(R_Bracket2Symbol, Dots, VECTOR_ELT(mindex, j))); SEXP tmp2 = PROTECT(lang3(R_Bracket2Symbol, tmp1, VECTOR_ELT(nindex, j))); REPROTECT(fcall = CONS(tmp2, fcall), fi); UNPROTECT(2); if (named && CHAR(STRING_ELT(vnames, j))[0] != '\0') SET_TAG(fcall, installTrChar(STRING_ELT(vnames, j))); } REPROTECT(fcall = LCONS(f, fcall), fi); SEXP ans = PROTECT(allocVector(VECSXP, longest)); for (int i = 0; i < longest; i++) { for (int j = 0; j < m; j++) { counters[j] = (++counters[j] > lengths[j]) ? 1 : counters[j]; if (realIndx) REAL(VECTOR_ELT(nindex, j))[0] = double( counters[j]); else INTEGER(VECTOR_ELT(nindex, j))[0] = int( counters[j]); } SEXP tmp = eval(fcall, rho); if (NAMED(tmp)) tmp = duplicate(tmp); SET_VECTOR_ELT(ans, i, tmp); } for (int j = 0; j < m; j++) if (counters[j] != lengths[j]) warning(_("longer argument not a multiple of length of shorter")); UNPROTECT(5); return ans; }