SEXP getAttrib(SEXP vec, SEXP name) { if(TYPEOF(vec) == CHARSXP) error("cannot have attributes on a CHARSXP"); /* pre-test to avoid expensive operations if clearly not needed -- LT */ if (ATTRIB(vec) == R_NilValue && ! (TYPEOF(vec) == LISTSXP || TYPEOF(vec) == LANGSXP)) return R_NilValue; if (isString(name)) name = install(translateChar(STRING_ELT(name, 0))); /* special test for c(NA, n) rownames of data frames: */ if (name == R_RowNamesSymbol) { SEXP s = getAttrib0(vec, R_RowNamesSymbol); if(isInteger(s) && LENGTH(s) == 2 && INTEGER(s)[0] == NA_INTEGER) { int i, n = abs(INTEGER(s)[1]); PROTECT(s = allocVector(INTSXP, n)); for(i = 0; i < n; i++) INTEGER(s)[i] = i+1; UNPROTECT(1); } return s; } else return getAttrib0(vec, name); }
attribute_hidden SEXP do_shortRowNames(SEXP call, SEXP op, SEXP args, SEXP env) { /* return n if the data frame 'vec' has c(NA, n) rownames; * nrow(.) otherwise; note that data frames with nrow(.) == 0 * have no row.names. ==> is also used in dim.data.frame() */ checkArity(op, args); SEXP s = getAttrib0(CAR(args), R_RowNamesSymbol), ans = s; int type = asInteger(CADR(args)); if( type < 0 || type > 2) error(_("invalid '%s' argument"), "type"); if(type >= 1) { int n = (isInteger(s) && LENGTH(s) == 2 && INTEGER(s)[0] == NA_INTEGER) ? INTEGER(s)[1] : (isNull(s) ? 0 : LENGTH(s)); ans = ScalarInteger((type == 1) ? n : abs(n)); } return ans; }
SEXP as_output_dataframe(SEXP sWhat, SEXP sSep, SEXP sNsep, SEXP sRownamesFlag, SEXP sConn, SEXP sRecycle) { unsigned long i, j; if (TYPEOF(sWhat) != VECSXP) Rf_error("object must be a data.frame"); unsigned long ncol = XLENGTH(sWhat); unsigned long nrow = 0; unsigned long row_len = 0; if (ncol) nrow = XLENGTH(VECTOR_ELT(sWhat, 0)); int rownamesFlag = asInteger(sRownamesFlag); if (TYPEOF(sSep) != STRSXP || LENGTH(sSep) != 1) Rf_error("sep must be a single string"); if (TYPEOF(sNsep) != STRSXP || LENGTH(sNsep) != 1) Rf_error("nsep must be a single string"); char sep = CHAR(STRING_ELT(sSep, 0))[0]; char nsep = CHAR(STRING_ELT(sNsep, 0))[0]; char lend = '\n'; SEXP sRnames = getAttrib0(sWhat, R_RowNamesSymbol); int isConn = inherits(sConn, "connection"), mod = 0; int recycle = (asInteger(sRecycle) > 0) ? 1 : 0; SEXP as_character = R_NilValue; unsigned long *sizes = 0; if (TYPEOF(sRnames) != STRSXP) sRnames = NULL; for (j = 0; j < ncol; j++) { /* we have to call as.character() for objects with a class since they may require a different representation */ if (requires_as_character(VECTOR_ELT(sWhat, j))) { /* did we create a modified copy yet? If not, do so */ if (!mod) { /* shallow copy - we use it only internally so should be ok */ SEXP sData = PROTECT(allocVector(VECSXP, XLENGTH(sWhat))); memcpy(&(VECTOR_ELT(sData, 0)), &(VECTOR_ELT(sWhat, 0)), sizeof(SEXP) * XLENGTH(sWhat)); sWhat = sData; mod = 1; as_character = Rf_install("as.character"); } SEXP asc = PROTECT(lang2(as_character, VECTOR_ELT(sWhat, j))); SET_VECTOR_ELT(sWhat, j, eval(asc, R_GlobalEnv)); UNPROTECT(1); } row_len += guess_size(TYPEOF(VECTOR_ELT(sWhat, j))); } if (ncol && recycle) { /* this allows us to support lists directly without requiring for them to be a data frames - in those cases we have to check the length of the columns to determine the longest */ unsigned long min_len = (unsigned long) XLENGTH(VECTOR_ELT(sWhat, 0)); for (j = 0; j < ncol; j++) { unsigned long l = 0; SEXP el = VECTOR_ELT(sWhat, j); /* NOTE: we can assume that el must be a scalar vector since anything that isn't will be passed through as.character() */ l = (unsigned long) XLENGTH(el); if (l < min_len) min_len = l; if (l > nrow) nrow = l; } /* if all elements have the smae tlength then we don't need to re-cycle, so treat the list exactly like a data frame */ if (nrow == min_len) recycle = 0; else { /* cache lengths since XLENGTH is actually not a cheap operation */ SEXP foo = PROTECT(allocVector(RAWSXP, sizeof(long) * ncol)); sizes = (unsigned long*) RAW(foo); for (j = 0; j < ncol; j++) sizes[j] = (unsigned long) XLENGTH(VECTOR_ELT(sWhat, j)); } } if (rownamesFlag == 1) row_len++; SEXP buf = dybuf_alloc(isConn ? DEFAULT_CONN_BUFFER_SIZE : (row_len * nrow), sConn); for (i = 0; i < nrow; i++) { if (rownamesFlag) { if (sRnames) { const char *c = CHAR(STRING_ELT(sRnames, i)); dybuf_add(buf, c, strlen(c)); } else { /* FIXME: use sprintf("%d", i) for automatic row names? */ } dybuf_add1(buf, nsep); } if (recycle) /* slower - we need to use modulo to recycle */ /* FIXME: modulo is slow for large vectors, should we just keep separate index for every column? It may be worth measuring the impact ... We are already trying to be smart by avoiding modulo for the two most common cases: full-length vectors and vectors of length 1 so this will only impact non-trivial recycling */ for (j = 0; j < ncol; j++) { store(buf, VECTOR_ELT(sWhat, j), (i < sizes[j]) ? i : ((sizes[j] == 1) ? 0 : (i % sizes[j]))); if (j < ncol - 1) dybuf_add1(buf, (rownamesFlag == 2 && j == 0) ? nsep : sep); } else for (j = 0; j < ncol; j++) { store(buf, VECTOR_ELT(sWhat, j), i); if (j < ncol - 1) dybuf_add1(buf, (rownamesFlag == 2 && j == 0) ? nsep : sep); } dybuf_add1(buf, lend); } if (recycle) UNPROTECT(1); /* sizes cache */ if (mod) UNPROTECT(1); /* sData */ SEXP res = dybuf_collect(buf); UNPROTECT(1); /* buffer */ return res; }