/* Sets fields of typeInfo, ruling out possible types based on s. * * The typeInfo struct should be initialized with all fields TRUE. */ static void ruleout_types(const char *s, Typecvt_Info *typeInfo, LocalData *data) { int res; char *endp; if (typeInfo->islogical) { if (strcmp(s, "F") == 0 || strcmp(s, "FALSE") == 0 || strcmp(s, "T") == 0 || strcmp(s, "TRUE") == 0) { typeInfo->isinteger = FALSE; typeInfo->isreal = FALSE; typeInfo->iscomplex = FALSE; } else { typeInfo->islogical = TRUE; } } if (typeInfo->isinteger) { res = Strtoi(s, 10); if (res == NA_INTEGER) typeInfo->isinteger = FALSE; } if (typeInfo->isreal) { Strtod(s, &endp, TRUE, data); if (!isBlankString(endp)) typeInfo->isreal = FALSE; } if (typeInfo->iscomplex) { strtoc(s, &endp, TRUE, data); if (!isBlankString(endp)) typeInfo->iscomplex = FALSE; } }
SEXP df_split(SEXP s, SEXP sSep, SEXP sNamesSep, SEXP sResilient, SEXP sNcol, SEXP sWhat, SEXP sColNames, SEXP sSkip, SEXP sNlines, SEXP sQuote) { char sep; int nsep, use_ncol, resilient, ncol; long i, j, k, m, len, nmsep_flag, skip, quoteLen; unsigned long nrow; char num_buf[48]; const char *c, *c2, *sraw = 0, *send = 0, *quoteChars; long nlines = asLong(sNlines, -1); SEXP sOutput, tmp, sOutputNames, st, clv; /* Parse inputs */ sep = CHAR(STRING_ELT(sSep, 0))[0]; nsep = (TYPEOF(sNamesSep) == STRSXP && LENGTH(sNamesSep) > 0) ? ((int) (unsigned char) *CHAR(STRING_ELT(sNamesSep, 0))) : -1; nmsep_flag = (nsep > 0); use_ncol = asInteger(sNcol); resilient = asInteger(sResilient); ncol = use_ncol; /* NOTE: "character" is prepended by the R code if nmsep is TRUE, so ncol *does* include the key column */ skip = asLong(sSkip, 0); /* parse quote information */ quoteChars = CHAR(STRING_ELT(sQuote, 0)); quoteLen = strlen(quoteChars); /* count non-NA columns */ for (i = 0; i < use_ncol; i++) if (TYPEOF(VECTOR_ELT(sWhat,i)) == NILSXP) ncol--; /* check input */ if (TYPEOF(s) == RAWSXP) { nrow = (nlines >= 0) ? count_lines_bounded(s, nlines + skip) : count_lines(s); sraw = (const char*) RAW(s); send = sraw + XLENGTH(s); if (nrow >= skip) { unsigned long slen = XLENGTH(s); nrow = nrow - skip; i = 0; while (i < skip && (sraw = memchr(sraw, '\n', slen))) { sraw++; i++; } } else { nrow = 0; sraw = send; } } else if (TYPEOF(s) == STRSXP) { nrow = XLENGTH(s); if (nrow >= skip) { nrow -= skip; } else { skip = nrow; nrow = 0; } } else Rf_error("invalid input to split - must be a raw or character vector"); if (nlines >= 0 && nrow > nlines) nrow = nlines; /* allocate result */ PROTECT(sOutput = allocVector(VECSXP, ncol)); /* set names */ setAttrib(sOutput, R_NamesSymbol, sOutputNames = allocVector(STRSXP, ncol)); if (nrow > INT_MAX) Rf_warning("R currently doesn't support large data frames, but we have %lu rows, returning a named list instead", nrow); else { /* set automatic row names */ PROTECT(tmp = allocVector(INTSXP, 2)); INTEGER(tmp)[0] = NA_INTEGER; INTEGER(tmp)[1] = -nrow; setAttrib(sOutput, R_RowNamesSymbol, tmp); UNPROTECT(1); /* set class */ classgets(sOutput, mkString("data.frame")); } /* Create SEXP for each element of the output */ j = 0; for (i = 0; i < use_ncol; i++) { if (TYPEOF(VECTOR_ELT(sWhat,i)) != NILSXP) /* copy col.name */ SET_STRING_ELT(sOutputNames, j, STRING_ELT(sColNames, i)); switch (TYPEOF(VECTOR_ELT(sWhat,i))) { case LGLSXP: case INTSXP: case REALSXP: case CPLXSXP: case STRSXP: case RAWSXP: SET_VECTOR_ELT(sOutput, j++, allocVector(TYPEOF(VECTOR_ELT(sWhat,i)), nrow)); break; case VECSXP: SET_VECTOR_ELT(sOutput, j++, st = allocVector(REALSXP, nrow)); clv = PROTECT(allocVector(STRSXP, 2)); SET_STRING_ELT(clv, 0, mkChar("POSIXct")); SET_STRING_ELT(clv, 1, mkChar("POSIXt")); setAttrib(st, R_ClassSymbol, clv); /* this is somewhat a security precaution such that users don't get surprised -- if there is no TZ R will render it in local time - which is correct but may confuse people that didn't use GMT to start with */ setAttrib(st, install("tzone"), mkString("GMT")); UNPROTECT(1); break; case NILSXP: break; default: Rf_error("Unsupported input to what %u.", TYPEOF(VECTOR_ELT(sWhat,i))); break; } } /* Cycle through the rows and extract the data */ for (k = 0; k < nrow; k++) { const char *l = 0, *le; if (TYPEOF(s) == RAWSXP) { l = sraw; le = memchr(l, '\n', send - l); if (!le) le = send; sraw = le + 1; if (*(le - 1) == '\r' ) le--; /* account for DOS-style '\r\n' */ } else { l = CHAR(STRING_ELT(s, k + skip)); le = l + strlen(l); /* probably lame, but using strings is way inefficient anyway ;) */ } if (nmsep_flag) { c = memchr(l, nsep, le - l); if (c) { SET_STRING_ELT(VECTOR_ELT(sOutput, 0), k, Rf_mkCharLen(l, c - l)); l = c + 1; } else SET_STRING_ELT(VECTOR_ELT(sOutput, 0), k, R_BlankString); } i = nmsep_flag; j = nmsep_flag; while (l < le) { if (!(c = memchr(l, sep, le - l))) c = le; if (i >= use_ncol) { if (resilient) break; Rf_error("line %lu: too many input columns (expected %u)", k, use_ncol); } switch(TYPEOF(VECTOR_ELT(sWhat,i))) { // NOTE: no matching case for NILSXP case LGLSXP: len = (int) (c - l); if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; int tr = StringTrue(num_buf), fa = StringFalse(num_buf); LOGICAL(VECTOR_ELT(sOutput, j))[k] = (tr || fa) ? tr : NA_INTEGER; j++; break; case INTSXP: len = (int) (c - l); /* watch for overflow and truncate -- should we warn? */ if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; INTEGER(VECTOR_ELT(sOutput, j))[k] = Strtoi(num_buf, 10); j++; break; case REALSXP: len = (int) (c - l); /* watch for overflow and truncate -- should we warn? */ if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; REAL(VECTOR_ELT(sOutput, j))[k] = R_atof(num_buf); j++; break; case CPLXSXP: len = (int) (c - l); /* watch for overflow and truncate -- should we warn? */ if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; COMPLEX(VECTOR_ELT(sOutput, j))[k] = strtoc(num_buf, TRUE); j++; break; case STRSXP: c2 = c; if (quoteLen) { for (m = 0; m < quoteLen; m++) { if (*l == quoteChars[m]) { l++; if (!(c2 = memchr(l, quoteChars[m], le - l))) { Rf_error("End of line within quoted string."); } else { if (!(c = memchr(c2, (unsigned char) sep, le - c2))) c = le; } } } } SET_STRING_ELT(VECTOR_ELT(sOutput, j), k, Rf_mkCharLen(l, c2 - l)); j++; break; case RAWSXP: len = (int) (c - l); /* watch for overflow and truncate -- should we warn? */ if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; RAW(VECTOR_ELT(sOutput, j))[k] = strtoraw(num_buf); j++; break; case VECSXP: REAL(VECTOR_ELT(sOutput, j))[k] = parse_ts(l, c); j++; } l = c + 1; i++; } /* fill-up unused columns */ while (i < use_ncol) { switch (TYPEOF(VECTOR_ELT(sWhat,i))) { // NOTE: no matching case for NILSXP case LGLSXP: LOGICAL(VECTOR_ELT(sOutput, j++))[k] = NA_INTEGER; break; case INTSXP: INTEGER(VECTOR_ELT(sOutput, j++))[k] = NA_INTEGER; break; case REALSXP: case VECSXP: REAL(VECTOR_ELT(sOutput, j++))[k] = NA_REAL; break; case CPLXSXP: COMPLEX(VECTOR_ELT(sOutput, j))[k].r = NA_REAL; COMPLEX(VECTOR_ELT(sOutput, j++))[k].i = NA_REAL; break; case STRSXP: SET_STRING_ELT(VECTOR_ELT(sOutput, j++), k, R_NaString); break; case RAWSXP: RAW(VECTOR_ELT(sOutput, j))[k] = (Rbyte) 0; break; } i++; } } UNPROTECT(1); /* sOutput */ return(sOutput); }
SEXP mat_split(SEXP s, SEXP sSep, SEXP sNamesSep, SEXP sResilient, SEXP sNcol, SEXP sWhat, SEXP sSkip, SEXP sNlines, SEXP sQuote) { unsigned int ncol = 1, np = 0, resilient = asInteger(sResilient); unsigned long nrow, i, k, N, len, quoteLen; int use_ncol = asInteger(sNcol); int nsep = -1; long skip = asLong(sSkip, 0); long nlines = asLong(sNlines, -1); SEXP res, rnam; char sep; char num_buf[48]; /* sraw/send is only used for raw vector parsing, but we have to set it to 0 to make gcc happy which cannot figure out that it's actually unused */ const char *c, *c2, *sraw = 0, *send = 0, *l, *le, *quoteChars; /* parse sep input */ if (TYPEOF(sNamesSep) == STRSXP && LENGTH(sNamesSep) > 0) nsep = (int) (unsigned char) *CHAR(STRING_ELT(sNamesSep, 0)); if (TYPEOF(sSep) != STRSXP || LENGTH(sSep) < 1) Rf_error("invalid separator"); sep = CHAR(STRING_ELT(sSep, 0))[0]; /* parse quote information */ quoteChars = CHAR(STRING_ELT(sQuote, 0)); quoteLen = strlen(quoteChars); /* check the input data */ if (TYPEOF(s) == RAWSXP) { nrow = (nlines >= 0) ? count_lines_bounded(s, nlines + skip) : count_lines(s); sraw = (const char*) RAW(s); send = sraw + XLENGTH(s); if (nrow >= skip) { unsigned long slen = XLENGTH(s); nrow = nrow - skip; i = 0; while (i < skip && (sraw = memchr(sraw, '\n', slen))) { sraw++; i++; } } else { nrow = 0; sraw = send; } } else if (TYPEOF(s) == STRSXP) { nrow = LENGTH(s); if (nrow >= skip) { nrow -= skip; } else { skip = nrow; nrow = 0; } } else { Rf_error("invalid input to split - must be a raw or character vector"); } if (nlines >= 0 && nrow > nlines) nrow = nlines; /* If no rows left, return an empty matrix */ if (!nrow) { if (np) UNPROTECT(np); return allocMatrix(TYPEOF(sWhat), 0, 0); } /* count number of columns */ if (use_ncol < 1) { if (TYPEOF(s) == RAWSXP) { c = sraw; le = memchr(sraw, '\n', send - sraw); if (TYPEOF(sWhat) == STRSXP && quoteLen) { ncol = 0; while(1) { ncol++; for (k = 0; k < quoteLen; k++) { if (*c == quoteChars[k]) { c++; if (! (c = memchr(c, quoteChars[k], le - c))) Rf_error("End of line within quote string on line 1; cannot determine num columns!"); break; /* note: breaks inner 'for' loop, not 'if' statement */ } } if (!(c = (memchr(c, (unsigned char) sep, le - c)))) break; c++; } } else { ncol = 1; while ((c = memchr(c, (unsigned char) sep, le - c))) { ncol++; c++; } } } else { c = CHAR(STRING_ELT(s, 0)); if (TYPEOF(sWhat) == STRSXP && quoteLen) { ncol = 0; while(1) { ncol++; for (k = 0; k < quoteLen; k++) { if (*c == quoteChars[k]) { c++; if (!(c = strchr(c, quoteChars[k]))) Rf_error("End of line within quote string on line 1; cannot determine num columns!"); break; /* note: breaks inner 'for' loop, not 'if' statement */ } } if (!(c = (strchr(c, sep)))) break; c++; } } else { while ((c = strchr(c, sep))) { ncol++; c++; } } } /* if sep and nsep are the same then the first "column" is the key and not the column */ if (nsep == (int) (unsigned char) sep) ncol--; } else ncol = use_ncol; /* allocate space for the result */ N = ncol * nrow; switch(TYPEOF(sWhat)) { case LGLSXP: case INTSXP: case REALSXP: case CPLXSXP: case STRSXP: case RAWSXP: res = PROTECT(allocMatrix(TYPEOF(sWhat), nrow, ncol)); break; default: Rf_error("Unsupported input to what."); break; } if (nsep >= 0) { SEXP dn; setAttrib(res, R_DimNamesSymbol, (dn = allocVector(VECSXP, 2))); SET_VECTOR_ELT(dn, 0, (rnam = allocVector(STRSXP, nrow))); } np++; /* cycle over the rows and parse the data */ for (i = 0; i < nrow; i++) { int j = i; /* find the row of data */ if (TYPEOF(s) == RAWSXP) { l = sraw; le = memchr(l, '\n', send - l); if (!le) le = send; sraw = le + 1; if (*(le - 1) == '\r' ) le--; /* account for DOS-style '\r\n' */ } else { l = CHAR(STRING_ELT(s, i + skip)); le = l + strlen(l); } /* if nsep, load rowname */ if (nsep >= 0) { c = memchr(l, nsep, le - l); if (c) { SET_STRING_ELT(rnam, i, Rf_mkCharLen(l, c - l)); l = c + 1; } else SET_STRING_ELT(rnam, i, R_BlankString); } /* now split the row into elements */ while (l < le) { if (!(c = memchr(l, sep, le - l))) c = le; if (j >= N) { if (resilient) break; Rf_error("line %lu: too many columns (expected %u)", (unsigned long)(i + 1), ncol); } switch(TYPEOF(sWhat)) { case LGLSXP: len = (int) (c - l); if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; int tr = StringTrue(num_buf), fa = StringFalse(num_buf); LOGICAL(res)[j] = (tr || fa) ? tr : NA_INTEGER; break; case INTSXP: len = (int) (c - l); /* watch for overflow and truncate -- should we warn? */ if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; INTEGER(res)[j] = Strtoi(num_buf, 10); break; case REALSXP: len = (int) (c - l); /* watch for overflow and truncate -- should we warn? */ if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; REAL(res)[j] = R_atof(num_buf); break; case CPLXSXP: len = (int) (c - l); /* watch for overflow and truncate -- should we warn? */ if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; COMPLEX(res)[j] = strtoc(num_buf, TRUE); break; case STRSXP: c2 = c; if (quoteLen) { for (k = 0; k < quoteLen; k++) { if (*l == quoteChars[k]) { l++; if (!(c2 = memchr(l, quoteChars[k], le - l))) { Rf_warning("End of line within quoted string!"); c = c2 = le; } else { if (!(c = memchr(c2, (unsigned char) sep, le - c2))) c = le; } } } } SET_STRING_ELT(res, j, Rf_mkCharLen(l, c2 - l)); break; case RAWSXP: len = (int) (c - l); /* watch for overflow and truncate -- should we warn? */ if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; RAW(res)[j] = strtoraw(num_buf); break; } l = c + 1; j += nrow; } /* fill up unused columns with NAs */ while (j < N) { switch (TYPEOF(sWhat)) { case LGLSXP: LOGICAL(res)[j] = NA_INTEGER; break; case INTSXP: INTEGER(res)[j] = NA_INTEGER; break; case REALSXP: REAL(res)[j] = NA_REAL; break; case CPLXSXP: COMPLEX(res)[j].r = NA_REAL; COMPLEX(res)[j].i = NA_REAL; break; case STRSXP: SET_STRING_ELT(res, j, R_NaString); break; case RAWSXP: RAW(res)[j] = (Rbyte) 0; break; } j += nrow; } } UNPROTECT(np); return res; }
SEXP typeconvert(SEXP call, SEXP op, SEXP args, SEXP env) { SEXP cvec, a, dup, levs, dims, names, dec; SEXP rval = R_NilValue; /* -Wall */ int i, j, len, asIs; Rboolean done = FALSE; char *endp; const char *tmp = NULL; LocalData data = {NULL, 0, 0, '.', NULL, NO_COMCHAR, 0, NULL, FALSE, FALSE, 0, FALSE, FALSE}; Typecvt_Info typeInfo; /* keep track of possible types of cvec */ typeInfo.islogical = TRUE; /* we can't rule anything out initially */ typeInfo.isinteger = TRUE; typeInfo.isreal = TRUE; typeInfo.iscomplex = TRUE; data.NAstrings = R_NilValue; args = CDR(args); if (!isString(CAR(args))) error(_("the first argument must be of mode character")); data.NAstrings = CADR(args); if (TYPEOF(data.NAstrings) != STRSXP) error(_("invalid '%s' argument"), "na.strings"); asIs = asLogical(CADDR(args)); if (asIs == NA_LOGICAL) asIs = 0; dec = CADDDR(args); if (isString(dec) || isNull(dec)) { if (length(dec) == 0) data.decchar = '.'; else data.decchar = translateChar(STRING_ELT(dec, 0))[0]; } cvec = CAR(args); len = length(cvec); /* save the dim/dimnames attributes */ PROTECT(dims = getAttrib(cvec, R_DimSymbol)); if (isArray(cvec)) PROTECT(names = getAttrib(cvec, R_DimNamesSymbol)); else PROTECT(names = getAttrib(cvec, R_NamesSymbol)); /* Use the first non-NA to screen */ for (i = 0; i < len; i++) { tmp = CHAR(STRING_ELT(cvec, i)); if (!(STRING_ELT(cvec, i) == NA_STRING || strlen(tmp) == 0 || isNAstring(tmp, 1, &data) || isBlankString(tmp))) break; } if (i < len) { /* not all entries are NA */ ruleout_types(tmp, &typeInfo, &data); } if (typeInfo.islogical) { PROTECT(rval = allocVector(LGLSXP, len)); for (i = 0; i < len; i++) { tmp = CHAR(STRING_ELT(cvec, i)); if (STRING_ELT(cvec, i) == NA_STRING || strlen(tmp) == 0 || isNAstring(tmp, 1, &data) || isBlankString(tmp)) LOGICAL(rval)[i] = NA_LOGICAL; else { if (strcmp(tmp, "F") == 0 || strcmp(tmp, "FALSE") == 0) LOGICAL(rval)[i] = 0; else if(strcmp(tmp, "T") == 0 || strcmp(tmp, "TRUE") == 0) LOGICAL(rval)[i] = 1; else { typeInfo.islogical = FALSE; ruleout_types(tmp, &typeInfo, &data); break; } } } if (typeInfo.islogical) done = TRUE; else UNPROTECT(1); } if (!done && typeInfo.isinteger) { PROTECT(rval = allocVector(INTSXP, len)); for (i = 0; i < len; i++) { tmp = CHAR(STRING_ELT(cvec, i)); if (STRING_ELT(cvec, i) == NA_STRING || strlen(tmp) == 0 || isNAstring(tmp, 1, &data) || isBlankString(tmp)) INTEGER(rval)[i] = NA_INTEGER; else { INTEGER(rval)[i] = Strtoi(tmp, 10); if (INTEGER(rval)[i] == NA_INTEGER) { typeInfo.isinteger = FALSE; ruleout_types(tmp, &typeInfo, &data); break; } } } if(typeInfo.isinteger) done = TRUE; else UNPROTECT(1); } if (!done && typeInfo.isreal) { PROTECT(rval = allocVector(REALSXP, len)); for (i = 0; i < len; i++) { tmp = CHAR(STRING_ELT(cvec, i)); if (STRING_ELT(cvec, i) == NA_STRING || strlen(tmp) == 0 || isNAstring(tmp, 1, &data) || isBlankString(tmp)) REAL(rval)[i] = NA_REAL; else { REAL(rval)[i] = Strtod(tmp, &endp, FALSE, &data); if (!isBlankString(endp)) { typeInfo.isreal = FALSE; ruleout_types(tmp, &typeInfo, &data); break; } } } if(typeInfo.isreal) done = TRUE; else UNPROTECT(1); } if (!done && typeInfo.iscomplex) { PROTECT(rval = allocVector(CPLXSXP, len)); for (i = 0; i < len; i++) { tmp = CHAR(STRING_ELT(cvec, i)); if (STRING_ELT(cvec, i) == NA_STRING || strlen(tmp) == 0 || isNAstring(tmp, 1, &data) || isBlankString(tmp)) COMPLEX(rval)[i].r = COMPLEX(rval)[i].i = NA_REAL; else { COMPLEX(rval)[i] = strtoc(tmp, &endp, FALSE, &data); if (!isBlankString(endp)) { typeInfo.iscomplex = FALSE; /* this is not needed, unless other cases are added */ ruleout_types(tmp, &typeInfo, &data); break; } } } if(typeInfo.iscomplex) done = TRUE; else UNPROTECT(1); } if (!done) { if (asIs) { PROTECT(rval = duplicate(cvec)); for (i = 0; i < len; i++) if(isNAstring(CHAR(STRING_ELT(rval, i)), 1, &data)) SET_STRING_ELT(rval, i, NA_STRING); } else { PROTECT(dup = duplicated(cvec, FALSE)); j = 0; for (i = 0; i < len; i++) { /* <NA> is never to be a level here */ if (STRING_ELT(cvec, i) == NA_STRING) continue; if (LOGICAL(dup)[i] == 0 && !isNAstring(CHAR(STRING_ELT(cvec, i)), 1, &data)) j++; } PROTECT(levs = allocVector(STRSXP,j)); j = 0; for (i = 0; i < len; i++) { if (STRING_ELT(cvec, i) == NA_STRING) continue; if (LOGICAL(dup)[i] == 0 && !isNAstring(CHAR(STRING_ELT(cvec, i)), 1, &data)) SET_STRING_ELT(levs, j++, STRING_ELT(cvec, i)); } /* We avoid an allocation by reusing dup, * a LGLSXP of the right length */ rval = dup; SET_TYPEOF(rval, INTSXP); /* put the levels in lexicographic order */ sortVector(levs, FALSE); PROTECT(a = matchE(levs, cvec, NA_INTEGER, env)); for (i = 0; i < len; i++) INTEGER(rval)[i] = INTEGER(a)[i]; setAttrib(rval, R_LevelsSymbol, levs); PROTECT(a = mkString("factor")); setAttrib(rval, R_ClassSymbol, a); UNPROTECT(3); } } setAttrib(rval, R_DimSymbol, dims); setAttrib(rval, isArray(cvec) ? R_DimNamesSymbol : R_NamesSymbol, names); UNPROTECT(3); return rval; }
static void extractItem(char *buffer, SEXP ans, int i, LocalData *d) { char *endp; switch(TYPEOF(ans)) { case NILSXP: break; case LGLSXP: if (isNAstring(buffer, 0, d)) LOGICAL(ans)[i] = NA_INTEGER; else { int tr = StringTrue(buffer), fa = StringFalse(buffer); if(tr || fa) LOGICAL(ans)[i] = tr; else expected("a logical", buffer, d); } break; case INTSXP: if (isNAstring(buffer, 0, d)) INTEGER(ans)[i] = NA_INTEGER; else { INTEGER(ans)[i] = Strtoi(buffer, 10); if (INTEGER(ans)[i] == NA_INTEGER) expected("an integer", buffer, d); } break; case REALSXP: if (isNAstring(buffer, 0, d)) REAL(ans)[i] = NA_REAL; else { REAL(ans)[i] = Strtod(buffer, &endp, TRUE, d); if (!isBlankString(endp)) expected("a real", buffer, d); } break; case CPLXSXP: if (isNAstring(buffer, 0, d)) COMPLEX(ans)[i].r = COMPLEX(ans)[i].i = NA_REAL; else { COMPLEX(ans)[i] = strtoc(buffer, &endp, TRUE, d); if (!isBlankString(endp)) expected("a complex", buffer, d); } break; case STRSXP: if (isNAstring(buffer, 1, d)) SET_STRING_ELT(ans, i, NA_STRING); else SET_STRING_ELT(ans, i, insertString(buffer, d)); break; case RAWSXP: if (isNAstring(buffer, 0, d)) RAW(ans)[i] = 0; else { RAW(ans)[i] = strtoraw(buffer, &endp); if (!isBlankString(endp)) expected("a raw", buffer, d); } break; default: UNIMPLEMENTED_TYPE("extractItem", ans); } }
SEXP mat_split(SEXP s, SEXP sSep, SEXP sNamesSep, SEXP sResilient, SEXP sNcol, SEXP sWhat, SEXP sSkip, SEXP sNlines) { unsigned int ncol = 1, nrow, np = 0, i, N, resilient = asInteger(sResilient); int use_ncol = asInteger(sNcol); int nsep = -1; int skip = INTEGER(sSkip)[0]; int nlines = INTEGER(sNlines)[0]; int len; SEXP res, rnam, zerochar = 0; char sep; char num_buf[48]; double * res_ptr; const char *c, *sraw, *send, *l, *le;; /* parse sep input */ if (TYPEOF(sNamesSep) == STRSXP && LENGTH(sNamesSep) > 0) nsep = (int) (unsigned char) *CHAR(STRING_ELT(sNamesSep, 0)); if (TYPEOF(sSep) != STRSXP || LENGTH(sSep) < 1) Rf_error("invalid separator"); sep = CHAR(STRING_ELT(sSep, 0))[0]; /* check the input data */ if (TYPEOF(s) == RAWSXP) { nrow = (nlines >= 0) ? count_lines_bounded(s, nlines + skip) : count_lines(s); sraw = (const char*) RAW(s); send = sraw + XLENGTH(s); if (nrow >= skip) { nrow = nrow - skip; for (i = 0; i < skip; i++) sraw = memchr(sraw,'\n',XLENGTH(s)) + 1; } else { nrow = 0; sraw = send; } } else if (TYPEOF(s) == STRSXP) { nrow = LENGTH(s); if (nrow >= skip) { nrow -= skip; } else { skip = nrow; nrow = 0; } } else { Rf_error("invalid input to split - must be a raw or character vector"); } if (nlines >= 0 && nrow > nlines) nrow = nlines; /* If no rows left, return an empty matrix */ if (!nrow) { if (np) UNPROTECT(np); return allocMatrix(TYPEOF(sWhat), 0, 0); } /* count number of columns */ if (use_ncol < 1) { if (TYPEOF(s) == RAWSXP) { ncol = 1; c = sraw; le = memchr(sraw, '\n', send - sraw); while ((c = memchr(c, (unsigned char) sep, le - c))) { ncol++; c++; } } else { c = CHAR(STRING_ELT(s, 0)); while ((c = strchr(c, sep))) { ncol++; c++; } /* if sep and nsep are the same then the first "column" is the key and not the column */ if (nsep == (int) (unsigned char) sep) ncol--; } } else ncol = use_ncol; /* allocate space for the result */ N = ncol * nrow; switch(TYPEOF(sWhat)) { case LGLSXP: case INTSXP: case REALSXP: case CPLXSXP: case STRSXP: case RAWSXP: res = PROTECT(allocMatrix(TYPEOF(sWhat), nrow, ncol)); break; default: Rf_error("Unsupported input to what."); break; } if (nsep >= 0) { SEXP dn; setAttrib(res, R_DimNamesSymbol, (dn = allocVector(VECSXP, 2))); SET_VECTOR_ELT(dn, 0, (rnam = allocVector(STRSXP, nrow))); } np++; /* cycle over the rows and parse the data */ for (i = 0; i < nrow; i++) { int j = i; /* find the row of data */ if (TYPEOF(s) == RAWSXP) { l = sraw; le = memchr(l, '\n', send - l); if (!le) le = send; sraw = le + 1; } else { l = CHAR(STRING_ELT(s, i + skip)); le = l + strlen(l); } /* if nsep, load rowname */ if (nsep >= 0) { c = memchr(l, nsep, le - l); if (c) { SET_STRING_ELT(rnam, i, Rf_mkCharLen(l, c - l)); l = c + 1; } else SET_STRING_ELT(rnam, i, R_BlankString); } /* now split the row into elements */ while (l < le) { if (!(c = memchr(l, sep, le - l))) c = le; if (j >= N) { if (resilient) break; Rf_error("line %lu: too many columns (expected %u)", (unsigned long)(i + 1), ncol); } switch(TYPEOF(sWhat)) { case LGLSXP: len = (int) (c - l); if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; int tr = StringTrue(num_buf), fa = StringFalse(num_buf); LOGICAL(res)[j] = (tr || fa) ? tr : NA_INTEGER; break; case INTSXP: len = (int) (c - l); /* watch for overflow and truncate -- should we warn? */ if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; INTEGER(res)[j] = Strtoi(num_buf, 10); break; case REALSXP: len = (int) (c - l); /* watch for overflow and truncate -- should we warn? */ if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; REAL(res)[j] = R_atof(num_buf); break; case CPLXSXP: len = (int) (c - l); /* watch for overflow and truncate -- should we warn? */ if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; COMPLEX(res)[j] = strtoc(num_buf, TRUE); break; case STRSXP: SET_STRING_ELT(res, j, Rf_mkCharLen(l, c - l)); break; case RAWSXP: len = (int) (c - l); /* watch for overflow and truncate -- should we warn? */ if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; RAW(res)[j] = strtoraw(num_buf); break; } l = c + 1; j += nrow; } /* fill up unused columns with NAs */ while (j < N) { switch (TYPEOF(sWhat)) { case LGLSXP: LOGICAL(res)[j] = NA_INTEGER; break; case INTSXP: INTEGER(res)[j] = NA_INTEGER; break; case REALSXP: REAL(res)[j] = NA_REAL; break; case CPLXSXP: COMPLEX(res)[j].r = NA_REAL; COMPLEX(res)[j].i = NA_REAL; break; case STRSXP: SET_STRING_ELT(res, j, R_NaString); break; case RAWSXP: RAW(res)[j] = (Rbyte) 0; break; } j += nrow; } } UNPROTECT(np); return res; }