extern "C" SEXP sourcetools_read(SEXP absolutePathSEXP) { const char* absolutePath = CHAR(STRING_ELT(absolutePathSEXP, 0)); std::string contents; bool result = sourcetools::read(absolutePath, &contents); if (!result) { Rf_warning("Failed to read file"); return R_NilValue; } sourcetools::r::Protect protect; SEXP resultSEXP = protect(Rf_allocVector(STRSXP, 1)); SET_STRING_ELT(resultSEXP, 0, Rf_mkCharLen(contents.c_str(), contents.size())); return resultSEXP; }
extern "C" SEXP sourcetools_read_lines(SEXP absolutePathSEXP) { const char* absolutePath = CHAR(STRING_ELT(absolutePathSEXP, 0)); std::vector<std::string> lines; bool result = sourcetools::read_lines(absolutePath, &lines); if (!result) { Rf_warning("Failed to read file"); return R_NilValue; } sourcetools::index_type n = lines.size(); sourcetools::r::Protect protect; SEXP resultSEXP = protect(Rf_allocVector(STRSXP, n)); for (sourcetools::index_type i = 0; i < n; ++i) SET_STRING_ELT(resultSEXP, i, Rf_mkCharLen(lines[i].c_str(), lines[i].size())); return resultSEXP; }
/** * Get substring * * * @param str character vector * @param from integer vector (possibly with negative indices) * @param to integer vector (possibly with negative indices) or NULL * @param length integer vector or NULL * @return character vector * * @version 0.1-?? (Bartek Tartanus) * stri_sub * * @version 0.1-?? (Marek Gagolewski) * use StriContainerUTF8 and stri__UChar32_to_UTF8_index * * @version 0.1-?? (Marek Gagolewski, 2013-06-01) * use StriContainerUTF8's UChar32-to-UTF8 index * * @version 0.1-?? (Marek Gagolewski, 2013-06-16) * make StriException-friendly * * @version 0.2-1 (Marek Gagolewski, 2014-03-20) * Use StriContainerUTF8_indexable * * @version 0.2-1 (Marek Gagolewski, 2014-04-03) * Use stri__sub_prepare_from_to_length() * * @version 0.3-1 (Marek Gagolewski, 2014-11-04) * Issue #112: str_prepare_arg* retvals were not PROTECTed from gc * * @version 0.5-9003 (Marek Gagolewski, 2015-08-05) * Bugfix #183: floating point exception when to or length is an empty vector */ SEXP stri_sub(SEXP str, SEXP from, SEXP to, SEXP length) { PROTECT(str = stri_prepare_arg_string(str, "str")); R_len_t str_len = LENGTH(str); R_len_t from_len = 0; R_len_t to_len = 0; R_len_t length_len = 0; int* from_tab = 0; int* to_tab = 0; int* length_tab = 0; R_len_t sub_protected = 1+ /* how many objects to PROTECT on ret? */ stri__sub_prepare_from_to_length(from, to, length, from_len, to_len, length_len, from_tab, to_tab, length_tab); R_len_t vectorize_len = stri__recycling_rule(true, 3, str_len, from_len, (to_len>length_len)?to_len:length_len); if (vectorize_len <= 0) { UNPROTECT(sub_protected); return Rf_allocVector(STRSXP, 0); } STRI__ERROR_HANDLER_BEGIN(sub_protected) StriContainerUTF8_indexable str_cont(str, vectorize_len); SEXP ret; STRI__PROTECT(ret = Rf_allocVector(STRSXP, vectorize_len)); for (R_len_t i = str_cont.vectorize_init(); i != str_cont.vectorize_end(); i = str_cont.vectorize_next(i)) { R_len_t cur_from = from_tab[i % from_len]; R_len_t cur_to = (to_tab)?to_tab[i % to_len]:length_tab[i % length_len]; if (str_cont.isNA(i) || cur_from == NA_INTEGER || cur_to == NA_INTEGER) { SET_STRING_ELT(ret, i, NA_STRING); continue; } if (length_tab) { if (cur_to <= 0) { SET_STRING_ELT(ret, i, R_BlankString); continue; } cur_to = cur_from + cur_to - 1; if (cur_from < 0 && cur_to >= 0) cur_to = -1; } const char* str_cur_s = str_cont.get(i).c_str(); R_len_t cur_from2; // UTF-8 byte indices R_len_t cur_to2; // UTF-8 byte indices stri__sub_get_indices(str_cont, i, cur_from, cur_to, cur_from2, cur_to2); if (cur_to2 > cur_from2) { // just copy SET_STRING_ELT(ret, i, Rf_mkCharLenCE(str_cur_s+cur_from2, cur_to2-cur_from2, CE_UTF8)); } else { // maybe a warning here? SET_STRING_ELT(ret, i, Rf_mkCharLen(NULL, 0)); } } STRI__UNPROTECT_ALL return ret; STRI__ERROR_HANDLER_END(;/* nothing special to be done on error */) }
SEXP df_split(SEXP s, SEXP sSep, SEXP sNamesSep, SEXP sResilient, SEXP sNcol, SEXP sWhat, SEXP sColNames, SEXP sSkip, SEXP sNlines, SEXP sQuote) { char sep; int nsep, use_ncol, resilient, ncol; long i, j, k, m, len, nmsep_flag, skip, quoteLen; unsigned long nrow; char num_buf[48]; const char *c, *c2, *sraw = 0, *send = 0, *quoteChars; long nlines = asLong(sNlines, -1); SEXP sOutput, tmp, sOutputNames, st, clv; /* Parse inputs */ sep = CHAR(STRING_ELT(sSep, 0))[0]; nsep = (TYPEOF(sNamesSep) == STRSXP && LENGTH(sNamesSep) > 0) ? ((int) (unsigned char) *CHAR(STRING_ELT(sNamesSep, 0))) : -1; nmsep_flag = (nsep > 0); use_ncol = asInteger(sNcol); resilient = asInteger(sResilient); ncol = use_ncol; /* NOTE: "character" is prepended by the R code if nmsep is TRUE, so ncol *does* include the key column */ skip = asLong(sSkip, 0); /* parse quote information */ quoteChars = CHAR(STRING_ELT(sQuote, 0)); quoteLen = strlen(quoteChars); /* count non-NA columns */ for (i = 0; i < use_ncol; i++) if (TYPEOF(VECTOR_ELT(sWhat,i)) == NILSXP) ncol--; /* check input */ if (TYPEOF(s) == RAWSXP) { nrow = (nlines >= 0) ? count_lines_bounded(s, nlines + skip) : count_lines(s); sraw = (const char*) RAW(s); send = sraw + XLENGTH(s); if (nrow >= skip) { unsigned long slen = XLENGTH(s); nrow = nrow - skip; i = 0; while (i < skip && (sraw = memchr(sraw, '\n', slen))) { sraw++; i++; } } else { nrow = 0; sraw = send; } } else if (TYPEOF(s) == STRSXP) { nrow = XLENGTH(s); if (nrow >= skip) { nrow -= skip; } else { skip = nrow; nrow = 0; } } else Rf_error("invalid input to split - must be a raw or character vector"); if (nlines >= 0 && nrow > nlines) nrow = nlines; /* allocate result */ PROTECT(sOutput = allocVector(VECSXP, ncol)); /* set names */ setAttrib(sOutput, R_NamesSymbol, sOutputNames = allocVector(STRSXP, ncol)); if (nrow > INT_MAX) Rf_warning("R currently doesn't support large data frames, but we have %lu rows, returning a named list instead", nrow); else { /* set automatic row names */ PROTECT(tmp = allocVector(INTSXP, 2)); INTEGER(tmp)[0] = NA_INTEGER; INTEGER(tmp)[1] = -nrow; setAttrib(sOutput, R_RowNamesSymbol, tmp); UNPROTECT(1); /* set class */ classgets(sOutput, mkString("data.frame")); } /* Create SEXP for each element of the output */ j = 0; for (i = 0; i < use_ncol; i++) { if (TYPEOF(VECTOR_ELT(sWhat,i)) != NILSXP) /* copy col.name */ SET_STRING_ELT(sOutputNames, j, STRING_ELT(sColNames, i)); switch (TYPEOF(VECTOR_ELT(sWhat,i))) { case LGLSXP: case INTSXP: case REALSXP: case CPLXSXP: case STRSXP: case RAWSXP: SET_VECTOR_ELT(sOutput, j++, allocVector(TYPEOF(VECTOR_ELT(sWhat,i)), nrow)); break; case VECSXP: SET_VECTOR_ELT(sOutput, j++, st = allocVector(REALSXP, nrow)); clv = PROTECT(allocVector(STRSXP, 2)); SET_STRING_ELT(clv, 0, mkChar("POSIXct")); SET_STRING_ELT(clv, 1, mkChar("POSIXt")); setAttrib(st, R_ClassSymbol, clv); /* this is somewhat a security precaution such that users don't get surprised -- if there is no TZ R will render it in local time - which is correct but may confuse people that didn't use GMT to start with */ setAttrib(st, install("tzone"), mkString("GMT")); UNPROTECT(1); break; case NILSXP: break; default: Rf_error("Unsupported input to what %u.", TYPEOF(VECTOR_ELT(sWhat,i))); break; } } /* Cycle through the rows and extract the data */ for (k = 0; k < nrow; k++) { const char *l = 0, *le; if (TYPEOF(s) == RAWSXP) { l = sraw; le = memchr(l, '\n', send - l); if (!le) le = send; sraw = le + 1; if (*(le - 1) == '\r' ) le--; /* account for DOS-style '\r\n' */ } else { l = CHAR(STRING_ELT(s, k + skip)); le = l + strlen(l); /* probably lame, but using strings is way inefficient anyway ;) */ } if (nmsep_flag) { c = memchr(l, nsep, le - l); if (c) { SET_STRING_ELT(VECTOR_ELT(sOutput, 0), k, Rf_mkCharLen(l, c - l)); l = c + 1; } else SET_STRING_ELT(VECTOR_ELT(sOutput, 0), k, R_BlankString); } i = nmsep_flag; j = nmsep_flag; while (l < le) { if (!(c = memchr(l, sep, le - l))) c = le; if (i >= use_ncol) { if (resilient) break; Rf_error("line %lu: too many input columns (expected %u)", k, use_ncol); } switch(TYPEOF(VECTOR_ELT(sWhat,i))) { // NOTE: no matching case for NILSXP case LGLSXP: len = (int) (c - l); if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; int tr = StringTrue(num_buf), fa = StringFalse(num_buf); LOGICAL(VECTOR_ELT(sOutput, j))[k] = (tr || fa) ? tr : NA_INTEGER; j++; break; case INTSXP: len = (int) (c - l); /* watch for overflow and truncate -- should we warn? */ if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; INTEGER(VECTOR_ELT(sOutput, j))[k] = Strtoi(num_buf, 10); j++; break; case REALSXP: len = (int) (c - l); /* watch for overflow and truncate -- should we warn? */ if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; REAL(VECTOR_ELT(sOutput, j))[k] = R_atof(num_buf); j++; break; case CPLXSXP: len = (int) (c - l); /* watch for overflow and truncate -- should we warn? */ if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; COMPLEX(VECTOR_ELT(sOutput, j))[k] = strtoc(num_buf, TRUE); j++; break; case STRSXP: c2 = c; if (quoteLen) { for (m = 0; m < quoteLen; m++) { if (*l == quoteChars[m]) { l++; if (!(c2 = memchr(l, quoteChars[m], le - l))) { Rf_error("End of line within quoted string."); } else { if (!(c = memchr(c2, (unsigned char) sep, le - c2))) c = le; } } } } SET_STRING_ELT(VECTOR_ELT(sOutput, j), k, Rf_mkCharLen(l, c2 - l)); j++; break; case RAWSXP: len = (int) (c - l); /* watch for overflow and truncate -- should we warn? */ if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; RAW(VECTOR_ELT(sOutput, j))[k] = strtoraw(num_buf); j++; break; case VECSXP: REAL(VECTOR_ELT(sOutput, j))[k] = parse_ts(l, c); j++; } l = c + 1; i++; } /* fill-up unused columns */ while (i < use_ncol) { switch (TYPEOF(VECTOR_ELT(sWhat,i))) { // NOTE: no matching case for NILSXP case LGLSXP: LOGICAL(VECTOR_ELT(sOutput, j++))[k] = NA_INTEGER; break; case INTSXP: INTEGER(VECTOR_ELT(sOutput, j++))[k] = NA_INTEGER; break; case REALSXP: case VECSXP: REAL(VECTOR_ELT(sOutput, j++))[k] = NA_REAL; break; case CPLXSXP: COMPLEX(VECTOR_ELT(sOutput, j))[k].r = NA_REAL; COMPLEX(VECTOR_ELT(sOutput, j++))[k].i = NA_REAL; break; case STRSXP: SET_STRING_ELT(VECTOR_ELT(sOutput, j++), k, R_NaString); break; case RAWSXP: RAW(VECTOR_ELT(sOutput, j))[k] = (Rbyte) 0; break; } i++; } } UNPROTECT(1); /* sOutput */ return(sOutput); }
SEXP mat_split(SEXP s, SEXP sSep, SEXP sNamesSep, SEXP sResilient, SEXP sNcol, SEXP sWhat, SEXP sSkip, SEXP sNlines, SEXP sQuote) { unsigned int ncol = 1, np = 0, resilient = asInteger(sResilient); unsigned long nrow, i, k, N, len, quoteLen; int use_ncol = asInteger(sNcol); int nsep = -1; long skip = asLong(sSkip, 0); long nlines = asLong(sNlines, -1); SEXP res, rnam; char sep; char num_buf[48]; /* sraw/send is only used for raw vector parsing, but we have to set it to 0 to make gcc happy which cannot figure out that it's actually unused */ const char *c, *c2, *sraw = 0, *send = 0, *l, *le, *quoteChars; /* parse sep input */ if (TYPEOF(sNamesSep) == STRSXP && LENGTH(sNamesSep) > 0) nsep = (int) (unsigned char) *CHAR(STRING_ELT(sNamesSep, 0)); if (TYPEOF(sSep) != STRSXP || LENGTH(sSep) < 1) Rf_error("invalid separator"); sep = CHAR(STRING_ELT(sSep, 0))[0]; /* parse quote information */ quoteChars = CHAR(STRING_ELT(sQuote, 0)); quoteLen = strlen(quoteChars); /* check the input data */ if (TYPEOF(s) == RAWSXP) { nrow = (nlines >= 0) ? count_lines_bounded(s, nlines + skip) : count_lines(s); sraw = (const char*) RAW(s); send = sraw + XLENGTH(s); if (nrow >= skip) { unsigned long slen = XLENGTH(s); nrow = nrow - skip; i = 0; while (i < skip && (sraw = memchr(sraw, '\n', slen))) { sraw++; i++; } } else { nrow = 0; sraw = send; } } else if (TYPEOF(s) == STRSXP) { nrow = LENGTH(s); if (nrow >= skip) { nrow -= skip; } else { skip = nrow; nrow = 0; } } else { Rf_error("invalid input to split - must be a raw or character vector"); } if (nlines >= 0 && nrow > nlines) nrow = nlines; /* If no rows left, return an empty matrix */ if (!nrow) { if (np) UNPROTECT(np); return allocMatrix(TYPEOF(sWhat), 0, 0); } /* count number of columns */ if (use_ncol < 1) { if (TYPEOF(s) == RAWSXP) { c = sraw; le = memchr(sraw, '\n', send - sraw); if (TYPEOF(sWhat) == STRSXP && quoteLen) { ncol = 0; while(1) { ncol++; for (k = 0; k < quoteLen; k++) { if (*c == quoteChars[k]) { c++; if (! (c = memchr(c, quoteChars[k], le - c))) Rf_error("End of line within quote string on line 1; cannot determine num columns!"); break; /* note: breaks inner 'for' loop, not 'if' statement */ } } if (!(c = (memchr(c, (unsigned char) sep, le - c)))) break; c++; } } else { ncol = 1; while ((c = memchr(c, (unsigned char) sep, le - c))) { ncol++; c++; } } } else { c = CHAR(STRING_ELT(s, 0)); if (TYPEOF(sWhat) == STRSXP && quoteLen) { ncol = 0; while(1) { ncol++; for (k = 0; k < quoteLen; k++) { if (*c == quoteChars[k]) { c++; if (!(c = strchr(c, quoteChars[k]))) Rf_error("End of line within quote string on line 1; cannot determine num columns!"); break; /* note: breaks inner 'for' loop, not 'if' statement */ } } if (!(c = (strchr(c, sep)))) break; c++; } } else { while ((c = strchr(c, sep))) { ncol++; c++; } } } /* if sep and nsep are the same then the first "column" is the key and not the column */ if (nsep == (int) (unsigned char) sep) ncol--; } else ncol = use_ncol; /* allocate space for the result */ N = ncol * nrow; switch(TYPEOF(sWhat)) { case LGLSXP: case INTSXP: case REALSXP: case CPLXSXP: case STRSXP: case RAWSXP: res = PROTECT(allocMatrix(TYPEOF(sWhat), nrow, ncol)); break; default: Rf_error("Unsupported input to what."); break; } if (nsep >= 0) { SEXP dn; setAttrib(res, R_DimNamesSymbol, (dn = allocVector(VECSXP, 2))); SET_VECTOR_ELT(dn, 0, (rnam = allocVector(STRSXP, nrow))); } np++; /* cycle over the rows and parse the data */ for (i = 0; i < nrow; i++) { int j = i; /* find the row of data */ if (TYPEOF(s) == RAWSXP) { l = sraw; le = memchr(l, '\n', send - l); if (!le) le = send; sraw = le + 1; if (*(le - 1) == '\r' ) le--; /* account for DOS-style '\r\n' */ } else { l = CHAR(STRING_ELT(s, i + skip)); le = l + strlen(l); } /* if nsep, load rowname */ if (nsep >= 0) { c = memchr(l, nsep, le - l); if (c) { SET_STRING_ELT(rnam, i, Rf_mkCharLen(l, c - l)); l = c + 1; } else SET_STRING_ELT(rnam, i, R_BlankString); } /* now split the row into elements */ while (l < le) { if (!(c = memchr(l, sep, le - l))) c = le; if (j >= N) { if (resilient) break; Rf_error("line %lu: too many columns (expected %u)", (unsigned long)(i + 1), ncol); } switch(TYPEOF(sWhat)) { case LGLSXP: len = (int) (c - l); if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; int tr = StringTrue(num_buf), fa = StringFalse(num_buf); LOGICAL(res)[j] = (tr || fa) ? tr : NA_INTEGER; break; case INTSXP: len = (int) (c - l); /* watch for overflow and truncate -- should we warn? */ if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; INTEGER(res)[j] = Strtoi(num_buf, 10); break; case REALSXP: len = (int) (c - l); /* watch for overflow and truncate -- should we warn? */ if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; REAL(res)[j] = R_atof(num_buf); break; case CPLXSXP: len = (int) (c - l); /* watch for overflow and truncate -- should we warn? */ if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; COMPLEX(res)[j] = strtoc(num_buf, TRUE); break; case STRSXP: c2 = c; if (quoteLen) { for (k = 0; k < quoteLen; k++) { if (*l == quoteChars[k]) { l++; if (!(c2 = memchr(l, quoteChars[k], le - l))) { Rf_warning("End of line within quoted string!"); c = c2 = le; } else { if (!(c = memchr(c2, (unsigned char) sep, le - c2))) c = le; } } } } SET_STRING_ELT(res, j, Rf_mkCharLen(l, c2 - l)); break; case RAWSXP: len = (int) (c - l); /* watch for overflow and truncate -- should we warn? */ if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; RAW(res)[j] = strtoraw(num_buf); break; } l = c + 1; j += nrow; } /* fill up unused columns with NAs */ while (j < N) { switch (TYPEOF(sWhat)) { case LGLSXP: LOGICAL(res)[j] = NA_INTEGER; break; case INTSXP: INTEGER(res)[j] = NA_INTEGER; break; case REALSXP: REAL(res)[j] = NA_REAL; break; case CPLXSXP: COMPLEX(res)[j].r = NA_REAL; COMPLEX(res)[j].i = NA_REAL; break; case STRSXP: SET_STRING_ELT(res, j, R_NaString); break; case RAWSXP: RAW(res)[j] = (Rbyte) 0; break; } j += nrow; } } UNPROTECT(np); return res; }
SEXP mat_split(SEXP s, SEXP sSep, SEXP sNamesSep, SEXP sResilient, SEXP sNcol, SEXP sWhat, SEXP sSkip, SEXP sNlines) { unsigned int ncol = 1, nrow, np = 0, i, N, resilient = asInteger(sResilient); int use_ncol = asInteger(sNcol); int nsep = -1; int skip = INTEGER(sSkip)[0]; int nlines = INTEGER(sNlines)[0]; int len; SEXP res, rnam, zerochar = 0; char sep; char num_buf[48]; double * res_ptr; const char *c, *sraw, *send, *l, *le;; /* parse sep input */ if (TYPEOF(sNamesSep) == STRSXP && LENGTH(sNamesSep) > 0) nsep = (int) (unsigned char) *CHAR(STRING_ELT(sNamesSep, 0)); if (TYPEOF(sSep) != STRSXP || LENGTH(sSep) < 1) Rf_error("invalid separator"); sep = CHAR(STRING_ELT(sSep, 0))[0]; /* check the input data */ if (TYPEOF(s) == RAWSXP) { nrow = (nlines >= 0) ? count_lines_bounded(s, nlines + skip) : count_lines(s); sraw = (const char*) RAW(s); send = sraw + XLENGTH(s); if (nrow >= skip) { nrow = nrow - skip; for (i = 0; i < skip; i++) sraw = memchr(sraw,'\n',XLENGTH(s)) + 1; } else { nrow = 0; sraw = send; } } else if (TYPEOF(s) == STRSXP) { nrow = LENGTH(s); if (nrow >= skip) { nrow -= skip; } else { skip = nrow; nrow = 0; } } else { Rf_error("invalid input to split - must be a raw or character vector"); } if (nlines >= 0 && nrow > nlines) nrow = nlines; /* If no rows left, return an empty matrix */ if (!nrow) { if (np) UNPROTECT(np); return allocMatrix(TYPEOF(sWhat), 0, 0); } /* count number of columns */ if (use_ncol < 1) { if (TYPEOF(s) == RAWSXP) { ncol = 1; c = sraw; le = memchr(sraw, '\n', send - sraw); while ((c = memchr(c, (unsigned char) sep, le - c))) { ncol++; c++; } } else { c = CHAR(STRING_ELT(s, 0)); while ((c = strchr(c, sep))) { ncol++; c++; } /* if sep and nsep are the same then the first "column" is the key and not the column */ if (nsep == (int) (unsigned char) sep) ncol--; } } else ncol = use_ncol; /* allocate space for the result */ N = ncol * nrow; switch(TYPEOF(sWhat)) { case LGLSXP: case INTSXP: case REALSXP: case CPLXSXP: case STRSXP: case RAWSXP: res = PROTECT(allocMatrix(TYPEOF(sWhat), nrow, ncol)); break; default: Rf_error("Unsupported input to what."); break; } if (nsep >= 0) { SEXP dn; setAttrib(res, R_DimNamesSymbol, (dn = allocVector(VECSXP, 2))); SET_VECTOR_ELT(dn, 0, (rnam = allocVector(STRSXP, nrow))); } np++; /* cycle over the rows and parse the data */ for (i = 0; i < nrow; i++) { int j = i; /* find the row of data */ if (TYPEOF(s) == RAWSXP) { l = sraw; le = memchr(l, '\n', send - l); if (!le) le = send; sraw = le + 1; } else { l = CHAR(STRING_ELT(s, i + skip)); le = l + strlen(l); } /* if nsep, load rowname */ if (nsep >= 0) { c = memchr(l, nsep, le - l); if (c) { SET_STRING_ELT(rnam, i, Rf_mkCharLen(l, c - l)); l = c + 1; } else SET_STRING_ELT(rnam, i, R_BlankString); } /* now split the row into elements */ while (l < le) { if (!(c = memchr(l, sep, le - l))) c = le; if (j >= N) { if (resilient) break; Rf_error("line %lu: too many columns (expected %u)", (unsigned long)(i + 1), ncol); } switch(TYPEOF(sWhat)) { case LGLSXP: len = (int) (c - l); if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; int tr = StringTrue(num_buf), fa = StringFalse(num_buf); LOGICAL(res)[j] = (tr || fa) ? tr : NA_INTEGER; break; case INTSXP: len = (int) (c - l); /* watch for overflow and truncate -- should we warn? */ if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; INTEGER(res)[j] = Strtoi(num_buf, 10); break; case REALSXP: len = (int) (c - l); /* watch for overflow and truncate -- should we warn? */ if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; REAL(res)[j] = R_atof(num_buf); break; case CPLXSXP: len = (int) (c - l); /* watch for overflow and truncate -- should we warn? */ if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; COMPLEX(res)[j] = strtoc(num_buf, TRUE); break; case STRSXP: SET_STRING_ELT(res, j, Rf_mkCharLen(l, c - l)); break; case RAWSXP: len = (int) (c - l); /* watch for overflow and truncate -- should we warn? */ if (len > sizeof(num_buf) - 1) len = sizeof(num_buf) - 1; memcpy(num_buf, l, len); num_buf[len] = 0; RAW(res)[j] = strtoraw(num_buf); break; } l = c + 1; j += nrow; } /* fill up unused columns with NAs */ while (j < N) { switch (TYPEOF(sWhat)) { case LGLSXP: LOGICAL(res)[j] = NA_INTEGER; break; case INTSXP: INTEGER(res)[j] = NA_INTEGER; break; case REALSXP: REAL(res)[j] = NA_REAL; break; case CPLXSXP: COMPLEX(res)[j].r = NA_REAL; COMPLEX(res)[j].i = NA_REAL; break; case STRSXP: SET_STRING_ELT(res, j, R_NaString); break; case RAWSXP: RAW(res)[j] = (Rbyte) 0; break; } j += nrow; } } UNPROTECT(np); return res; }