/* "do_parse" - the user interface input/output to files. The internal R_Parse.. functions are defined in ./gram.y (-> gram.c) .Internal( parse(file, n, text, prompt, srcfile, encoding) ) If there is text then that is read and the other arguments are ignored. */ SEXP attribute_hidden do_parse(SEXP call, SEXP op, SEXP args, SEXP env) { SEXP text, prompt, s, source; Rconnection con; Rboolean wasopen, old_latin1 = known_to_be_latin1, old_utf8 = known_to_be_utf8, allKnown = TRUE; int ifile, num, i; const char *encoding; ParseStatus status; checkArity(op, args); R_ParseError = 0; R_ParseErrorMsg[0] = '\0'; ifile = asInteger(CAR(args)); args = CDR(args); con = getConnection(ifile); wasopen = con->isopen; num = asInteger(CAR(args)); args = CDR(args); if (num == 0) return(allocVector(EXPRSXP, 0)); PROTECT(text = coerceVector(CAR(args), STRSXP)); if(length(CAR(args)) && !length(text)) errorcall(call, _("coercion of 'text' to character was unsuccessful")); args = CDR(args); prompt = CAR(args); args = CDR(args); source = CAR(args); args = CDR(args); if(!isString(CAR(args)) || LENGTH(CAR(args)) != 1) error(_("invalid '%s' value"), "encoding"); encoding = CHAR(STRING_ELT(CAR(args), 0)); /* ASCII */ known_to_be_latin1 = known_to_be_utf8 = FALSE; /* allow 'encoding' to override declaration on 'text'. */ if(streql(encoding, "latin1")) { known_to_be_latin1 = TRUE; allKnown = FALSE; } else if(streql(encoding, "UTF-8")) { known_to_be_utf8 = TRUE; allKnown = FALSE; } else if(!streql(encoding, "unknown") && !streql(encoding, "native.enc")) warning(_("argument '%s = \"%s\"' will be ignored"), "encoding", encoding); if (prompt == R_NilValue) PROTECT(prompt); else PROTECT(prompt = coerceVector(prompt, STRSXP)); if (length(text) > 0) { /* If 'text' has known encoding then we can be sure it will be correctly re-encoded to the current encoding by translateChar in the parser and so could mark the result in a Latin-1 or UTF-8 locale. A small complication is that different elements could have different encodings, but all that matters is that all non-ASCII elements have known encoding. */ for(i = 0; i < length(text); i++) if(!ENC_KNOWN(STRING_ELT(text, i)) && !IS_ASCII(STRING_ELT(text, i))) { allKnown = FALSE; break; } if(allKnown) { known_to_be_latin1 = old_latin1; known_to_be_utf8 = old_utf8; } if (num == NA_INTEGER) num = -1; s = R_ParseVector(text, num, &status, source); if (status != PARSE_OK) parseError(call, R_ParseError); } else if (ifile >= 3) {/* file != "" */ if (num == NA_INTEGER) num = -1; try { if(!wasopen && !con->open(con)) error(_("cannot open the connection")); if(!con->canread) error(_("cannot read from this connection")); s = R_ParseConn(con, num, &status, source); if(!wasopen) con->close(con); } catch (...) { if (!wasopen && con->isopen) con->close(con); throw; } if (status != PARSE_OK) parseError(call, R_ParseError); } else { if (num == NA_INTEGER) num = 1; s = R_ParseBuffer(&R_ConsoleIob, num, &status, prompt, source); if (status != PARSE_OK) parseError(call, R_ParseError); } UNPROTECT(2); known_to_be_latin1 = old_latin1; known_to_be_utf8 = old_utf8; return s; }
SEXP attribute_hidden do_readDCF(SEXP call, SEXP op, SEXP args, SEXP env) { int nwhat, nret, nc, nr, m, k, lastm, need; Rboolean blank_skip, field_skip = FALSE; int whatlen, dynwhat, buflen = 8096; // was 100, but that re-alloced often char *line, *buf; regex_t blankline, contline, trailblank, regline, eblankline; regmatch_t regmatch[1]; SEXP file, what, what2, retval, retval2, dims, dimnames; Rconnection con = NULL; Rboolean wasopen, is_eblankline; RCNTXT cntxt; SEXP fold_excludes; Rboolean field_fold = TRUE, has_fold_excludes; const char *field_name; int offset = 0; /* -Wall */ checkArity(op, args); file = CAR(args); con = getConnection(asInteger(file)); wasopen = con->isopen; if(!wasopen) { if(!con->open(con)) error(_("cannot open the connection")); /* Set up a context which will close the connection on error */ begincontext(&cntxt, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv, R_NilValue, R_NilValue); cntxt.cend = &con_cleanup; cntxt.cenddata = con; } if(!con->canread) error(_("cannot read from this connection")); args = CDR(args); PROTECT(what = coerceVector(CAR(args), STRSXP)); /* argument fields */ nwhat = LENGTH(what); dynwhat = (nwhat == 0); args = CDR(args); PROTECT(fold_excludes = coerceVector(CAR(args), STRSXP)); has_fold_excludes = (LENGTH(fold_excludes) > 0); buf = (char *) malloc(buflen); if(!buf) error(_("could not allocate memory for 'read.dcf'")); nret = 20; /* it is easier if we first have a record per column */ PROTECT(retval = allocMatrixNA(STRSXP, LENGTH(what), nret)); /* These used to use [:blank:] but that can match \xa0 as part of a UTF-8 character (and is nbspace on Windows). */ tre_regcomp(&blankline, "^[[:blank:]]*$", REG_NOSUB & REG_EXTENDED); tre_regcomp(&trailblank, "[ \t]+$", REG_EXTENDED); tre_regcomp(&contline, "^[[:blank:]]+", REG_EXTENDED); tre_regcomp(®line, "^[^:]+:[[:blank:]]*", REG_EXTENDED); tre_regcomp(&eblankline, "^[[:space:]]+\\.[[:space:]]*$", REG_EXTENDED); k = 0; lastm = -1; /* index of the field currently being recorded */ blank_skip = TRUE; void *vmax = vmaxget(); while((line = Rconn_getline2(con))) { if(strlen(line) == 0 || tre_regexecb(&blankline, line, 0, 0, 0) == 0) { /* A blank line. The first one after a record ends a new * record, subsequent ones are skipped */ if(!blank_skip) { k++; if(k > nret - 1){ nret *= 2; PROTECT(retval2 = allocMatrixNA(STRSXP, LENGTH(what), nret)); transferVector(retval2, retval); UNPROTECT_PTR(retval); retval = retval2; } blank_skip = TRUE; lastm = -1; field_skip = FALSE; field_fold = TRUE; } } else { blank_skip = FALSE; if(tre_regexecb(&contline, line, 1, regmatch, 0) == 0) { /* A continuation line: wrong if at the beginning of a record. */ if((lastm == -1) && !field_skip) { line[20] = '\0'; error(_("Found continuation line starting '%s ...' at begin of record."), line); } if(lastm >= 0) { need = (int) strlen(CHAR(STRING_ELT(retval, lastm + nwhat * k))) + 2; if(tre_regexecb(&eblankline, line, 0, NULL, 0) == 0) { is_eblankline = TRUE; } else { is_eblankline = FALSE; if(field_fold) { offset = regmatch[0].rm_eo; /* Also remove trailing whitespace. */ if((tre_regexecb(&trailblank, line, 1, regmatch, 0) == 0)) line[regmatch[0].rm_so] = '\0'; } else { offset = 0; } need += (int) strlen(line + offset); } if(buflen < need) { char *tmp = (char *) realloc(buf, need); if(!tmp) { free(buf); error(_("could not allocate memory for 'read.dcf'")); } else buf = tmp; buflen = need; } strcpy(buf,CHAR(STRING_ELT(retval, lastm + nwhat * k))); strcat(buf, "\n"); if(!is_eblankline) strcat(buf, line + offset); SET_STRING_ELT(retval, lastm + nwhat * k, mkChar(buf)); } } else { if(tre_regexecb(®line, line, 1, regmatch, 0) == 0) { for(m = 0; m < nwhat; m++){ whatlen = (int) strlen(CHAR(STRING_ELT(what, m))); if(strlen(line) > whatlen && line[whatlen] == ':' && strncmp(CHAR(STRING_ELT(what, m)), line, whatlen) == 0) { /* An already known field we are recording. */ lastm = m; field_skip = FALSE; field_name = CHAR(STRING_ELT(what, lastm)); if(has_fold_excludes) { field_fold = field_is_foldable_p(field_name, fold_excludes); } if(field_fold) { offset = regmatch[0].rm_eo; /* Also remove trailing whitespace. */ if((tre_regexecb(&trailblank, line, 1, regmatch, 0) == 0)) line[regmatch[0].rm_so] = '\0'; } else { offset = 0; } SET_STRING_ELT(retval, m + nwhat * k, mkChar(line + offset)); break; } else { /* This is a field, but not one prespecified */ lastm = -1; field_skip = TRUE; } } if(dynwhat && (lastm == -1)) { /* A previously unseen field and we are * recording all fields */ field_skip = FALSE; PROTECT(what2 = allocVector(STRSXP, nwhat+1)); PROTECT(retval2 = allocMatrixNA(STRSXP, nrows(retval)+1, ncols(retval))); if(nwhat > 0) { copyVector(what2, what); for(nr = 0; nr < nrows(retval); nr++){ for(nc = 0; nc < ncols(retval); nc++){ SET_STRING_ELT(retval2, nr+nc*nrows(retval2), STRING_ELT(retval, nr+nc*nrows(retval))); } } } UNPROTECT_PTR(retval); UNPROTECT_PTR(what); retval = retval2; what = what2; /* Make sure enough space was used */ need = (int) (Rf_strchr(line, ':') - line + 1); if(buflen < need){ char *tmp = (char *) realloc(buf, need); if(!tmp) { free(buf); error(_("could not allocate memory for 'read.dcf'")); } else buf = tmp; buflen = need; } strncpy(buf, line, Rf_strchr(line, ':') - line); buf[Rf_strchr(line, ':') - line] = '\0'; SET_STRING_ELT(what, nwhat, mkChar(buf)); nwhat++; /* lastm uses C indexing, hence nwhat - 1 */ lastm = nwhat - 1; field_name = CHAR(STRING_ELT(what, lastm)); if(has_fold_excludes) { field_fold = field_is_foldable_p(field_name, fold_excludes); } offset = regmatch[0].rm_eo; if(field_fold) { /* Also remove trailing whitespace. */ if((tre_regexecb(&trailblank, line, 1, regmatch, 0) == 0)) line[regmatch[0].rm_so] = '\0'; } SET_STRING_ELT(retval, lastm + nwhat * k, mkChar(line + offset)); } } else { /* Must be a regular line with no tag ... */ line[20] = '\0'; error(_("Line starting '%s ...' is malformed!"), line); } } } } vmaxset(vmax); if(!wasopen) {endcontext(&cntxt); con->close(con);} free(buf); tre_regfree(&blankline); tre_regfree(&contline); tre_regfree(&trailblank); tre_regfree(®line); tre_regfree(&eblankline); if(!blank_skip) k++; /* and now transpose the whole matrix */ PROTECT(retval2 = allocMatrixNA(STRSXP, k, LENGTH(what))); copyMatrix(retval2, retval, 1); PROTECT(dimnames = allocVector(VECSXP, 2)); PROTECT(dims = allocVector(INTSXP, 2)); INTEGER(dims)[0] = k; INTEGER(dims)[1] = LENGTH(what); SET_VECTOR_ELT(dimnames, 1, what); setAttrib(retval2, R_DimSymbol, dims); setAttrib(retval2, R_DimNamesSymbol, dimnames); UNPROTECT(6); return(retval2); }
SEXP writetable(SEXP call, SEXP op, SEXP args, SEXP env) { SEXP x, sep, rnames, eol, na, dec, quote, xj; int nr, nc, i, j, qmethod; Rboolean wasopen, quote_rn = FALSE, *quote_col; Rconnection con; const char *csep, *ceol, *cna, *sdec, *tmp=NULL /* -Wall */; char cdec; SEXP *levels; R_StringBuffer strBuf = {NULL, 0, MAXELTSIZE}; wt_info wi; RCNTXT cntxt; args = CDR(args); x = CAR(args); args = CDR(args); /* this is going to be a connection open or openable for writing */ if(!inherits(CAR(args), "connection")) error(_("'file' is not a connection")); con = getConnection(asInteger(CAR(args))); args = CDR(args); if(!con->canwrite) error(_("cannot write to this connection")); wasopen = con->isopen; if(!wasopen) { strcpy(con->mode, "wt"); if(!con->open(con)) error(_("cannot open the connection")); } nr = asInteger(CAR(args)); args = CDR(args); nc = asInteger(CAR(args)); args = CDR(args); rnames = CAR(args); args = CDR(args); sep = CAR(args); args = CDR(args); eol = CAR(args); args = CDR(args); na = CAR(args); args = CDR(args); dec = CAR(args); args = CDR(args); quote = CAR(args); args = CDR(args); qmethod = asLogical(CAR(args)); if(nr == NA_INTEGER) error(_("invalid '%s' argument"), "nr"); if(nc == NA_INTEGER) error(_("invalid '%s' argument"), "nc"); if(!isNull(rnames) && !isString(rnames)) error(_("invalid '%s' argument"), "rnames"); if(!isString(sep)) error(_("invalid '%s' argument"), "sep"); if(!isString(eol)) error(_("invalid '%s' argument"), "eol"); if(!isString(na)) error(_("invalid '%s' argument"), "na"); if(!isString(dec)) error(_("invalid '%s' argument"), "dec"); if(qmethod == NA_LOGICAL) error(_("invalid '%s' argument"), "qmethod"); csep = translateChar(STRING_ELT(sep, 0)); ceol = translateChar(STRING_ELT(eol, 0)); cna = translateChar(STRING_ELT(na, 0)); sdec = translateChar(STRING_ELT(dec, 0)); if(strlen(sdec) != 1) error(_("'dec' must be a single character")); cdec = sdec[0]; quote_col = (Rboolean *) R_alloc(nc, sizeof(Rboolean)); for(j = 0; j < nc; j++) quote_col[j] = FALSE; for(i = 0; i < length(quote); i++) { /* NB, quote might be NULL */ int this = INTEGER(quote)[i]; if(this == 0) quote_rn = TRUE; if(this > 0) quote_col[this - 1] = TRUE; } R_AllocStringBuffer(0, &strBuf); PrintDefaults(); wi.savedigits = R_print.digits; R_print.digits = DBL_DIG;/* MAX precision */ wi.con = con; wi.wasopen = wasopen; wi.buf = &strBuf; begincontext(&cntxt, CTXT_CCODE, call, R_BaseEnv, R_BaseEnv, R_NilValue, R_NilValue); cntxt.cend = &wt_cleanup; cntxt.cenddata = &wi; if(isVectorList(x)) { /* A data frame */ /* handle factors internally, check integrity */ levels = (SEXP *) R_alloc(nc, sizeof(SEXP)); for(j = 0; j < nc; j++) { xj = VECTOR_ELT(x, j); if(LENGTH(xj) != nr) error(_("corrupt data frame -- length of column %d does not not match nrows"), j+1); if(inherits(xj, "factor")) { levels[j] = getAttrib(xj, R_LevelsSymbol); } else levels[j] = R_NilValue; } for(i = 0; i < nr; i++) { if(i % 1000 == 999) R_CheckUserInterrupt(); if(!isNull(rnames)) Rconn_printf(con, "%s%s", EncodeElement2(rnames, i, quote_rn, qmethod, &strBuf, cdec), csep); for(j = 0; j < nc; j++) { xj = VECTOR_ELT(x, j); if(j > 0) Rconn_printf(con, "%s", csep); if(isna(xj, i)) tmp = cna; else { if(!isNull(levels[j])) { /* We do not assume factors have integer levels, although they should. */ if(TYPEOF(xj) == INTSXP) tmp = EncodeElement2(levels[j], INTEGER(xj)[i] - 1, quote_col[j], qmethod, &strBuf, cdec); else if(TYPEOF(xj) == REALSXP) tmp = EncodeElement2(levels[j], (int) (REAL(xj)[i] - 1), quote_col[j], qmethod, &strBuf, cdec); else error("column %s claims to be a factor but does not have numeric codes", j+1); } else { tmp = EncodeElement2(xj, i, quote_col[j], qmethod, &strBuf, cdec); } /* if(cdec) change_dec(tmp, cdec, TYPEOF(xj)); */ } Rconn_printf(con, "%s", tmp); } Rconn_printf(con, "%s", ceol); } } else { /* A matrix */ if(!isVectorAtomic(x)) UNIMPLEMENTED_TYPE("write.table, matrix method", x); /* quick integrity check */ if(LENGTH(x) != nr * nc) error(_("corrupt matrix -- dims not not match length")); for(i = 0; i < nr; i++) { if(i % 1000 == 999) R_CheckUserInterrupt(); if(!isNull(rnames)) Rconn_printf(con, "%s%s", EncodeElement2(rnames, i, quote_rn, qmethod, &strBuf, cdec), csep); for(j = 0; j < nc; j++) { if(j > 0) Rconn_printf(con, "%s", csep); if(isna(x, i + j*nr)) tmp = cna; else { tmp = EncodeElement2(x, i + j*nr, quote_col[j], qmethod, &strBuf, cdec); /* if(cdec) change_dec(tmp, cdec, TYPEOF(x)); */ } Rconn_printf(con, "%s", tmp); } Rconn_printf(con, "%s", ceol); } } endcontext(&cntxt); wt_cleanup(&wi); return R_NilValue; }