static void cat_cleanup(void *data) { cat_info *pci = (cat_info *) data; Rconnection con = pci->con; Rboolean wasopen = pci->wasopen; int changedcon = pci->changedcon; con->fflush(con); if(changedcon) switch_stdout(-1, 0); /* previous line might have closed it */ if(!wasopen && con->isopen) con->close(con); #ifdef Win32 WinUTF8out = FALSE; #endif }
SEXP attribute_hidden do_readDCF(SEXP call, SEXP op, SEXP args, SEXP env) { int nwhat, nret, nc, nr, m, k, lastm, need; Rboolean blank_skip, field_skip = FALSE; int whatlen, dynwhat, buflen = 8096; // was 100, but that re-alloced often char *line, *buf; regex_t blankline, contline, trailblank, regline, eblankline; regmatch_t regmatch[1]; SEXP file, what, what2, retval, retval2, dims, dimnames; Rconnection con = NULL; Rboolean wasopen, is_eblankline; RCNTXT cntxt; SEXP fold_excludes; Rboolean field_fold = TRUE, has_fold_excludes; const char *field_name; int offset = 0; /* -Wall */ checkArity(op, args); file = CAR(args); con = getConnection(asInteger(file)); wasopen = con->isopen; if(!wasopen) { if(!con->open(con)) error(_("cannot open the connection")); /* Set up a context which will close the connection on error */ begincontext(&cntxt, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv, R_NilValue, R_NilValue); cntxt.cend = &con_cleanup; cntxt.cenddata = con; } if(!con->canread) error(_("cannot read from this connection")); args = CDR(args); PROTECT(what = coerceVector(CAR(args), STRSXP)); /* argument fields */ nwhat = LENGTH(what); dynwhat = (nwhat == 0); args = CDR(args); PROTECT(fold_excludes = coerceVector(CAR(args), STRSXP)); has_fold_excludes = (LENGTH(fold_excludes) > 0); buf = (char *) malloc(buflen); if(!buf) error(_("could not allocate memory for 'read.dcf'")); nret = 20; /* it is easier if we first have a record per column */ PROTECT(retval = allocMatrixNA(STRSXP, LENGTH(what), nret)); /* These used to use [:blank:] but that can match \xa0 as part of a UTF-8 character (and is nbspace on Windows). */ tre_regcomp(&blankline, "^[[:blank:]]*$", REG_NOSUB & REG_EXTENDED); tre_regcomp(&trailblank, "[ \t]+$", REG_EXTENDED); tre_regcomp(&contline, "^[[:blank:]]+", REG_EXTENDED); tre_regcomp(®line, "^[^:]+:[[:blank:]]*", REG_EXTENDED); tre_regcomp(&eblankline, "^[[:space:]]+\\.[[:space:]]*$", REG_EXTENDED); k = 0; lastm = -1; /* index of the field currently being recorded */ blank_skip = TRUE; void *vmax = vmaxget(); while((line = Rconn_getline2(con))) { if(strlen(line) == 0 || tre_regexecb(&blankline, line, 0, 0, 0) == 0) { /* A blank line. The first one after a record ends a new * record, subsequent ones are skipped */ if(!blank_skip) { k++; if(k > nret - 1){ nret *= 2; PROTECT(retval2 = allocMatrixNA(STRSXP, LENGTH(what), nret)); transferVector(retval2, retval); UNPROTECT_PTR(retval); retval = retval2; } blank_skip = TRUE; lastm = -1; field_skip = FALSE; field_fold = TRUE; } } else { blank_skip = FALSE; if(tre_regexecb(&contline, line, 1, regmatch, 0) == 0) { /* A continuation line: wrong if at the beginning of a record. */ if((lastm == -1) && !field_skip) { line[20] = '\0'; error(_("Found continuation line starting '%s ...' at begin of record."), line); } if(lastm >= 0) { need = (int) strlen(CHAR(STRING_ELT(retval, lastm + nwhat * k))) + 2; if(tre_regexecb(&eblankline, line, 0, NULL, 0) == 0) { is_eblankline = TRUE; } else { is_eblankline = FALSE; if(field_fold) { offset = regmatch[0].rm_eo; /* Also remove trailing whitespace. */ if((tre_regexecb(&trailblank, line, 1, regmatch, 0) == 0)) line[regmatch[0].rm_so] = '\0'; } else { offset = 0; } need += (int) strlen(line + offset); } if(buflen < need) { char *tmp = (char *) realloc(buf, need); if(!tmp) { free(buf); error(_("could not allocate memory for 'read.dcf'")); } else buf = tmp; buflen = need; } strcpy(buf,CHAR(STRING_ELT(retval, lastm + nwhat * k))); strcat(buf, "\n"); if(!is_eblankline) strcat(buf, line + offset); SET_STRING_ELT(retval, lastm + nwhat * k, mkChar(buf)); } } else { if(tre_regexecb(®line, line, 1, regmatch, 0) == 0) { for(m = 0; m < nwhat; m++){ whatlen = (int) strlen(CHAR(STRING_ELT(what, m))); if(strlen(line) > whatlen && line[whatlen] == ':' && strncmp(CHAR(STRING_ELT(what, m)), line, whatlen) == 0) { /* An already known field we are recording. */ lastm = m; field_skip = FALSE; field_name = CHAR(STRING_ELT(what, lastm)); if(has_fold_excludes) { field_fold = field_is_foldable_p(field_name, fold_excludes); } if(field_fold) { offset = regmatch[0].rm_eo; /* Also remove trailing whitespace. */ if((tre_regexecb(&trailblank, line, 1, regmatch, 0) == 0)) line[regmatch[0].rm_so] = '\0'; } else { offset = 0; } SET_STRING_ELT(retval, m + nwhat * k, mkChar(line + offset)); break; } else { /* This is a field, but not one prespecified */ lastm = -1; field_skip = TRUE; } } if(dynwhat && (lastm == -1)) { /* A previously unseen field and we are * recording all fields */ field_skip = FALSE; PROTECT(what2 = allocVector(STRSXP, nwhat+1)); PROTECT(retval2 = allocMatrixNA(STRSXP, nrows(retval)+1, ncols(retval))); if(nwhat > 0) { copyVector(what2, what); for(nr = 0; nr < nrows(retval); nr++){ for(nc = 0; nc < ncols(retval); nc++){ SET_STRING_ELT(retval2, nr+nc*nrows(retval2), STRING_ELT(retval, nr+nc*nrows(retval))); } } } UNPROTECT_PTR(retval); UNPROTECT_PTR(what); retval = retval2; what = what2; /* Make sure enough space was used */ need = (int) (Rf_strchr(line, ':') - line + 1); if(buflen < need){ char *tmp = (char *) realloc(buf, need); if(!tmp) { free(buf); error(_("could not allocate memory for 'read.dcf'")); } else buf = tmp; buflen = need; } strncpy(buf, line, Rf_strchr(line, ':') - line); buf[Rf_strchr(line, ':') - line] = '\0'; SET_STRING_ELT(what, nwhat, mkChar(buf)); nwhat++; /* lastm uses C indexing, hence nwhat - 1 */ lastm = nwhat - 1; field_name = CHAR(STRING_ELT(what, lastm)); if(has_fold_excludes) { field_fold = field_is_foldable_p(field_name, fold_excludes); } offset = regmatch[0].rm_eo; if(field_fold) { /* Also remove trailing whitespace. */ if((tre_regexecb(&trailblank, line, 1, regmatch, 0) == 0)) line[regmatch[0].rm_so] = '\0'; } SET_STRING_ELT(retval, lastm + nwhat * k, mkChar(line + offset)); } } else { /* Must be a regular line with no tag ... */ line[20] = '\0'; error(_("Line starting '%s ...' is malformed!"), line); } } } } vmaxset(vmax); if(!wasopen) {endcontext(&cntxt); con->close(con);} free(buf); tre_regfree(&blankline); tre_regfree(&contline); tre_regfree(&trailblank); tre_regfree(®line); tre_regfree(&eblankline); if(!blank_skip) k++; /* and now transpose the whole matrix */ PROTECT(retval2 = allocMatrixNA(STRSXP, k, LENGTH(what))); copyMatrix(retval2, retval, 1); PROTECT(dimnames = allocVector(VECSXP, 2)); PROTECT(dims = allocVector(INTSXP, 2)); INTEGER(dims)[0] = k; INTEGER(dims)[1] = LENGTH(what); SET_VECTOR_ELT(dimnames, 1, what); setAttrib(retval2, R_DimSymbol, dims); setAttrib(retval2, R_DimNamesSymbol, dimnames); UNPROTECT(6); return(retval2); }
static void con_cleanup(void *data) { Rconnection con = data; if(con->isopen) con->close(con); }
/* "do_parse" - the user interface input/output to files. The internal R_Parse.. functions are defined in ./gram.y (-> gram.c) .Internal( parse(file, n, text, prompt, srcfile, encoding) ) If there is text then that is read and the other arguments are ignored. */ SEXP attribute_hidden do_parse(SEXP call, SEXP op, SEXP args, SEXP env) { SEXP text, prompt, s, source; Rconnection con; Rboolean wasopen, old_latin1 = known_to_be_latin1, old_utf8 = known_to_be_utf8, allKnown = TRUE; int ifile, num, i; const char *encoding; ParseStatus status; checkArity(op, args); R_ParseError = 0; R_ParseErrorMsg[0] = '\0'; ifile = asInteger(CAR(args)); args = CDR(args); con = getConnection(ifile); wasopen = con->isopen; num = asInteger(CAR(args)); args = CDR(args); if (num == 0) return(allocVector(EXPRSXP, 0)); PROTECT(text = coerceVector(CAR(args), STRSXP)); if(length(CAR(args)) && !length(text)) errorcall(call, _("coercion of 'text' to character was unsuccessful")); args = CDR(args); prompt = CAR(args); args = CDR(args); source = CAR(args); args = CDR(args); if(!isString(CAR(args)) || LENGTH(CAR(args)) != 1) error(_("invalid '%s' value"), "encoding"); encoding = CHAR(STRING_ELT(CAR(args), 0)); /* ASCII */ known_to_be_latin1 = known_to_be_utf8 = FALSE; /* allow 'encoding' to override declaration on 'text'. */ if(streql(encoding, "latin1")) { known_to_be_latin1 = TRUE; allKnown = FALSE; } else if(streql(encoding, "UTF-8")) { known_to_be_utf8 = TRUE; allKnown = FALSE; } else if(!streql(encoding, "unknown") && !streql(encoding, "native.enc")) warning(_("argument '%s = \"%s\"' will be ignored"), "encoding", encoding); if (prompt == R_NilValue) PROTECT(prompt); else PROTECT(prompt = coerceVector(prompt, STRSXP)); if (length(text) > 0) { /* If 'text' has known encoding then we can be sure it will be correctly re-encoded to the current encoding by translateChar in the parser and so could mark the result in a Latin-1 or UTF-8 locale. A small complication is that different elements could have different encodings, but all that matters is that all non-ASCII elements have known encoding. */ for(i = 0; i < length(text); i++) if(!ENC_KNOWN(STRING_ELT(text, i)) && !IS_ASCII(STRING_ELT(text, i))) { allKnown = FALSE; break; } if(allKnown) { known_to_be_latin1 = old_latin1; known_to_be_utf8 = old_utf8; } if (num == NA_INTEGER) num = -1; s = R_ParseVector(text, num, &status, source); if (status != PARSE_OK) parseError(call, R_ParseError); } else if (ifile >= 3) {/* file != "" */ if (num == NA_INTEGER) num = -1; try { if(!wasopen && !con->open(con)) error(_("cannot open the connection")); if(!con->canread) error(_("cannot read from this connection")); s = R_ParseConn(con, num, &status, source); if(!wasopen) con->close(con); } catch (...) { if (!wasopen && con->isopen) con->close(con); throw; } if (status != PARSE_OK) parseError(call, R_ParseError); } else { if (num == NA_INTEGER) num = 1; s = R_ParseBuffer(&R_ConsoleIob, num, &status, prompt, source); if (status != PARSE_OK) parseError(call, R_ParseError); } UNPROTECT(2); known_to_be_latin1 = old_latin1; known_to_be_utf8 = old_utf8; return s; }